Browse Source

feat(dnn/fallback): add some new gi api

GitOrigin-RevId: 4aede0ac6a
HuaHua404-patch-4
Megvii Engine Team 2 years ago
parent
commit
b3a7d149a0
2 changed files with 1778 additions and 74 deletions
  1. +1125
    -71
      dnn/src/fallback/general_intrinsic/gi_int.h
  2. +653
    -3
      dnn/test/fallback/gi.cpp

+ 1125
- 71
dnn/src/fallback/general_intrinsic/gi_int.h
File diff suppressed because it is too large
View File


+ 653
- 3
dnn/test/fallback/gi.cpp View File

@@ -2122,7 +2122,7 @@ TEST_F(FALLBACK, GiBSLFloat32) {
#if defined(GI_RVV_INTRINSICS)
vuint32m1_t mask = vundefined_u32m1();
#else
GI_UINT32_t mask;
GI_UINT32_t mask = {0u, 0u};
#endif
std::vector<float> s0{1.1f, 2.2f, 4.5f, 4.9f};
std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
@@ -2752,7 +2752,7 @@ TEST_F(FALLBACK, GiStoreLowInt8) {
assert_eq<int8_t>(ret.data(), s0, SIMD_LEN_8 / 2);
}

TEST_F(FALLBACK, GiStoreHihgInt8) {
TEST_F(FALLBACK, GiStoreHighInt8) {
GI_INT8_t src0;
std::vector<int8_t> s0{127, 2, 56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
s0.resize(SIMD_LEN_8);
@@ -2760,7 +2760,7 @@ TEST_F(FALLBACK, GiStoreHihgInt8) {

std::vector<int8_t> ret{0};
ret.resize(SIMD_LEN_8 / 2);
GiStoreHihgInt8(ret.data(), src0);
GiStoreHighInt8(ret.data(), src0);

std::vector<int8_t> naive;
for (size_t i = 0; i < SIMD_LEN_8 / 2; i++) {
@@ -4360,6 +4360,656 @@ TEST_F(FALLBACK, GiDivFloat32) {
assert_lt((float*)&ret, naive, 1e-3);
}

// GiLoadUint8: the loaded vector must equal the first SIMD_LEN_8 bytes of the
// source buffer, lane for lane.
TEST_F(FALLBACK, GiLoadUint8) {
    std::vector<uint8_t> input{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 255};

    // Scalar reference: a plain element-wise copy of the input.
    std::vector<uint8_t> expected;
    expected.resize(SIMD_LEN_8);
    for (size_t lane = 0; lane < SIMD_LEN_8; ++lane) {
        expected[lane] = input[lane];
    }

    GI_UINT8_t loaded;
    force_memset_ret(static_cast<void*>(&loaded), GI_SIMD_LEN_BYTE);
    loaded = GiLoadUint8(input.data());

    assert_eq(reinterpret_cast<uint8_t*>(&loaded), expected, SIMD_LEN_8);
}

// GiReverseUint8: lane i of the result must hold lane (SIMD_LEN_8 - 1 - i) of
// the input.
TEST_F(FALLBACK, GiReverseUint8) {
    std::vector<uint8_t> input{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

    // Scalar reference: read the input back to front.
    std::vector<uint8_t> expected;
    expected.resize(SIMD_LEN_8);
    for (size_t lane = 0; lane < SIMD_LEN_8; ++lane) {
        expected[lane] = input[SIMD_LEN_8 - lane - 1];
    }

    GI_UINT8_t vec;
    force_memset_ret(static_cast<void*>(&vec), GI_SIMD_LEN_BYTE);
    vec = GiLoadUint8(input.data());
    vec = GiReverseUint8(vec);

    assert_eq(reinterpret_cast<uint8_t*>(&vec), expected, SIMD_LEN_8);
}

// GiStoreUint8: a vector loaded from `input` and stored to `stored` must
// reproduce the first SIMD_LEN_8 input bytes.
TEST_F(FALLBACK, GiStoreUint8) {
    std::vector<uint8_t> input{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 255};

    std::vector<uint8_t> stored;
    stored.resize(SIMD_LEN_8);

    GI_UINT8_t vec;
    force_memset_ret(static_cast<void*>(&vec), GI_SIMD_LEN_BYTE);
    vec = GiLoadUint8(input.data());
    GiStoreUint8(stored.data(), vec);

    // Scalar reference: the untouched input bytes.
    std::vector<uint8_t> expected;
    expected.resize(SIMD_LEN_8);
    for (size_t lane = 0; lane < SIMD_LEN_8; ++lane) {
        expected[lane] = input[lane];
    }

    assert_eq(stored.data(), expected, SIMD_LEN_8);
}
// GiLoadUzip0V3Uint8: from a buffer of 3-byte groups, the result must contain
// byte 0 of each group (input[3 * i]).
TEST_F(FALLBACK, GiLoadUzip0V3Uint8) {
    std::vector<uint8_t> input{0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
                               12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                               24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
                               36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 255};

    // Scalar reference: stride-3 gather starting at offset 0.
    std::vector<uint8_t> expected;
    expected.resize(SIMD_LEN_8);
    for (size_t lane = 0; lane < SIMD_LEN_8; ++lane) {
        expected[lane] = input[lane * 3];
    }

    GI_UINT8_t channel;
    force_memset_ret(static_cast<void*>(&channel), GI_SIMD_LEN_BYTE);
    channel = GiLoadUzip0V3Uint8(input.data());

    assert_eq(reinterpret_cast<uint8_t*>(&channel), expected, SIMD_LEN_8);
}
// GiLoadUzip1V3Uint8: from a buffer of 3-byte groups, the result must contain
// byte 1 of each group (input[3 * i + 1]).
TEST_F(FALLBACK, GiLoadUzip1V3Uint8) {
    std::vector<uint8_t> input{0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
                               12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                               24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
                               36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 255};

    // Scalar reference: stride-3 gather starting at offset 1.
    std::vector<uint8_t> expected;
    expected.resize(SIMD_LEN_8);
    for (size_t lane = 0; lane < SIMD_LEN_8; ++lane) {
        expected[lane] = input[lane * 3 + 1];
    }

    GI_UINT8_t channel;
    force_memset_ret(static_cast<void*>(&channel), GI_SIMD_LEN_BYTE);
    channel = GiLoadUzip1V3Uint8(input.data());

    assert_eq(reinterpret_cast<uint8_t*>(&channel), expected, SIMD_LEN_8);
}
// GiLoadUzip2V3Uint8: from a buffer of 3-byte groups, the result must contain
// byte 2 of each group (input[3 * i + 2]).
TEST_F(FALLBACK, GiLoadUzip2V3Uint8) {
    std::vector<uint8_t> input{0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
                               12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                               24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
                               36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 255};

    // Scalar reference: stride-3 gather starting at offset 2.
    std::vector<uint8_t> expected;
    expected.resize(SIMD_LEN_8);
    for (size_t lane = 0; lane < SIMD_LEN_8; ++lane) {
        expected[lane] = input[lane * 3 + 2];
    }

    GI_UINT8_t channel;
    force_memset_ret(static_cast<void*>(&channel), GI_SIMD_LEN_BYTE);
    channel = GiLoadUzip2V3Uint8(input.data());

    assert_eq(reinterpret_cast<uint8_t*>(&channel), expected, SIMD_LEN_8);
}
// GiStoreZipUint8V3: de-interleave a 3 * SIMD_LEN_8 byte buffer into three
// vectors with the GiLoadUzip{0,1,2}V3Uint8 loaders, store them back with
// GiStoreZipUint8V3, and require the round trip to reproduce the input.
TEST_F(FALLBACK, GiStoreZipUint8V3) {
    std::vector<uint8_t> s0{0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
                            12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
                            36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 255};
    GI_UINT8_t src0, src1, src2;
    std::vector<uint8_t> ret;
    ret.resize(SIMD_LEN_8 * 3);

    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
    force_memset_ret((void*)&src2, GI_SIMD_LEN_BYTE);
    src0 = GiLoadUzip0V3Uint8(s0.data());
    src1 = GiLoadUzip1V3Uint8(s0.data());
    src2 = GiLoadUzip2V3Uint8(s0.data());

    GiStoreZipUint8V3(ret.data(), src0, src1, src2);

    std::vector<uint8_t> naive;
    for (size_t i = 0; i < SIMD_LEN_8 * 3; i++) {
        naive.push_back(s0[i]);
    }
    // The store writes three vectors' worth of bytes, so compare all of them;
    // checking only SIMD_LEN_8 would leave two thirds of the output unverified.
    assert_eq(ret.data(), naive, SIMD_LEN_8 * 3);
}

// GiShiftRightInt16ToUint8, shifts 1..8: each int16 lane is shifted right and
// passed through Saturate(., 0, UINT8_MAX); the expected uint8 vector holds
// that value at lane i and again at lane i + SIMD_LEN_16.
TEST_F(FALLBACK, GiShiftRightInt16ToUint8) {
    std::vector<int16_t> input{INT16_MAX,  INT16_MIN,   0x00005678, -0x00005678,
                               0x00001234, -0x00001234, 0x00000fff, -0x00000fff};
    std::vector<uint8_t> expected;
    expected.resize(SIMD_LEN_8);

    GI_INT16_t vec;
    force_memset_ret(static_cast<void*>(&vec), GI_SIMD_LEN_BYTE);
    vec = GiLoadInt16(input.data());

    GI_UINT8_t narrowed;
// A macro (rather than a loop) keeps every shift amount a literal at the call
// site; presumably some backends require an immediate -- TODO confirm.
#define TEST_BLOCK(shift)                                           \
    narrowed = GiShiftRightInt16ToUint8(vec, shift);                \
    for (size_t lane = 0; lane < SIMD_LEN_16; ++lane) {             \
        uint8_t sat = Saturate(input[lane] >> shift, 0, UINT8_MAX); \
        expected[lane] = sat;                                       \
        expected[lane + SIMD_LEN_16] = sat;                         \
    }                                                               \
    assert_eq(reinterpret_cast<uint8_t*>(&narrowed), expected, SIMD_LEN_8);

    TEST_BLOCK(1);
    TEST_BLOCK(2);
    TEST_BLOCK(3);
    TEST_BLOCK(4);
    TEST_BLOCK(5);
    TEST_BLOCK(6);
    TEST_BLOCK(7);
    TEST_BLOCK(8);
#undef TEST_BLOCK
}

// GiCombineInt16Low: the result must hold the first SIMD_LEN lanes of the
// first operand followed by the first SIMD_LEN lanes of the second operand.
TEST_F(FALLBACK, GiCombineInt16Low) {
    std::vector<int16_t> lhs{INT16_MAX,  INT16_MIN,   0x00005678, -0x00005678,
                             0x00001234, -0x00001234, 0x00000fff, -0x00000fff};
    std::vector<int16_t> rhs{1, 2, 3, -4, 5, -6, 7, -8};

    // Scalar reference built from the leading halves of both inputs.
    std::vector<int16_t> expected(SIMD_LEN_16);
    for (size_t lane = 0; lane < SIMD_LEN; ++lane) {
        expected[lane] = lhs[lane];
        expected[lane + SIMD_LEN] = rhs[lane];
    }

    GI_INT16_t a, b;
    force_memset_ret(static_cast<void*>(&a), GI_SIMD_LEN_BYTE);
    force_memset_ret(static_cast<void*>(&b), GI_SIMD_LEN_BYTE);
    a = GiLoadInt16(lhs.data());
    b = GiLoadInt16(rhs.data());

    GI_INT16_t combined = GiCombineInt16Low(a, b);
    assert_eq(reinterpret_cast<int16_t*>(&combined), expected, SIMD_LEN_16);
}
// GiCombineUint8Low with the same vector as both operands: the first
// SIMD_LEN_16 input bytes must appear twice in a row in the result.
TEST_F(FALLBACK, GiCombineUint8Low) {
    std::vector<uint8_t> input{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

    // Scalar reference: leading half of the input, duplicated.
    std::vector<uint8_t> expected(SIMD_LEN_8);
    for (size_t lane = 0; lane < SIMD_LEN_16; ++lane) {
        expected[lane] = input[lane];
        expected[lane + SIMD_LEN_16] = input[lane];
    }

    GI_UINT8_t a, b;
    force_memset_ret(static_cast<void*>(&a), GI_SIMD_LEN_BYTE);
    force_memset_ret(static_cast<void*>(&b), GI_SIMD_LEN_BYTE);
    a = GiLoadUint8(input.data());
    b = GiLoadUint8(input.data());

    GI_UINT8_t combined = GiCombineUint8Low(a, b);
    assert_eq(reinterpret_cast<uint8_t*>(&combined), expected, SIMD_LEN_8);
}
// GiZipV0Int8 with the same vector as both operands: result lanes 2i and
// 2i + 1 must both equal input lane i, for i < SIMD_LEN_16.
TEST_F(FALLBACK, GiZipV0Int8) {
    std::vector<int8_t> input{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

    // Scalar reference: every element of the leading half emitted twice.
    std::vector<int8_t> expected(SIMD_LEN_8);
    for (size_t pair = 0; pair < SIMD_LEN_16; ++pair) {
        expected[2 * pair] = input[pair];
        expected[2 * pair + 1] = input[pair];
    }

    GI_INT8_t a, b;
    force_memset_ret(static_cast<void*>(&a), GI_SIMD_LEN_BYTE);
    force_memset_ret(static_cast<void*>(&b), GI_SIMD_LEN_BYTE);
    a = GiLoadInt8(input.data());
    b = GiLoadInt8(input.data());

    GI_INT8_t zipped = GiZipV0Int8(a, b);
    assert_eq(reinterpret_cast<int8_t*>(&zipped), expected, SIMD_LEN_8);
}
// GiZipV1Int8 with the same vector as both operands: result lanes 2i and
// 2i + 1 must both equal input lane i + SIMD_LEN_16, for i < SIMD_LEN_16.
TEST_F(FALLBACK, GiZipV1Int8) {
    std::vector<int8_t> input{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

    // Scalar reference: every element of the trailing half emitted twice.
    std::vector<int8_t> expected(SIMD_LEN_8);
    for (size_t pair = 0; pair < SIMD_LEN_16; ++pair) {
        expected[2 * pair] = input[pair + SIMD_LEN_16];
        expected[2 * pair + 1] = input[pair + SIMD_LEN_16];
    }

    GI_INT8_t a, b;
    force_memset_ret(static_cast<void*>(&a), GI_SIMD_LEN_BYTE);
    force_memset_ret(static_cast<void*>(&b), GI_SIMD_LEN_BYTE);
    a = GiLoadInt8(input.data());
    b = GiLoadInt8(input.data());

    GI_INT8_t zipped = GiZipV1Int8(a, b);
    assert_eq(reinterpret_cast<int8_t*>(&zipped), expected, SIMD_LEN_8);
}
// GiReinterpretInt8AsInt16: the reinterpreted vector must be byte-for-byte
// identical to the int8 source vector.
TEST_F(FALLBACK, GiReinterpretInt8AsInt16) {
    GI_INT8_t src0;
    GI_INT16_t ret, naive;
    std::vector<int8_t> s0{1, 2, -2, -1, INT8_MAX, INT8_MIN, 5,  6,
                           7, 8, 9,  10, 11,       12,       13, 14};
    // Size the input by the int8 lane count. SIMD_LEN is the 32-bit lane count
    // and would truncate the data, leaving most of src0 uninitialised while
    // the memcmp below still reads the whole vector.
    s0.resize(SIMD_LEN_8);
    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
    src0 = GiLoadInt8(s0.data());

    force_memset_ret((void*)&ret, GI_SIMD_LEN_BYTE);
    ret = GiReinterpretInt8AsInt16(src0);
    // Reference: a raw byte copy of the source vector.
    memcpy(&naive, &src0, GI_SIMD_LEN_BYTE);

    ASSERT_FALSE(memcmp(&ret, &naive, GI_SIMD_LEN_BYTE));
}
// GiZipV0Int16 with the same vector as both operands: result lanes 2i and
// 2i + 1 must both equal input lane i, for i < SIMD_LEN.
TEST_F(FALLBACK, GiZipV0Int16) {
    std::vector<int16_t> input{INT16_MAX,  INT16_MIN,   0x00005678, -0x00005678,
                               0x00001234, -0x00001234, 0x00000fff, -0x00000fff};

    // Scalar reference: every element of the leading half emitted twice.
    std::vector<int16_t> expected(SIMD_LEN_16);
    for (size_t pair = 0; pair < SIMD_LEN; ++pair) {
        expected[2 * pair] = input[pair];
        expected[2 * pair + 1] = input[pair];
    }

    GI_INT16_t a, b;
    force_memset_ret(static_cast<void*>(&a), GI_SIMD_LEN_BYTE);
    force_memset_ret(static_cast<void*>(&b), GI_SIMD_LEN_BYTE);
    a = GiLoadInt16(input.data());
    b = GiLoadInt16(input.data());

    GI_INT16_t zipped = GiZipV0Int16(a, b);
    assert_eq(reinterpret_cast<int16_t*>(&zipped), expected, SIMD_LEN_16);
}
// GiZipV1Int16 with the same vector as both operands: result lanes 2i and
// 2i + 1 must both equal input lane i + SIMD_LEN, for i < SIMD_LEN.
TEST_F(FALLBACK, GiZipV1Int16) {
    std::vector<int16_t> input{INT16_MAX,  INT16_MIN,   0x00005678, -0x00005678,
                               0x00001234, -0x00001234, 0x00000fff, -0x00000fff};

    // Scalar reference: every element of the trailing half emitted twice.
    std::vector<int16_t> expected(SIMD_LEN_16);
    for (size_t pair = 0; pair < SIMD_LEN; ++pair) {
        expected[2 * pair] = input[pair + SIMD_LEN];
        expected[2 * pair + 1] = input[pair + SIMD_LEN];
    }

    GI_INT16_t a, b;
    force_memset_ret(static_cast<void*>(&a), GI_SIMD_LEN_BYTE);
    force_memset_ret(static_cast<void*>(&b), GI_SIMD_LEN_BYTE);
    a = GiLoadInt16(input.data());
    b = GiLoadInt16(input.data());

    GI_INT16_t zipped = GiZipV1Int16(a, b);
    assert_eq(reinterpret_cast<int16_t*>(&zipped), expected, SIMD_LEN_16);
}
// GiReinterpretInt16AsInt32: the reinterpreted vector must be byte-for-byte
// identical to the int16 source vector.
TEST_F(FALLBACK, GiReinterpretInt16AsInt32) {
    GI_INT16_t src0;
    GI_INT32_t ret, naive;
    std::vector<int16_t> s0{1, 2, -2, -1, INT16_MAX, INT16_MIN, 5, 6};
    // Size the input by the int16 lane count. SIMD_LEN is the 32-bit lane
    // count and would drop half of the data, leaving part of src0
    // uninitialised while the memcmp below still reads the whole vector.
    s0.resize(SIMD_LEN_16);
    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
    src0 = GiLoadInt16(s0.data());

    force_memset_ret((void*)&ret, GI_SIMD_LEN_BYTE);
    ret = GiReinterpretInt16AsInt32(src0);
    // Reference: a raw byte copy of the source vector.
    memcpy(&naive, &src0, GI_SIMD_LEN_BYTE);

    ASSERT_FALSE(memcmp(&ret, &naive, GI_SIMD_LEN_BYTE));
}
// GiZipV0Int32 with the same vector as both operands: result lanes 2i and
// 2i + 1 must both equal input lane i, for i < SIMD_LEN / 2.
TEST_F(FALLBACK, GiZipV0Int32) {
    std::vector<int32_t> input{INT32_MAX, INT32_MIN, 0x00005678, -0x00005678};

    // Scalar reference: every element of the leading half emitted twice.
    std::vector<int32_t> expected(SIMD_LEN);
    for (size_t pair = 0; pair < SIMD_LEN / 2; ++pair) {
        expected[2 * pair] = input[pair];
        expected[2 * pair + 1] = input[pair];
    }

    GI_INT32_t a, b;
    force_memset_ret(static_cast<void*>(&a), GI_SIMD_LEN_BYTE);
    force_memset_ret(static_cast<void*>(&b), GI_SIMD_LEN_BYTE);
    a = GiLoadInt32(input.data());
    b = GiLoadInt32(input.data());

    GI_INT32_t zipped = GiZipV0Int32(a, b);
    assert_eq(reinterpret_cast<int32_t*>(&zipped), expected, SIMD_LEN);
}
// GiZipV1Int32 with the same vector as both operands: result lanes 2i and
// 2i + 1 must both equal input lane i + SIMD_LEN / 2, for i < SIMD_LEN / 2.
TEST_F(FALLBACK, GiZipV1Int32) {
    std::vector<int32_t> input{INT16_MAX, INT16_MIN, 0x00005678, -0x00005678};

    // Scalar reference: every element of the trailing half emitted twice.
    std::vector<int32_t> expected(SIMD_LEN);
    for (size_t pair = 0; pair < SIMD_LEN / 2; ++pair) {
        expected[2 * pair] = input[pair + SIMD_LEN / 2];
        expected[2 * pair + 1] = input[pair + SIMD_LEN / 2];
    }

    GI_INT32_t a, b;
    force_memset_ret(static_cast<void*>(&a), GI_SIMD_LEN_BYTE);
    force_memset_ret(static_cast<void*>(&b), GI_SIMD_LEN_BYTE);
    a = GiLoadInt32(input.data());
    b = GiLoadInt32(input.data());

    GI_INT32_t zipped = GiZipV1Int32(a, b);
    assert_eq(reinterpret_cast<int32_t*>(&zipped), expected, SIMD_LEN);
}
// GiCombineInt32Low with the same vector as both operands: the first
// SIMD_LEN / 2 input lanes must appear twice in a row in the result.
TEST_F(FALLBACK, GiCombineInt32Low) {
    std::vector<int32_t> input{INT16_MAX, INT16_MIN, 0x00005678, -0x00005678};

    // Scalar reference: leading half of the input, duplicated.
    std::vector<int32_t> expected(SIMD_LEN);
    for (size_t lane = 0; lane < SIMD_LEN / 2; ++lane) {
        expected[lane] = input[lane];
        expected[lane + SIMD_LEN / 2] = input[lane];
    }

    GI_INT32_t a, b;
    force_memset_ret(static_cast<void*>(&a), GI_SIMD_LEN_BYTE);
    force_memset_ret(static_cast<void*>(&b), GI_SIMD_LEN_BYTE);
    a = GiLoadInt32(input.data());
    b = GiLoadInt32(input.data());

    GI_INT32_t combined = GiCombineInt32Low(a, b);
    assert_eq(reinterpret_cast<int32_t*>(&combined), expected, SIMD_LEN);
}
// GiCombineInt32High with the same vector as both operands: the last
// SIMD_LEN / 2 input lanes must appear twice in a row in the result.
TEST_F(FALLBACK, GiCombineInt32High) {
    std::vector<int32_t> input{INT16_MAX, INT16_MIN, 0x00005678, -0x00005678};

    // Scalar reference: trailing half of the input, duplicated.
    std::vector<int32_t> expected(SIMD_LEN);
    for (size_t lane = 0; lane < SIMD_LEN / 2; ++lane) {
        expected[lane] = input[lane + SIMD_LEN / 2];
        expected[lane + SIMD_LEN / 2] = input[lane + SIMD_LEN / 2];
    }

    GI_INT32_t a, b;
    force_memset_ret(static_cast<void*>(&a), GI_SIMD_LEN_BYTE);
    force_memset_ret(static_cast<void*>(&b), GI_SIMD_LEN_BYTE);
    a = GiLoadInt32(input.data());
    b = GiLoadInt32(input.data());

    GI_INT32_t combined = GiCombineInt32High(a, b);
    assert_eq(reinterpret_cast<int32_t*>(&combined), expected, SIMD_LEN);
}

// GiStoreZipInt8V3: de-interleave a 3 * SIMD_LEN_8 byte buffer with
// GiLoadUzipInt8V3, extract the three sub-vectors, store them back with
// GiStoreZipInt8V3, and require the round trip to reproduce the input.
TEST_F(FALLBACK, GiStoreZipInt8V3) {
    std::vector<int8_t> s0{0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11,
                           12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                           24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
                           36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 127};
    GI_INT8_t src0, src1, src2;
    GI_INT8_V3_t src;
    std::vector<int8_t> ret;
    ret.resize(SIMD_LEN_8 * 3);
    force_memset_ret((void*)&src, GI_SIMD_LEN_BYTE * 3);
    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
    force_memset_ret((void*)&src2, GI_SIMD_LEN_BYTE);
    src = GiLoadUzipInt8V3(s0.data());
    src0 = GiGetSubVectorInt8V3(src, 0);
    src1 = GiGetSubVectorInt8V3(src, 1);
    src2 = GiGetSubVectorInt8V3(src, 2);

    GiStoreZipInt8V3(ret.data(), src0, src1, src2);

    std::vector<int8_t> naive;
    for (size_t i = 0; i < SIMD_LEN_8 * 3; i++) {
        naive.push_back(s0[i]);
    }
    // The store writes three vectors' worth of bytes, so compare all of them;
    // checking only SIMD_LEN_8 would leave two thirds of the output unverified.
    assert_eq(ret.data(), naive, SIMD_LEN_8 * 3);
}

// GiShiftRightInt32, shifts 1..16: every result lane must equal the scalar
// expression `input[i] >> shift`.
TEST_F(FALLBACK, GiShiftRightInt32) {
    std::vector<int32_t> input{INT32_MAX, INT32_MIN, 0x12345678, -0x12345678};
    std::vector<int32_t> expected(SIMD_LEN);

    GI_INT32_t vec;
    force_memset_ret(static_cast<void*>(&vec), GI_SIMD_LEN_BYTE);
    vec = GiLoadInt32(input.data());

    GI_INT32_t shifted;
// A macro (rather than a loop) keeps every shift amount a literal at the call
// site; presumably some backends require an immediate -- TODO confirm.
#define TEST_BLOCK(shift)                                 \
    shifted = GiShiftRightInt32(vec, shift);              \
    for (size_t lane = 0; lane < SIMD_LEN; ++lane) {      \
        expected[lane] = input[lane] >> shift;            \
    }                                                     \
    assert_eq(reinterpret_cast<int32_t*>(&shifted), expected, SIMD_LEN);

    TEST_BLOCK(1);
    TEST_BLOCK(2);
    TEST_BLOCK(3);
    TEST_BLOCK(4);
    TEST_BLOCK(5);
    TEST_BLOCK(6);
    TEST_BLOCK(7);
    TEST_BLOCK(8);
    TEST_BLOCK(9);
    TEST_BLOCK(10);
    TEST_BLOCK(11);
    TEST_BLOCK(12);
    TEST_BLOCK(13);
    TEST_BLOCK(14);
    TEST_BLOCK(15);
    TEST_BLOCK(16);

#undef TEST_BLOCK
}
// GiShiftLeftInt32, shifts 1..16: every result lane must equal the input lane
// shifted left with wrap-around (bits shifted past bit 31 are discarded).
TEST_F(FALLBACK, GiShiftLeftInt32) {
    std::vector<int32_t> s0{INT32_MAX, INT32_MIN, 0x12345678, -0x12345678};
    GI_INT32_t src;
    force_memset_ret((void*)&src, GI_SIMD_LEN_BYTE);
    src = GiLoadInt32(s0.data());

    std::vector<int32_t> naive;
    naive.resize(SIMD_LEN);
    GI_INT32_t ret;
// The scalar reference shifts in uint32_t and converts back: left-shifting a
// negative (or overflowing) signed value is undefined behaviour before C++20,
// and the inputs here include INT32_MIN and other negative values.
#define TEST_BLOCK(shift)                                                       \
    ret = GiShiftLeftInt32(src, shift);                                         \
    for (size_t i = 0; i < SIMD_LEN; i++) {                                     \
        naive[i] = static_cast<int32_t>(static_cast<uint32_t>(s0[i]) << shift); \
    }                                                                           \
    assert_eq((int32_t*)&ret, naive, SIMD_LEN);

    TEST_BLOCK(1);
    TEST_BLOCK(2);
    TEST_BLOCK(3);
    TEST_BLOCK(4);
    TEST_BLOCK(5);
    TEST_BLOCK(6);
    TEST_BLOCK(7);
    TEST_BLOCK(8);
    TEST_BLOCK(9);
    TEST_BLOCK(10);
    TEST_BLOCK(11);
    TEST_BLOCK(12);
    TEST_BLOCK(13);
    TEST_BLOCK(14);
    TEST_BLOCK(15);
    TEST_BLOCK(16);

#undef TEST_BLOCK
}

// GiBroadcastInt16: all SIMD_LEN_16 lanes of the result must equal the scalar.
TEST_F(FALLBACK, GiBroadcastInt16) {
    int16_t scalar = 5;

    // Scalar reference: the same value in every lane.
    std::vector<int16_t> expected;
    expected.resize(SIMD_LEN_16);
    for (size_t lane = 0; lane < SIMD_LEN_16; ++lane) {
        expected[lane] = scalar;
    }

    GI_INT16_t splat;
    force_memset_ret(static_cast<void*>(&splat), GI_SIMD_LEN_BYTE);
    splat = GiBroadcastInt16(scalar);

    assert_eq(reinterpret_cast<int16_t*>(&splat), expected, SIMD_LEN_16);
}
// GiAndInt16 with the same vector as both operands: every result lane must
// equal `input[i] & input[i]`.
TEST_F(FALLBACK, GiAndInt16) {
    std::vector<int16_t> input{INT16_MAX,  INT16_MIN,   0x00005678, -0x00005678,
                               0x00001234, -0x00001234, 0x00000fff, -0x00000fff};

    // Scalar reference computed lane by lane.
    std::vector<int16_t> expected(SIMD_LEN_16);
    for (size_t lane = 0; lane < SIMD_LEN_16; ++lane) {
        expected[lane] = input[lane] & input[lane];
    }

    GI_INT16_t a, b;
    force_memset_ret(static_cast<void*>(&a), GI_SIMD_LEN_BYTE);
    force_memset_ret(static_cast<void*>(&b), GI_SIMD_LEN_BYTE);
    a = GiLoadInt16(input.data());
    b = GiLoadInt16(input.data());

    GI_INT16_t anded = GiAndInt16(a, b);
    assert_eq(reinterpret_cast<int16_t*>(&anded), expected, SIMD_LEN_16);
}
// GiCvtInt32ToInt16: each int32 lane is passed through
// Saturate(., INT16_MIN, INT16_MAX); the expected int16 vector holds that
// value at lane i and again at lane i + SIMD_LEN.
TEST_F(FALLBACK, GiCvtInt32ToInt16) {
    std::vector<int32_t> input{INT32_MAX, INT32_MIN, 0x12345678, -0x12345678};

    // Scalar reference: saturated values written to both halves.
    std::vector<int16_t> expected(SIMD_LEN_16);
    for (size_t lane = 0; lane < SIMD_LEN; ++lane) {
        int16_t sat = Saturate(input[lane], INT16_MIN, INT16_MAX);
        expected[lane] = sat;
        expected[lane + SIMD_LEN] = sat;
    }

    GI_INT32_t vec;
    force_memset_ret(static_cast<void*>(&vec), GI_SIMD_LEN_BYTE);
    vec = GiLoadInt32(input.data());

    GI_INT16_t narrowed;
    narrowed = GiCvtInt32ToInt16(vec);
    assert_eq(reinterpret_cast<int16_t*>(&narrowed), expected, SIMD_LEN_16);
}

// GiInterleave4Int8: treating the input as groups of 4 bytes, byte k of group
// i must land at result lane k * 4 + i.
TEST_F(FALLBACK, GiInterleave4Int8) {
    std::vector<int8_t> input{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

    // Scalar reference: gather byte `slot` of every 4-byte group into its own
    // run of output lanes.
    std::vector<int8_t> expected(SIMD_LEN_8);
    for (size_t group = 0; group < SIMD_LEN; ++group) {
        for (size_t slot = 0; slot < 4; ++slot) {
            expected[group + slot * 4] = input[group * 4 + slot];
        }
    }

    GI_INT8_t vec;
    force_memset_ret(static_cast<void*>(&vec), GI_SIMD_LEN_BYTE);
    vec = GiLoadInt8(input.data());
    vec = GiInterleave4Int8(vec);

    assert_eq(reinterpret_cast<int8_t*>(&vec), expected, SIMD_LEN_8);
}
// GiCvtUint8toInt16Low: the result must hold the first SIMD_LEN_16 uint8
// lanes of the input, widened to int16.
TEST_F(FALLBACK, GiCvtUint8toInt16Low) {
    std::vector<uint8_t> input{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 127};

    // Scalar reference: implicit uint8 -> int16 widening of the leading half.
    std::vector<int16_t> expected(SIMD_LEN_16);
    for (size_t lane = 0; lane < SIMD_LEN_16; ++lane) {
        expected[lane] = input[lane];
    }

    GI_INT16_t widened;
    force_memset_ret(static_cast<void*>(&widened), GI_SIMD_LEN_BYTE);
    GI_UINT8_t bytes = GiLoadUint8(input.data());
    widened = GiCvtUint8toInt16Low(bytes);

    assert_eq(reinterpret_cast<int16_t*>(&widened), expected, SIMD_LEN_16);
}
// GiCvtUint8toInt16High: the result must hold input lanes
// [SIMD_LEN_16, 2 * SIMD_LEN_16) widened to int16.
TEST_F(FALLBACK, GiCvtUint8toInt16High) {
    std::vector<uint8_t> input{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 127};

    // Scalar reference: implicit uint8 -> int16 widening of the trailing half.
    std::vector<int16_t> expected(SIMD_LEN_16);
    for (size_t lane = 0; lane < SIMD_LEN_16; ++lane) {
        expected[lane] = input[lane + SIMD_LEN_16];
    }

    GI_INT16_t widened;
    force_memset_ret(static_cast<void*>(&widened), GI_SIMD_LEN_BYTE);
    GI_UINT8_t bytes = GiLoadUint8(input.data());
    widened = GiCvtUint8toInt16High(bytes);

    assert_eq(reinterpret_cast<int16_t*>(&widened), expected, SIMD_LEN_16);
}
// GiMultiplyAddInt16LongLow(acc, a, b) with a == b: result lane i must equal
// acc[i] + a[i] * a[i], computed in int32, for the first SIMD_LEN int16 lanes.
TEST_F(FALLBACK, GiMultiplyAddInt16LongLow) {
    std::vector<int32_t> addend{1, 2, 3, 4};
    std::vector<int16_t> factor{INT16_MAX,  INT16_MIN,   0x00005678, -0x00005678,
                                0x00001234, -0x00001234, 0x00000fff, -0x00000fff};

    // Scalar reference, widened to int32 before multiplying.
    std::vector<int32_t> expected(SIMD_LEN);
    for (size_t lane = 0; lane < SIMD_LEN; ++lane) {
        expected[lane] = (int32_t)addend[lane] +
                         (int32_t)factor[lane] * (int32_t)factor[lane];
    }

    GI_INT16_t a, b;
    GI_INT32_t acc;
    force_memset_ret(static_cast<void*>(&a), GI_SIMD_LEN_BYTE);
    force_memset_ret(static_cast<void*>(&b), GI_SIMD_LEN_BYTE);
    force_memset_ret(static_cast<void*>(&acc), GI_SIMD_LEN_BYTE);
    a = GiLoadInt16(factor.data());
    b = GiLoadInt16(factor.data());
    acc = GiLoadInt32(addend.data());

    GI_INT32_t fused = GiMultiplyAddInt16LongLow(acc, a, b);
    assert_eq(reinterpret_cast<int32_t*>(&fused), expected, SIMD_LEN);
}
// GiMultiplyAddInt16LongHigh(acc, a, b) with a == b: result lane i must equal
// acc[i] + a[i + SIMD_LEN] * a[i + SIMD_LEN], computed in int32.
TEST_F(FALLBACK, GiMultiplyAddInt16LongHigh) {
    std::vector<int32_t> addend{1, 2, 3, 4};
    std::vector<int16_t> factor{INT16_MAX,  INT16_MIN,   0x00005678, -0x00005678,
                                0x00001234, -0x00001234, 0x00000fff, -0x00000fff};

    // Scalar reference over the trailing half of the int16 input.
    std::vector<int32_t> expected(SIMD_LEN);
    for (size_t lane = 0; lane < SIMD_LEN; ++lane) {
        expected[lane] = (int32_t)addend[lane] + (int32_t)factor[lane + SIMD_LEN] *
                                                         (int32_t)factor[lane + SIMD_LEN];
    }

    GI_INT16_t a, b;
    GI_INT32_t acc;
    force_memset_ret(static_cast<void*>(&a), GI_SIMD_LEN_BYTE);
    force_memset_ret(static_cast<void*>(&b), GI_SIMD_LEN_BYTE);
    force_memset_ret(static_cast<void*>(&acc), GI_SIMD_LEN_BYTE);
    a = GiLoadInt16(factor.data());
    b = GiLoadInt16(factor.data());
    acc = GiLoadInt32(addend.data());

    GI_INT32_t fused = GiMultiplyAddInt16LongHigh(acc, a, b);
    assert_eq(reinterpret_cast<int32_t*>(&fused), expected, SIMD_LEN);
}
// GiCvtFromInt32V4ToUint8 with the same vector as all four operands: each
// int32 lane is passed through Saturate(., 0, UINT8_MAX) and must appear in
// each of the four SIMD_LEN-wide quarters of the uint8 result.
TEST_F(FALLBACK, GiCvtFromInt32V4ToUint8) {
    std::vector<int32_t> s0{INT16_MAX, INT16_MIN, 0x00005678, -0x00005678};
    GI_INT32_t src0, src1, src2, src3;
    // Apply the same reset to all four operands; src2 and src3 were
    // previously skipped, unlike every other operand in this file's tests.
    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
    force_memset_ret((void*)&src2, GI_SIMD_LEN_BYTE);
    force_memset_ret((void*)&src3, GI_SIMD_LEN_BYTE);
    src0 = GiLoadInt32(s0.data());
    src1 = GiLoadInt32(s0.data());
    src2 = GiLoadInt32(s0.data());
    src3 = GiLoadInt32(s0.data());
    GI_UINT8_t ret = GiCvtFromInt32V4ToUint8(src0, src1, src2, src3);
    std::vector<uint8_t> naive;
    naive.resize(SIMD_LEN_8);
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive[i] = Saturate(s0[i], 0, UINT8_MAX);
        naive[i + SIMD_LEN] = Saturate(s0[i], 0, UINT8_MAX);
        naive[i + 2 * SIMD_LEN] = Saturate(s0[i], 0, UINT8_MAX);
        naive[i + 3 * SIMD_LEN] = Saturate(s0[i], 0, UINT8_MAX);
    }

    assert_eq((uint8_t*)&ret, naive, SIMD_LEN_8);
}
// GiSubtractInt16 with the same vector as both operands: every result lane
// must equal `input[i] - input[i]`.
TEST_F(FALLBACK, GiSubtractInt16) {
    std::vector<int16_t> input{INT16_MAX,  INT16_MIN,   0x00005678, -0x00005678,
                               0x00001234, -0x00001234, 0x00000fff, -0x00000fff};

    // Scalar reference computed lane by lane.
    std::vector<int16_t> expected(SIMD_LEN_16);
    for (size_t lane = 0; lane < SIMD_LEN_16; ++lane) {
        expected[lane] = input[lane] - input[lane];
    }

    GI_INT16_t a, b;
    force_memset_ret(static_cast<void*>(&a), GI_SIMD_LEN_BYTE);
    force_memset_ret(static_cast<void*>(&b), GI_SIMD_LEN_BYTE);
    a = GiLoadInt16(input.data());
    b = GiLoadInt16(input.data());

    GI_INT16_t diff = GiSubtractInt16(a, b);
    assert_eq(reinterpret_cast<int16_t*>(&diff), expected, SIMD_LEN_16);
}

// GiInterleave2Uint8: even-indexed input bytes must fill the first
// SIMD_LEN_16 result lanes and odd-indexed bytes the following SIMD_LEN_16.
TEST_F(FALLBACK, GiInterleave2UInt8) {
    std::vector<uint8_t> s0{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
    GI_UINT8_t ret;

    force_memset_ret((void*)&ret, GI_SIMD_LEN_BYTE);
    ret = GiLoadUint8(s0.data());
    ret = GiInterleave2Uint8(ret);

    // Keep the reference in uint8_t, matching the API under test; an int8_t
    // reference only compares correctly while every value stays below 128.
    std::vector<uint8_t> naive;
    naive.resize(SIMD_LEN_8);
    for (size_t i = 0; i < SIMD_LEN_16; i++) {
        naive[i] = s0[2 * i];
        naive[i + SIMD_LEN_16] = s0[2 * i + 1];
    }

    assert_eq((uint8_t*)&ret, naive, SIMD_LEN_8);
}

} // namespace test
} // namespace megdnn



Loading…
Cancel
Save