@@ -2122,7 +2122,7 @@ TEST_F(FALLBACK, GiBSLFloat32) {
 #if defined(GI_RVV_INTRINSICS)
     vuint32m1_t mask = vundefined_u32m1();
 #else
-    GI_UINT32_t mask;
+    GI_UINT32_t mask = {0u, 0u};
 #endif
     std::vector<float> s0{1.1f, 2.2f, 4.5f, 4.9f};
     std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
@@ -2752,7 +2752,7 @@ TEST_F(FALLBACK, GiStoreLowInt8) {
     assert_eq<int8_t>(ret.data(), s0, SIMD_LEN_8 / 2);
 }
 
-TEST_F(FALLBACK, GiStoreHihgInt8) {
+TEST_F(FALLBACK, GiStoreHighInt8) {
     GI_INT8_t src0;
     std::vector<int8_t> s0{127, 2, 56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
     s0.resize(SIMD_LEN_8);
@@ -2760,7 +2760,7 @@ TEST_F(FALLBACK, GiStoreHihgInt8) {
 
     std::vector<int8_t> ret{0};
     ret.resize(SIMD_LEN_8 / 2);
-    GiStoreHihgInt8(ret.data(), src0);
+    GiStoreHighInt8(ret.data(), src0);
 
     std::vector<int8_t> naive;
     for (size_t i = 0; i < SIMD_LEN_8 / 2; i++) {
@@ -4360,6 +4360,656 @@ TEST_F(FALLBACK, GiDivFloat32) {
     assert_lt((float*)&ret, naive, 1e-3);
 }
 
+TEST_F(FALLBACK, GiLoadUint8) {
+    std::vector<uint8_t> s0{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 255};
+    GI_UINT8_t ret;
+
+    force_memset_ret((void*)&ret, GI_SIMD_LEN_BYTE);
+    ret = GiLoadUint8(s0.data());
+
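+    // reference: the load should copy the first SIMD_LEN_8 bytes unchanged, in order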
+    std::vector<uint8_t> naive;
+    for (size_t i = 0; i < SIMD_LEN_8; i++) {
+        naive.push_back(s0[i]);
+    }
+
+    assert_eq((uint8_t*)&ret, naive, SIMD_LEN_8);
+}
+
+TEST_F(FALLBACK, GiReverseUint8) {
+    std::vector<uint8_t> s0{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+    GI_UINT8_t ret;
+
+    force_memset_ret((void*)&ret, GI_SIMD_LEN_BYTE);
+    ret = GiLoadUint8(s0.data());
+    ret = GiReverseUint8(ret);
+
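+    // reference: lane i of the reversed vector holds input lane SIMD_LEN_8 - i - 1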
+    std::vector<uint8_t> naive;
+    for (size_t i = 0; i < SIMD_LEN_8; i++) {
+        naive.push_back(s0[SIMD_LEN_8 - i - 1]);
+    }
+
+    assert_eq((uint8_t*)&ret, naive, SIMD_LEN_8);
+}
+
+TEST_F(FALLBACK, GiStoreUint8) {
+    std::vector<uint8_t> s0{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 255};
+    GI_UINT8_t src;
+    std::vector<uint8_t> ret;
+    ret.resize(SIMD_LEN_8);
+    force_memset_ret((void*)&src, GI_SIMD_LEN_BYTE);
+    src = GiLoadUint8(s0.data());
+    GiStoreUint8(ret.data(), src);
+    std::vector<uint8_t> naive;
+    for (size_t i = 0; i < SIMD_LEN_8; i++) {
+        naive.push_back(s0[i]);
+    }
+
+    assert_eq(ret.data(), naive, SIMD_LEN_8);
+}
+TEST_F(FALLBACK, GiLoadUzip0V3Uint8) {
+    std::vector<uint8_t> s0{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                            12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+                            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+                            36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 255};
+    GI_UINT8_t ret;
+
+    force_memset_ret((void*)&ret, GI_SIMD_LEN_BYTE);
+    ret = GiLoadUzip0V3Uint8(s0.data());
+
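+    // plane-0 de-interleaving load: lane i takes byte 3 * i of the packed stream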
+    std::vector<uint8_t> naive;
+    for (size_t i = 0; i < SIMD_LEN_8; i++) {
+        naive.push_back(s0[i * 3]);
+    }
+
+    assert_eq((uint8_t*)&ret, naive, SIMD_LEN_8);
+}
+TEST_F(FALLBACK, GiLoadUzip1V3Uint8) {
+    std::vector<uint8_t> s0{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                            12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+                            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+                            36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 255};
+    GI_UINT8_t ret;
+
+    force_memset_ret((void*)&ret, GI_SIMD_LEN_BYTE);
+    ret = GiLoadUzip1V3Uint8(s0.data());
+
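+    // plane-1 de-interleaving load: lane i takes byte 3 * i + 1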
+    std::vector<uint8_t> naive;
+    for (size_t i = 0; i < SIMD_LEN_8; i++) {
+        naive.push_back(s0[i * 3 + 1]);
+    }
+
+    assert_eq((uint8_t*)&ret, naive, SIMD_LEN_8);
+}
+TEST_F(FALLBACK, GiLoadUzip2V3Uint8) {
+    std::vector<uint8_t> s0{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                            12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+                            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+                            36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 255};
+    GI_UINT8_t ret;
+
+    force_memset_ret((void*)&ret, GI_SIMD_LEN_BYTE);
+    ret = GiLoadUzip2V3Uint8(s0.data());
+
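+    // plane-2 de-interleaving load: lane i takes byte 3 * i + 2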
+    std::vector<uint8_t> naive;
+    for (size_t i = 0; i < SIMD_LEN_8; i++) {
+        naive.push_back(s0[i * 3 + 2]);
+    }
+
+    assert_eq((uint8_t*)&ret, naive, SIMD_LEN_8);
+}
+TEST_F(FALLBACK, GiStoreZipUint8V3) {
+    std::vector<uint8_t> s0{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                            12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+                            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+                            36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 255};
+    GI_UINT8_t src0, src1, src2;
+    std::vector<uint8_t> ret;
+    ret.resize(SIMD_LEN_8 * 3);
+
+    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src2, GI_SIMD_LEN_BYTE);
+    src0 = GiLoadUzip0V3Uint8(s0.data());
+    src1 = GiLoadUzip1V3Uint8(s0.data());
+    src2 = GiLoadUzip2V3Uint8(s0.data());
+
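+    // zip-store re-interleaves the three planes, so the round trip must reproduce s0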
+    GiStoreZipUint8V3(ret.data(), src0, src1, src2);
+
+    std::vector<uint8_t> naive;
+    for (size_t i = 0; i < SIMD_LEN_8 * 3; i++) {
+        naive.push_back(s0[i]);
+    }
+    assert_eq(ret.data(), naive, SIMD_LEN_8 * 3);
+}
+
+TEST_F(FALLBACK, GiShiftRightInt16ToUint8) {
+    std::vector<int16_t> s0{INT16_MAX, INT16_MIN, 0x00005678, -0x00005678,
+                            0x00001234, -0x00001234, 0x00000fff, -0x00000fff};
+    GI_INT16_t src;
+    force_memset_ret((void*)&src, GI_SIMD_LEN_BYTE);
+    src = GiLoadInt16(s0.data());
+
+    std::vector<uint8_t> naive;
+    naive.resize(SIMD_LEN_8);
+    GI_UINT8_t ret;
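+    // each block shifts, saturates to [0, UINT8_MAX], and expects the narrowed
+    // results duplicated into both halves of the uint8 vector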
+#define TEST_BLOCK(shift)                                     \
+    ret = GiShiftRightInt16ToUint8(src, shift);               \
+    for (size_t i = 0; i < SIMD_LEN_16; i++) {                \
+        uint8_t val = Saturate(s0[i] >> shift, 0, UINT8_MAX); \
+        naive[i] = val;                                       \
+        naive[i + SIMD_LEN_16] = val;                         \
+    }                                                         \
+    assert_eq((uint8_t*)&ret, naive, SIMD_LEN_8);
+
+    TEST_BLOCK(1);
+    TEST_BLOCK(2);
+    TEST_BLOCK(3);
+    TEST_BLOCK(4);
+    TEST_BLOCK(5);
+    TEST_BLOCK(6);
+    TEST_BLOCK(7);
+    TEST_BLOCK(8);
+#undef TEST_BLOCK
+}
+
+TEST_F(FALLBACK, GiCombineInt16Low) {
+    std::vector<int16_t> s0{INT16_MAX, INT16_MIN, 0x00005678, -0x00005678,
+                            0x00001234, -0x00001234, 0x00000fff, -0x00000fff};
+    std::vector<int16_t> s1{1, 2, 3, -4, 5, -6, 7, -8};
+    GI_INT16_t src0, src1;
+    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
+    src0 = GiLoadInt16(s0.data());
+    src1 = GiLoadInt16(s1.data());
+
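+    // expected: the low SIMD_LEN lanes of src0 followed by the low SIMD_LEN lanes of src1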
+    std::vector<int16_t> naive;
+    naive.resize(SIMD_LEN_16);
+    GI_INT16_t ret = GiCombineInt16Low(src0, src1);
+    for (size_t i = 0; i < SIMD_LEN; i++) {
+        naive[i] = s0[i];
+        naive[i + SIMD_LEN] = s1[i];
+    }
+    assert_eq((int16_t*)&ret, naive, SIMD_LEN_16);
+}
+TEST_F(FALLBACK, GiCombineUint8Low) {
+    std::vector<uint8_t> s0{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+    GI_UINT8_t src0, src1;
+    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
+    src0 = GiLoadUint8(s0.data());
+    src1 = GiLoadUint8(s0.data());
+
+    std::vector<uint8_t> naive;
+    naive.resize(SIMD_LEN_8);
+    GI_UINT8_t ret = GiCombineUint8Low(src0, src1);
+    for (size_t i = 0; i < SIMD_LEN_16; i++) {
+        naive[i] = s0[i];
+        naive[i + SIMD_LEN_16] = s0[i];
+    }
+    assert_eq((uint8_t*)&ret, naive, SIMD_LEN_8);
+}
+TEST_F(FALLBACK, GiZipV0Int8) {
+    std::vector<int8_t> s0{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+    GI_INT8_t src0, src1;
+    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
+    src0 = GiLoadInt8(s0.data());
+    src1 = GiLoadInt8(s0.data());
+
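+    // zip of the low halves: lanes of src0 and src1 alternate (both were loaded from s0)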
+    std::vector<int8_t> naive;
+    naive.resize(SIMD_LEN_8);
+    GI_INT8_t ret = GiZipV0Int8(src0, src1);
+    for (size_t i = 0; i < SIMD_LEN_16; ++i) {
+        naive[2 * i] = s0[i];
+        naive[2 * i + 1] = s0[i];
+    }
+    assert_eq((int8_t*)&ret, naive, SIMD_LEN_8);
+}
+TEST_F(FALLBACK, GiZipV1Int8) {
+    std::vector<int8_t> s0{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+    GI_INT8_t src0, src1;
+    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
+    src0 = GiLoadInt8(s0.data());
+    src1 = GiLoadInt8(s0.data());
+
+    std::vector<int8_t> naive;
+    naive.resize(SIMD_LEN_8);
+    GI_INT8_t ret = GiZipV1Int8(src0, src1);
+    for (size_t i = 0; i < SIMD_LEN_16; ++i) {
+        naive[2 * i] = s0[i + SIMD_LEN_16];
+        naive[2 * i + 1] = s0[i + SIMD_LEN_16];
+    }
+    assert_eq((int8_t*)&ret, naive, SIMD_LEN_8);
+}
+TEST_F(FALLBACK, GiReinterpretInt8AsInt16) {
+    GI_INT8_t src0;
+    GI_INT16_t ret, naive;
+    std::vector<int8_t> s0{1, 2, -2, -1, INT8_MAX, INT8_MIN, 5, 6,
+                           7, 8, 9, 10, 11, 12, 13, 14};
+    s0.resize(SIMD_LEN_8);
+    init((int8_t*)&src0, s0, SIMD_LEN_8);
+
+    force_memset_ret((void*)&ret, GI_SIMD_LEN_BYTE);
+    ret = GiReinterpretInt8AsInt16(src0);
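+    // a reinterpret keeps the raw bits, so a byte-wise memcmp against the source suffices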
+    memcpy(&naive, &src0, GI_SIMD_LEN_BYTE);
+
+    ASSERT_FALSE(memcmp(&ret, &naive, GI_SIMD_LEN_BYTE));
+}
+TEST_F(FALLBACK, GiZipV0Int16) {
+    std::vector<int16_t> s0{INT16_MAX, INT16_MIN, 0x00005678, -0x00005678,
+                            0x00001234, -0x00001234, 0x00000fff, -0x00000fff};
+    GI_INT16_t src0, src1;
+    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
+    src0 = GiLoadInt16(s0.data());
+    src1 = GiLoadInt16(s0.data());
+
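+    // V0 interleaves the low SIMD_LEN int16 lanes of the two sources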
+    std::vector<int16_t> naive;
+    naive.resize(SIMD_LEN_16);
+    GI_INT16_t ret = GiZipV0Int16(src0, src1);
+    for (size_t i = 0; i < SIMD_LEN; i++) {
+        naive[2 * i] = s0[i];
+        naive[2 * i + 1] = s0[i];
+    }
+    assert_eq((int16_t*)&ret, naive, SIMD_LEN_16);
+}
+TEST_F(FALLBACK, GiZipV1Int16) {
+    std::vector<int16_t> s0{INT16_MAX, INT16_MIN, 0x00005678, -0x00005678,
+                            0x00001234, -0x00001234, 0x00000fff, -0x00000fff};
+    GI_INT16_t src0, src1;
+    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
+    src0 = GiLoadInt16(s0.data());
+    src1 = GiLoadInt16(s0.data());
+
+    std::vector<int16_t> naive;
+    naive.resize(SIMD_LEN_16);
+    GI_INT16_t ret = GiZipV1Int16(src0, src1);
+    for (size_t i = 0; i < SIMD_LEN; i++) {
+        naive[2 * i] = s0[i + SIMD_LEN];
+        naive[2 * i + 1] = s0[i + SIMD_LEN];
+    }
+    assert_eq((int16_t*)&ret, naive, SIMD_LEN_16);
+}
+TEST_F(FALLBACK, GiReinterpretInt16AsInt32) {
+    GI_INT16_t src0;
+    GI_INT32_t ret, naive;
+    std::vector<int16_t> s0{1, 2, -2, -1, INT16_MAX, INT16_MIN, 5, 6};
+    s0.resize(SIMD_LEN_16);
+    init((int16_t*)&src0, s0, SIMD_LEN_16);
+
+    force_memset_ret((void*)&ret, GI_SIMD_LEN_BYTE);
+    ret = GiReinterpretInt16AsInt32(src0);
+    memcpy(&naive, &src0, GI_SIMD_LEN_BYTE);
+
+    ASSERT_FALSE(memcmp(&ret, &naive, GI_SIMD_LEN_BYTE));
+}
+TEST_F(FALLBACK, GiZipV0Int32) {
+    std::vector<int32_t> s0{INT32_MAX, INT32_MIN, 0x00005678, -0x00005678};
+    GI_INT32_t src0, src1;
+    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
+    src0 = GiLoadInt32(s0.data());
+    src1 = GiLoadInt32(s0.data());
+
+    std::vector<int32_t> naive;
+    naive.resize(SIMD_LEN);
+    GI_INT32_t ret = GiZipV0Int32(src0, src1);
+    for (size_t i = 0; i < SIMD_LEN / 2; i++) {
+        naive[2 * i] = s0[i];
+        naive[2 * i + 1] = s0[i];
+    }
+    assert_eq((int32_t*)&ret, naive, SIMD_LEN);
+}
+TEST_F(FALLBACK, GiZipV1Int32) {
+    std::vector<int32_t> s0{INT16_MAX, INT16_MIN, 0x00005678, -0x00005678};
+    GI_INT32_t src0, src1;
+    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
+    src0 = GiLoadInt32(s0.data());
+    src1 = GiLoadInt32(s0.data());
+
+    std::vector<int32_t> naive;
+    naive.resize(SIMD_LEN);
+    GI_INT32_t ret = GiZipV1Int32(src0, src1);
+    for (size_t i = 0; i < SIMD_LEN / 2; i++) {
+        naive[2 * i] = s0[i + SIMD_LEN / 2];
+        naive[2 * i + 1] = s0[i + SIMD_LEN / 2];
+    }
+    assert_eq((int32_t*)&ret, naive, SIMD_LEN);
+}
+TEST_F(FALLBACK, GiCombineInt32Low) {
+    std::vector<int32_t> s0{INT16_MAX, INT16_MIN, 0x00005678, -0x00005678};
+    GI_INT32_t src0, src1;
+    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
+    src0 = GiLoadInt32(s0.data());
+    src1 = GiLoadInt32(s0.data());
+
+    std::vector<int32_t> naive;
+    naive.resize(SIMD_LEN);
+    GI_INT32_t ret = GiCombineInt32Low(src0, src1);
+    for (size_t i = 0; i < SIMD_LEN / 2; i++) {
+        naive[i] = s0[i];
+        naive[i + SIMD_LEN / 2] = s0[i];
+    }
+    assert_eq((int32_t*)&ret, naive, SIMD_LEN);
+}
+TEST_F(FALLBACK, GiCombineInt32High) {
+    std::vector<int32_t> s0{INT16_MAX, INT16_MIN, 0x00005678, -0x00005678};
+    GI_INT32_t src0, src1;
+    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
+    src0 = GiLoadInt32(s0.data());
+    src1 = GiLoadInt32(s0.data());
+
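+    // expected: the high SIMD_LEN / 2 lanes of src0 followed by the high lanes of src1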
+    std::vector<int32_t> naive;
+    naive.resize(SIMD_LEN);
+    GI_INT32_t ret = GiCombineInt32High(src0, src1);
+    for (size_t i = 0; i < SIMD_LEN / 2; i++) {
+        naive[i] = s0[i + SIMD_LEN / 2];
+        naive[i + SIMD_LEN / 2] = s0[i + SIMD_LEN / 2];
+    }
+    assert_eq((int32_t*)&ret, naive, SIMD_LEN);
+}
+
+TEST_F(FALLBACK, GiStoreZipInt8V3) {
+    std::vector<int8_t> s0{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                           12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+                           24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+                           36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 127};
+    GI_INT8_t src0, src1, src2;
+    GI_INT8_V3_t src;
+    std::vector<int8_t> ret;
+    ret.resize(SIMD_LEN_8 * 3);
+    force_memset_ret((void*)&src, GI_SIMD_LEN_BYTE * 3);
+    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src2, GI_SIMD_LEN_BYTE);
+    src = GiLoadUzipInt8V3(s0.data());
+    src0 = GiGetSubVectorInt8V3(src, 0);
+    src1 = GiGetSubVectorInt8V3(src, 1);
+    src2 = GiGetSubVectorInt8V3(src, 2);
+
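+    // re-interleave the three de-interleaved sub-vectors; the output must match the packed input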
+    GiStoreZipInt8V3(ret.data(), src0, src1, src2);
+
+    std::vector<int8_t> naive;
+    for (size_t i = 0; i < SIMD_LEN_8 * 3; i++) {
+        naive.push_back(s0[i]);
+    }
+    assert_eq(ret.data(), naive, SIMD_LEN_8 * 3);
+}
+
+TEST_F(FALLBACK, GiShiftRightInt32) {
+    std::vector<int32_t> s0{INT32_MAX, INT32_MIN, 0x12345678, -0x12345678};
+    GI_INT32_t src;
+    force_memset_ret((void*)&src, GI_SIMD_LEN_BYTE);
+    src = GiLoadInt32(s0.data());
+
+    std::vector<int32_t> naive;
+    naive.resize(SIMD_LEN);
+    GI_INT32_t ret;
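+    // arithmetic shift right; each block checks one shift amount against the scalar result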
+#define TEST_BLOCK(shift)                       \
+    ret = GiShiftRightInt32(src, shift);        \
+    for (size_t i = 0; i < SIMD_LEN; i++) {     \
+        naive[i] = s0[i] >> shift;              \
+    }                                           \
+    assert_eq((int32_t*)&ret, naive, SIMD_LEN);
+
+    TEST_BLOCK(1);
+    TEST_BLOCK(2);
+    TEST_BLOCK(3);
+    TEST_BLOCK(4);
+    TEST_BLOCK(5);
+    TEST_BLOCK(6);
+    TEST_BLOCK(7);
+    TEST_BLOCK(8);
+    TEST_BLOCK(9);
+    TEST_BLOCK(10);
+    TEST_BLOCK(11);
+    TEST_BLOCK(12);
+    TEST_BLOCK(13);
+    TEST_BLOCK(14);
+    TEST_BLOCK(15);
+    TEST_BLOCK(16);
+
+#undef TEST_BLOCK
+}
+TEST_F(FALLBACK, GiShiftLeftInt32) {
+    std::vector<int32_t> s0{INT32_MAX, INT32_MIN, 0x12345678, -0x12345678};
+    GI_INT32_t src;
+    force_memset_ret((void*)&src, GI_SIMD_LEN_BYTE);
+    src = GiLoadInt32(s0.data());
+
+    std::vector<int32_t> naive;
+    naive.resize(SIMD_LEN);
+    GI_INT32_t ret;
+// shift in unsigned to avoid UB when left-shifting negative or overflowing values
+#define TEST_BLOCK(shift)                                    \
+    ret = GiShiftLeftInt32(src, shift);                      \
+    for (size_t i = 0; i < SIMD_LEN; i++) {                  \
+        naive[i] = (int32_t)((uint32_t)s0[i] << shift);      \
+    }                                                        \
+    assert_eq((int32_t*)&ret, naive, SIMD_LEN);
+
+    TEST_BLOCK(1);
+    TEST_BLOCK(2);
+    TEST_BLOCK(3);
+    TEST_BLOCK(4);
+    TEST_BLOCK(5);
+    TEST_BLOCK(6);
+    TEST_BLOCK(7);
+    TEST_BLOCK(8);
+    TEST_BLOCK(9);
+    TEST_BLOCK(10);
+    TEST_BLOCK(11);
+    TEST_BLOCK(12);
+    TEST_BLOCK(13);
+    TEST_BLOCK(14);
+    TEST_BLOCK(15);
+    TEST_BLOCK(16);
+
+#undef TEST_BLOCK
+}
+
+TEST_F(FALLBACK, GiBroadcastInt16) {
+    int16_t src0 = 5;
+    GI_INT16_t ret;
+
+    force_memset_ret((void*)&ret, GI_SIMD_LEN_BYTE);
+    ret = GiBroadcastInt16(src0);
+
+    std::vector<int16_t> naive;
+    for (size_t i = 0; i < SIMD_LEN_16; i++) {
+        naive.push_back(src0);
+    }
+
+    assert_eq((int16_t*)&ret, naive, SIMD_LEN_16);
+}
+TEST_F(FALLBACK, GiAndInt16) {
+    std::vector<int16_t> s0{INT16_MAX, INT16_MIN, 0x00005678, -0x00005678,
+                            0x00001234, -0x00001234, 0x00000fff, -0x00000fff};
+    GI_INT16_t src0, src1;
+    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
+    src0 = GiLoadInt16(s0.data());
+    src1 = GiLoadInt16(s0.data());
+
+    std::vector<int16_t> naive;
+    naive.resize(SIMD_LEN_16);
+    GI_INT16_t ret = GiAndInt16(src0, src1);
+    for (size_t i = 0; i < SIMD_LEN_16; i++) {
+        naive[i] = s0[i] & s0[i];
+    }
+    assert_eq((int16_t*)&ret, naive, SIMD_LEN_16);
+}
+TEST_F(FALLBACK, GiCvtInt32ToInt16) {
+    std::vector<int32_t> s0{INT32_MAX, INT32_MIN, 0x12345678, -0x12345678};
+    GI_INT32_t src;
+    force_memset_ret((void*)&src, GI_SIMD_LEN_BYTE);
+    src = GiLoadInt32(s0.data());
+
+    std::vector<int16_t> naive;
+    naive.resize(SIMD_LEN_16);
+    GI_INT16_t ret;
+    ret = GiCvtInt32ToInt16(src);
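+    // saturating narrow: the SIMD_LEN results are expected in both halves of the output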
+    for (size_t i = 0; i < SIMD_LEN; i++) {
+        int16_t val = Saturate(s0[i], INT16_MIN, INT16_MAX);
+        naive[i] = val;
+        naive[i + SIMD_LEN] = val;
+    }
+    assert_eq((int16_t*)&ret, naive, SIMD_LEN_16);
+}
+
+TEST_F(FALLBACK, GiInterleave4Int8) {
+    std::vector<int8_t> s0{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+    GI_INT8_t ret;
+
+    force_memset_ret((void*)&ret, GI_SIMD_LEN_BYTE);
+    ret = GiLoadInt8(s0.data());
+    ret = GiInterleave4Int8(ret);
+
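+    // 4-way de-interleave: lanes are gathered with stride 4 into four consecutive groups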
+    std::vector<int8_t> naive;
+    naive.resize(SIMD_LEN_8);
+    for (size_t i = 0; i < SIMD_LEN; i++) {
+        naive[i] = s0[i * 4];
+        naive[i + 4] = s0[i * 4 + 1];
+        naive[i + 2 * 4] = s0[i * 4 + 2];
+        naive[i + 3 * 4] = s0[i * 4 + 3];
+    }
+
+    assert_eq((int8_t*)&ret, naive, SIMD_LEN_8);
+}
+TEST_F(FALLBACK, GiCvtUint8toInt16Low) {
+    std::vector<uint8_t> s0{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 127};
+    GI_INT16_t ret;
+
+    force_memset_ret((void*)&ret, GI_SIMD_LEN_BYTE);
+    GI_UINT8_t src = GiLoadUint8(s0.data());
+    ret = GiCvtUint8toInt16Low(src);
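+    // widening convert: the low SIMD_LEN_16 bytes are copied into int16 lanes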
+    std::vector<int16_t> naive;
+    naive.resize(SIMD_LEN_16);
+    for (size_t i = 0; i < SIMD_LEN_16; i++) {
+        naive[i] = s0[i];
+    }
+
+    assert_eq((int16_t*)&ret, naive, SIMD_LEN_16);
+}
+TEST_F(FALLBACK, GiCvtUint8toInt16High) {
+    std::vector<uint8_t> s0{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 127};
+    GI_INT16_t ret;
+
+    force_memset_ret((void*)&ret, GI_SIMD_LEN_BYTE);
+    GI_UINT8_t src = GiLoadUint8(s0.data());
+    ret = GiCvtUint8toInt16High(src);
+    std::vector<int16_t> naive;
+    naive.resize(SIMD_LEN_16);
+    for (size_t i = 0; i < SIMD_LEN_16; i++) {
+        naive[i] = s0[i + SIMD_LEN_16];
+    }
+
+    assert_eq((int16_t*)&ret, naive, SIMD_LEN_16);
+}
+TEST_F(FALLBACK, GiMultiplyAddInt16LongLow) {
+    GI_INT16_t src0, src1;
+    GI_INT32_t src2;
+    std::vector<int32_t> s1{1, 2, 3, 4};
+    std::vector<int16_t> s0{INT16_MAX, INT16_MIN, 0x00005678, -0x00005678,
+                            0x00001234, -0x00001234, 0x00000fff, -0x00000fff};
+    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src2, GI_SIMD_LEN_BYTE);
+    src0 = GiLoadInt16(s0.data());
+    src1 = GiLoadInt16(s0.data());
+    src2 = GiLoadInt32(s1.data());
+
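+    // widening multiply-accumulate on the low halves: s1[i] + (int32)s0[i] * (int32)s0[i]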
+    std::vector<int32_t> naive;
+    naive.resize(SIMD_LEN);
+    GI_INT32_t ret = GiMultiplyAddInt16LongLow(src2, src0, src1);
+    for (size_t i = 0; i < SIMD_LEN; i++) {
+        naive[i] = (int32_t)s1[i] + (int32_t)s0[i] * (int32_t)s0[i];
+    }
+    assert_eq((int32_t*)&ret, naive, SIMD_LEN);
+}
+TEST_F(FALLBACK, GiMultiplyAddInt16LongHigh) {
+    GI_INT16_t src0, src1;
+    GI_INT32_t src2;
+    std::vector<int32_t> s1{1, 2, 3, 4};
+    std::vector<int16_t> s0{INT16_MAX, INT16_MIN, 0x00005678, -0x00005678,
+                            0x00001234, -0x00001234, 0x00000fff, -0x00000fff};
+    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src2, GI_SIMD_LEN_BYTE);
+    src0 = GiLoadInt16(s0.data());
+    src1 = GiLoadInt16(s0.data());
+    src2 = GiLoadInt32(s1.data());
+
+    std::vector<int32_t> naive;
+    naive.resize(SIMD_LEN);
+    GI_INT32_t ret = GiMultiplyAddInt16LongHigh(src2, src0, src1);
+    for (size_t i = 0; i < SIMD_LEN; i++) {
+        naive[i] =
+                (int32_t)s1[i] + (int32_t)s0[i + SIMD_LEN] * (int32_t)s0[i + SIMD_LEN];
+    }
+    assert_eq((int32_t*)&ret, naive, SIMD_LEN);
+}
+TEST_F(FALLBACK, GiCvtFromInt32V4ToUint8) {
+    std::vector<int32_t> s0{INT16_MAX, INT16_MIN, 0x00005678, -0x00005678};
+    GI_INT32_t src0, src1, src2, src3;
+    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src2, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src3, GI_SIMD_LEN_BYTE);
+    src0 = GiLoadInt32(s0.data());
+    src1 = GiLoadInt32(s0.data());
+    src2 = GiLoadInt32(s0.data());
+    src3 = GiLoadInt32(s0.data());
+    GI_UINT8_t ret = GiCvtFromInt32V4ToUint8(src0, src1, src2, src3);
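+    // each int32 vector is narrowed with unsigned saturation into one quarter of the result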
+    std::vector<uint8_t> naive;
+    naive.resize(SIMD_LEN_8);
+    for (size_t i = 0; i < SIMD_LEN; i++) {
+        naive[i] = Saturate(s0[i], 0, UINT8_MAX);
+        naive[i + SIMD_LEN] = Saturate(s0[i], 0, UINT8_MAX);
+        naive[i + 2 * SIMD_LEN] = Saturate(s0[i], 0, UINT8_MAX);
+        naive[i + 3 * SIMD_LEN] = Saturate(s0[i], 0, UINT8_MAX);
+    }
+
+    assert_eq((uint8_t*)&ret, naive, SIMD_LEN_8);
+}
+TEST_F(FALLBACK, GiSubtractInt16) {
+    std::vector<int16_t> s0{INT16_MAX, INT16_MIN, 0x00005678, -0x00005678,
+                            0x00001234, -0x00001234, 0x00000fff, -0x00000fff};
+    GI_INT16_t src0, src1;
+    force_memset_ret((void*)&src0, GI_SIMD_LEN_BYTE);
+    force_memset_ret((void*)&src1, GI_SIMD_LEN_BYTE);
+    src0 = GiLoadInt16(s0.data());
+    src1 = GiLoadInt16(s0.data());
+
+    std::vector<int16_t> naive;
+    naive.resize(SIMD_LEN_16);
+    GI_INT16_t ret = GiSubtractInt16(src0, src1);
+    for (size_t i = 0; i < SIMD_LEN_16; i++) {
+        naive[i] = s0[i] - s0[i];
+    }
+    assert_eq((int16_t*)&ret, naive, SIMD_LEN_16);
+}
+
+TEST_F(FALLBACK, GiInterleave2Uint8) {
+    std::vector<uint8_t> s0{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+    GI_UINT8_t ret;
+
+    force_memset_ret((void*)&ret, GI_SIMD_LEN_BYTE);
+    ret = GiLoadUint8(s0.data());
+    ret = GiInterleave2Uint8(ret);
+
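+    // 2-way de-interleave: even-indexed lanes first, then odd-indexed lanes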
+    std::vector<uint8_t> naive;
+    naive.resize(SIMD_LEN_8);
+    for (size_t i = 0; i < SIMD_LEN_16; i++) {
+        naive[i] = s0[2 * i];
+        naive[i + SIMD_LEN_16] = s0[2 * i + 1];
+    }
+
+    assert_eq((uint8_t*)&ret, naive, SIMD_LEN_8);
+}
 
 } // namespace test
 } // namespace megdnn