
gi.cpp 81 kB

#include <cmath>
#include <cstdio>
#include <cstring>
#include <vector>

#include "test/fallback/fixture.h"
#include "src/fallback/general_intrinsic/gi_float.h"
#include "src/fallback/general_intrinsic/gi_int.h"

namespace megdnn {
namespace test {

#define SIMD_LEN    (GI_SIMD_LEN_BYTE / sizeof(float))
#define SIMD_LEN_16 (GI_SIMD_LEN_BYTE / sizeof(int16_t))
#define SIMD_LEN_8  (GI_SIMD_LEN_BYTE / sizeof(int8_t))
// Helpers shared by all tests below: copy reference values into a GI vector,
// and compare a GI result element-wise against a scalar "naive" reference.
template <typename T>
static void init(
        T* dst, const std::vector<T>& value, const size_t simd_len = SIMD_LEN) {
    for (size_t i = 0; i < simd_len; i++) {
        dst[i] = value[i];
    }
}

template <typename T>
static void assert_eq(T* a, const std::vector<T>& b, const size_t simd_len = SIMD_LEN) {
    for (size_t i = 0; i < simd_len; i++) {
        ASSERT_EQ(a[i], b[i]);
    }
}

// Like assert_eq, but treats two NaNs in the same lane as equal.
template <typename T>
static void assert_eq_and_nan(
        T* a, const std::vector<T>& b, const size_t simd_len = SIMD_LEN) {
    for (size_t i = 0; i < simd_len; i++) {
        if (std::isnan(a[i]) && std::isnan(b[i])) {
            continue;
        }
        ASSERT_EQ(a[i], b[i]);
    }
}

// Approximate comparison for results that may differ by rounding error.
static void assert_lt(
        float* a, const std::vector<float>& b, const float eps,
        const size_t simd_len = SIMD_LEN) {
    for (size_t i = 0; i < simd_len; i++) {
        ASSERT_LT(std::abs(a[i] - b[i]), eps);
    }
}
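// A typical test below follows this pattern (hypothetical sketch, not one of
// the original tests): load inputs into GI vectors, call the intrinsic,
// compute a scalar "naive" reference, then compare lane by lane.
//
//   GI_FLOAT32_t a, b, r;
//   std::vector<float> s0{1.f, 2.f, 3.f, 4.f}, s1{5.f, 6.f, 7.f, 8.f};
//   s0.resize(SIMD_LEN);
//   s1.resize(SIMD_LEN);
//   init((float*)&a, s0);
//   init((float*)&b, s1);
//   r = GiAddFloat32(a, b);
//   std::vector<float> naive;
//   for (size_t i = 0; i < SIMD_LEN; i++)
//       naive.push_back(s0[i] + s1[i]);
//   assert_eq((float*)&r, naive);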
  40. TEST_F(FALLBACK, GiGetSimdType) {
  41. auto t = GiGetSimdType();
  42. auto should_type = GI_UNKNOWN;
  43. #if defined(GI_AVX_INTRINSICS) || defined(GI_AVX2_INTRINSICS) || \
  44. defined(GI_FMA_INTRINSICS)
  45. should_type = GI_AVX;
  46. #elif defined(GI_NEON_INTRINSICS)
  47. should_type = GI_NEON;
  48. #elif defined(GI_SSE2_INTRINSICS) || defined(GI_SSE42_INTRINSICS)
  49. #if defined(GI_SSE42_INTRINSICS)
  50. should_type = GI_SSE42;
  51. #elif defined(GI_SSE2_INTRINSICS)
  52. should_type = GI_SSE2;
  53. #else
  54. should_type = GI_UNKNOWN;
  55. #error "code issue happened!!"
  56. #endif
  57. #else
  58. should_type = GI_NAIVE;
  59. #endif
  60. printf("test GiGetSimdType: %d, should_type: %d\n", t, should_type);
  61. ASSERT_EQ(t, should_type);
  62. }
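// Bitwise integer ops (And/Or/AndNot/Xor): each test applies the matching C
// operator lane by lane to build the expected vector.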
  63. TEST_F(FALLBACK, GiAndInt32) {
  64. GI_INT32_t src0, src1, ret;
  65. std::vector<int32_t> s0{1, 2, 3, 4};
  66. s0.resize(SIMD_LEN);
  67. std::vector<int32_t> s1{5, 6, 7, 8};
  68. s1.resize(SIMD_LEN);
  69. init((int32_t*)&src0, s0);
  70. init((int32_t*)&src1, s1);
  71. ret = GiAndInt32(src0, src1);
  72. std::vector<int32_t> naive;
  73. for (size_t i = 0; i < SIMD_LEN; i++) {
  74. naive.push_back(s0[i] & s1[i]);
  75. }
  76. assert_eq((int32_t*)&ret, naive);
  77. }
TEST_F(FALLBACK, GiOrInt32) {
    GI_INT32_t src0, src1, ret;
    std::vector<int32_t> s0{1, 2, 3, 4};
    s0.resize(SIMD_LEN);
    std::vector<int32_t> s1{5, 6, 7, 8};
    s1.resize(SIMD_LEN);
    init((int32_t*)&src0, s0);
    init((int32_t*)&src1, s1);
    ret = GiOrInt32(src0, src1);
    std::vector<int32_t> naive;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back(s0[i] | s1[i]);
    }
    assert_eq((int32_t*)&ret, naive);
}
  93. TEST_F(FALLBACK, GiAndNotInt32) {
  94. GI_INT32_t src0, src1, ret;
  95. std::vector<int32_t> s0{1, 2, 3, 4};
  96. s0.resize(SIMD_LEN);
  97. std::vector<int32_t> s1{5, 6, 7, 8};
  98. s1.resize(SIMD_LEN);
  99. init((int32_t*)&src0, s0);
  100. init((int32_t*)&src1, s1);
  101. ret = GiAndNotInt32(src0, src1);
  102. std::vector<int32_t> naive;
  103. for (size_t i = 0; i < SIMD_LEN; i++) {
  104. naive.push_back(~s0[i] & s1[i]);
  105. }
  106. assert_eq((int32_t*)&ret, naive);
  107. }
  108. TEST_F(FALLBACK, GiXorInt32) {
  109. GI_INT32_t src0, src1, ret;
  110. std::vector<int32_t> s0{1, 2, 3, 4};
  111. s0.resize(SIMD_LEN);
  112. std::vector<int32_t> s1{5, 6, 7, 8};
  113. s1.resize(SIMD_LEN);
  114. init((int32_t*)&src0, s0);
  115. init((int32_t*)&src1, s1);
  116. ret = GiXorInt32(src0, src1);
  117. std::vector<int32_t> naive;
  118. for (size_t i = 0; i < SIMD_LEN; i++) {
  119. naive.push_back(s0[i] ^ s1[i]);
  120. }
  121. assert_eq((int32_t*)&ret, naive);
  122. }
  123. TEST_F(FALLBACK, GiBroadcastFloat32) {
  124. GI_FLOAT32_t ret;
  125. float b = 2022.0420;
  126. ret = GiBroadcastFloat32(b);
  127. std::vector<float> naive;
  128. for (size_t i = 0; i < SIMD_LEN; i++) {
  129. naive.push_back(b);
  130. }
  131. assert_eq((float*)&ret, naive);
  132. }
  133. TEST_F(FALLBACK, GiBroadcastInt32) {
  134. GI_INT32_t ret;
  135. int32_t b = 20220420;
  136. ret = GiBroadcastInt32(b);
  137. std::vector<int32_t> naive;
  138. for (size_t i = 0; i < SIMD_LEN; i++) {
  139. naive.push_back(b);
  140. }
  141. assert_eq((int32_t*)&ret, naive);
  142. }
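// Reinterpret and conversion tests: bit-level reinterprets are checked by
// memcpy of the raw lane bytes, value conversions by an explicit scalar cast.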
  143. TEST_F(FALLBACK, GiReinterpretAsInt32) {
  144. GI_INT32_t ret;
  145. GI_FLOAT32_t src0;
  146. std::vector<float> s0{1.0f, 2.2f, 3.4f, 4.5f};
  147. s0.resize(SIMD_LEN);
  148. init((float*)&src0, s0);
  149. ret = GiReinterpretAsInt32(src0);
  150. std::vector<int32_t> naive;
  151. for (size_t i = 0; i < SIMD_LEN; i++) {
  152. int32_t tmp;
  153. memcpy(&tmp, &s0[i], sizeof(int32_t));
  154. naive.push_back(tmp);
  155. }
  156. assert_eq((int32_t*)&ret, naive);
  157. }
  158. TEST_F(FALLBACK, GiReinterpretAsUint32) {
  159. GI_UINT32_t ret;
  160. GI_FLOAT32_t src0;
  161. std::vector<float> s0{1.0f, 2.2f, 3.4f, 4.5f};
  162. s0.resize(SIMD_LEN);
  163. init((float*)&src0, s0);
  164. ret = GiReinterpretAsUint32(src0);
  165. std::vector<uint32_t> naive;
  166. for (size_t i = 0; i < SIMD_LEN; i++) {
  167. uint32_t tmp;
  168. memcpy(&tmp, &s0[i], sizeof(uint32_t));
  169. naive.push_back(tmp);
  170. }
  171. assert_eq((uint32_t*)&ret, naive);
  172. }
  173. TEST_F(FALLBACK, GiReintInt32ToFloat32) {
  174. GI_FLOAT32_t ret;
  175. GI_INT32_t src0;
  176. std::vector<int32_t> s0{1, 2, 3, 4};
  177. s0.resize(SIMD_LEN);
  178. init((int32_t*)&src0, s0);
  179. ret = GiReintInt32ToFloat32(src0);
  180. std::vector<float> naive;
  181. for (size_t i = 0; i < SIMD_LEN; i++) {
  182. float tmp;
  183. memcpy(&tmp, &s0[i], sizeof(float));
  184. naive.push_back(tmp);
  185. }
  186. assert_eq((float*)&ret, naive);
  187. }
  188. TEST_F(FALLBACK, GiReintUint32ToFloat32) {
  189. GI_FLOAT32_t ret;
  190. GI_UINT32_t src0;
  191. std::vector<uint32_t> s0{1, 2, 3, 4};
  192. s0.resize(SIMD_LEN);
  193. init((uint32_t*)&src0, s0);
  194. ret = GiReintUint32ToFloat32(src0);
  195. std::vector<float> naive;
  196. for (size_t i = 0; i < SIMD_LEN; i++) {
  197. float tmp;
  198. memcpy(&tmp, &s0[i], sizeof(float));
  199. naive.push_back(tmp);
  200. }
  201. assert_eq((float*)&ret, naive);
  202. }
TEST_F(FALLBACK, GiRoundAsInt32) {
    GI_FLOAT32_t src0;
    GI_INT32_t ret;
    std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
    s0.resize(SIMD_LEN);
    init((float*)&src0, s0);
    ret = GiRoundAsInt32(src0);
    std::vector<int32_t> naive;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back((int32_t)std::round(s0[i]));
    }
    assert_eq((int32_t*)&ret, naive);
}

TEST_F(FALLBACK, GiCastToInt32) {
    GI_FLOAT32_t src0;
    GI_INT32_t ret;
    std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
    s0.resize(SIMD_LEN);
    init((float*)&src0, s0);
    ret = GiCastToInt32(src0);
    std::vector<int32_t> naive;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back((int32_t)(s0[i]));
    }
    assert_eq((int32_t*)&ret, naive);
}
  229. TEST_F(FALLBACK, GiCastToFloat32) {
  230. GI_INT32_t src0;
  231. GI_FLOAT32_t ret;
  232. std::vector<int32_t> s0{100, 200, 300, 400};
  233. s0.resize(SIMD_LEN);
  234. init((int32_t*)&src0, s0);
  235. ret = GiCastToFloat32(src0);
  236. std::vector<float> naive;
  237. for (size_t i = 0; i < SIMD_LEN; i++) {
  238. naive.push_back((float)s0[i]);
  239. }
  240. assert_eq((float*)&ret, naive);
  241. }
  242. TEST_F(FALLBACK, GiLoadBroadcastFloat32) {
  243. GI_FLOAT32_t ret;
  244. float p = 2022.0420;
  245. ret = GiLoadBroadcastFloat32(&p);
  246. std::vector<float> naive;
  247. for (size_t i = 0; i < SIMD_LEN; i++) {
  248. naive.push_back(p);
  249. }
  250. assert_eq((float*)&ret, naive);
  251. }
  252. TEST_F(FALLBACK, GiZeroFloat32) {
  253. GI_FLOAT32_t ret;
  254. memset(&ret, 'f', sizeof(GI_FLOAT32_t));
  255. float p = 0;
  256. ret = GiZeroFloat32();
  257. std::vector<float> naive;
  258. for (size_t i = 0; i < SIMD_LEN; i++) {
  259. naive.push_back(p);
  260. }
  261. assert_eq((float*)&ret, naive);
  262. }
  263. TEST_F(FALLBACK, GiLoadFloat32) {
  264. GI_FLOAT32_t ret;
  265. std::vector<float> s0{2.3f, 4.7f, -1.4f, 1223.6f};
  266. s0.resize(SIMD_LEN);
  267. ret = GiLoadFloat32(s0.data());
  268. std::vector<float> naive;
  269. for (size_t i = 0; i < SIMD_LEN; i++) {
  270. naive.push_back(s0[i]);
  271. }
  272. assert_eq((float*)&ret, naive);
  273. }
  274. TEST_F(FALLBACK, GiLoadFloat32V2) {
  275. GI_FLOAT32_V2_t ret;
  276. std::vector<float> s0{2.3f, 4.7f, -1.4f, 1223.6f, 1.1f, 4.0f, 99.7f, 1234.9f};
  277. s0.resize(SIMD_LEN * 2);
  278. ret = GiLoadFloat32V2(s0.data());
  279. std::vector<float> naive;
  280. for (size_t i = 0; i < SIMD_LEN * 2; i++) {
  281. naive.push_back(s0[i]);
  282. }
  283. assert_eq((float*)&ret, naive, SIMD_LEN * 2);
  284. }
  285. TEST_F(FALLBACK, GiLoadFloat32LowHalf) {
  286. GI_FLOAT32_t ret;
  287. std::vector<float> s0{2.3f, 4.7f, -1.4f, 1223.6f};
  288. s0.resize(SIMD_LEN);
  289. ret = GiLoadFloat32LowHalf(s0.data());
  290. std::vector<float> naive;
  291. for (size_t i = 0; i < SIMD_LEN; i++) {
  292. if (i < SIMD_LEN / 2) {
  293. naive.push_back(s0[i]);
  294. } else {
  295. naive.push_back(0);
  296. }
  297. }
  298. assert_eq((float*)&ret, naive);
  299. }
  300. TEST_F(FALLBACK, GiMlaqFloat32) {
  301. GI_FLOAT32_t src0, src1, src2, ret;
  302. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  303. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  304. std::vector<float> s2{1.2f, -3.1f, 9.0f, 11.2f};
  305. s0.resize(SIMD_LEN);
  306. s1.resize(SIMD_LEN);
  307. s2.resize(SIMD_LEN);
  308. init((float*)&src0, s0);
  309. init((float*)&src1, s1);
  310. init((float*)&src2, s2);
  311. ret = GiMlaqFloat32(src0, src1, src2);
  312. std::vector<float> naive;
  313. for (size_t i = 0; i < SIMD_LEN; i++) {
  314. naive.push_back(s0[i] + (s1[i] * s2[i]));
  315. }
  316. assert_eq((float*)&ret, naive);
  317. }
  318. TEST_F(FALLBACK, GiUzpqFloat32) {
  319. GI_FLOAT32_t src0, src1;
  320. GI_FLOAT32_V2_t ret;
  321. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  322. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  323. s0.resize(SIMD_LEN);
  324. s1.resize(SIMD_LEN);
  325. init((float*)&src0, s0);
  326. init((float*)&src1, s1);
  327. ret = GiUzpqFloat32(src0, src1);
  328. std::vector<float> naive0;
  329. std::vector<float> naive1;
  330. naive0.push_back(s0[0]);
  331. naive0.push_back(s0[2]);
  332. naive0.push_back(s1[0]);
  333. naive0.push_back(s1[2]);
  334. naive1.push_back(s0[1]);
  335. naive1.push_back(s0[3]);
  336. naive1.push_back(s1[1]);
  337. naive1.push_back(s1[3]);
  338. assert_eq((float*)&ret, naive0);
  339. assert_eq((float*)&ret + SIMD_LEN, naive1);
  340. }
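// The next group exercises the two-lane float32x2_t helpers (GiDupFloat32,
// GiLdFloat32, GiAddDFloat32, lane get/set and GiSt1Float32).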
  341. TEST_F(FALLBACK, GiDupFloat32) {
  342. float32x2_t ret;
  343. float t = 3.1415;
  344. ret = GiDupFloat32(t);
  345. auto r = (float*)&ret;
  346. ASSERT_EQ(*r, t);
  347. ASSERT_EQ(*(r + 1), t);
  348. }
  349. TEST_F(FALLBACK, GiLdFloat32) {
  350. float32x2_t ret;
  351. std::vector<float> s0{1.1f, -3.1415f};
  352. ret = GiLdFloat32(s0.data());
  353. auto r = (float*)&ret;
  354. ASSERT_EQ(*r, s0[0]);
  355. ASSERT_EQ(*(r + 1), s0[1]);
  356. }
  357. TEST_F(FALLBACK, GiAddDFloat32) {
  358. float32x2_t src0, src1, ret;
  359. std::vector<float> s0{1.1f, -3.1415f};
  360. std::vector<float> s1{2.3f, 3.14777f};
  361. memcpy(&src0, s0.data(), sizeof(float32x2_t));
  362. memcpy(&src1, s1.data(), sizeof(float32x2_t));
  363. ret = GiAddDFloat32(src0, src1);
  364. auto r = (float*)&ret;
  365. auto naive0 = s0[0] + s1[0];
  366. auto naive1 = s0[1] + s1[1];
  367. ASSERT_EQ(*r, naive0);
  368. ASSERT_EQ(*(r + 1), naive1);
  369. }
  370. TEST_F(FALLBACK, GiGetLaneFloat32) {
  371. float32x2_t src0;
  372. std::vector<float> s0{1.1f, -3.1415f};
  373. memcpy(&src0, s0.data(), sizeof(float32x2_t));
  374. auto ret = GiGetLaneFloat32(src0, 0);
  375. ASSERT_EQ(ret, s0[0]);
  376. ret = GiGetLaneFloat32(src0, 1);
  377. ASSERT_EQ(ret, s0[1]);
  378. }
  379. TEST_F(FALLBACK, GiSetLaneFloat32) {
  380. float32x2_t src0, ret;
  381. std::vector<float> s0{2.1f, -3.1415f};
  382. memcpy(&src0, s0.data(), sizeof(float32x2_t));
  383. float p = 2022.0420;
  384. auto r = (float*)&ret;
  385. ret = GiSetLaneFloat32(p, src0, 0);
  386. ASSERT_EQ(*r, p);
  387. ASSERT_EQ(*(r + 1), s0[1]);
  388. ret = GiSetLaneFloat32(p, src0, 1);
  389. ASSERT_EQ(*r, s0[0]);
  390. ASSERT_EQ(*(r + 1), p);
  391. }
  392. TEST_F(FALLBACK, GiSt1Float32) {
  393. float32x2_t src0;
  394. std::vector<float> s0{2.1f, -3.1415f};
  395. memcpy(&src0, s0.data(), sizeof(float32x2_t));
  396. std::vector<float> ret{0, 0};
  397. GiSt1Float32(ret.data(), src0);
  398. ASSERT_EQ(ret[0], s0[0]);
  399. ASSERT_EQ(ret[1], s0[1]);
  400. }
  401. TEST_F(FALLBACK, GiLd2qFloat32) {
  402. GI_FLOAT32_V2_t ret;
  403. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f, 2312.1f, 345.244f, 3.59f, -12.8f};
  404. ret = GiLd2qFloat32(s0.data());
  405. std::vector<float> naive0;
  406. std::vector<float> naive1;
  407. naive0.push_back(s0[0]);
  408. naive0.push_back(s0[2]);
  409. naive0.push_back(s0[4]);
  410. naive0.push_back(s0[6]);
  411. naive1.push_back(s0[1]);
  412. naive1.push_back(s0[3]);
  413. naive1.push_back(s0[5]);
  414. naive1.push_back(s0[7]);
  415. assert_eq((float*)&ret, naive0);
  416. assert_eq((float*)&ret + SIMD_LEN, naive1);
  417. }
  418. TEST_F(FALLBACK, GiExtqFloat32) {
  419. GI_FLOAT32_t src0, src1, ret;
  420. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  421. std::vector<float> s1{-9.1f, 34234.6f, 9.0f, 34.1f};
  422. s0.resize(SIMD_LEN);
  423. s1.resize(SIMD_LEN);
  424. init((float*)&src0, s0);
  425. init((float*)&src1, s1);
  426. std::vector<float> naive = {0, 0, 0, 0};
  427. auto compare = [&](const size_t n) {
  428. size_t t_count = SIMD_LEN;
  429. size_t a_count = t_count - n;
  430. for (size_t i = 0; i < a_count; i++) {
  431. naive[i] = s0[i + n];
  432. }
  433. for (size_t i = 0; i < n; i++) {
  434. naive[i + a_count] = s1[i];
  435. }
  436. assert_eq((float*)&ret, naive);
  437. };
  438. #define CB(n) \
  439. ret = GiExtqFloat32(src0, src1, n); \
  440. compare(n);
  441. CB(0)
  442. CB(1)
  443. CB(2)
  444. CB(3)
  445. #undef CB
  446. }
  447. TEST_F(FALLBACK, GiMultiplySubFloat32) {
  448. GI_FLOAT32_t src0, src1, src2, ret;
  449. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  450. std::vector<float> s1{-9.1f, 34234.6f, 9.0f, 34.1f};
  451. std::vector<float> s2{0.4f, 9.9f, 4.3f, 6.2f};
  452. s0.resize(SIMD_LEN);
  453. s1.resize(SIMD_LEN);
  454. s2.resize(SIMD_LEN);
  455. init((float*)&src0, s0);
  456. init((float*)&src1, s1);
  457. init((float*)&src2, s2);
  458. ret = GiMultiplySubFloat32(src0, src1, src2);
  459. std::vector<float> naive;
  460. for (size_t i = 0; i < SIMD_LEN; i++) {
  461. naive.push_back(s0[i] - (s1[i] * s2[i]));
  462. }
  463. assert_eq((float*)&ret, naive);
  464. }
  465. TEST_F(FALLBACK, GiLd1qLaneFloat32) {
  466. GI_FLOAT32_t src0, ret;
  467. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  468. s0.resize(SIMD_LEN);
  469. init((float*)&src0, s0);
  470. std::vector<float> naive = {0, 0, 0, 0};
  471. float buffer = 3.14159;
  472. auto compare = [&](const size_t n) {
  473. memcpy(naive.data(), s0.data(), sizeof(GI_FLOAT32_t));
  474. naive[n] = buffer;
  475. assert_eq((float*)&ret, naive);
  476. };
  477. #define CB(n) \
  478. ret = GiLd1qLaneFloat32(&buffer, src0, n); \
  479. compare(n);
  480. CB(0)
  481. CB(1)
  482. CB(2)
  483. CB(3)
  484. #undef CB
  485. }
  486. TEST_F(FALLBACK, GiSetqLaneFloat32) {
  487. GI_FLOAT32_t src0, ret;
  488. std::vector<float> s0{2.1f, 6.2f, -9.5f, 2.9f};
  489. s0.resize(SIMD_LEN);
  490. init((float*)&src0, s0);
  491. std::vector<float> naive = {0, 0, 0, 0};
  492. float buffer = 6.14159;
  493. auto compare = [&](const size_t n) {
  494. memcpy(naive.data(), s0.data(), sizeof(GI_FLOAT32_t));
  495. naive[n] = buffer;
  496. assert_eq((float*)&ret, naive);
  497. };
  498. #define CB(n) \
  499. ret = GiSetqLaneFloat32(buffer, src0, n); \
  500. compare(n);
  501. CB(0)
  502. CB(1)
  503. CB(2)
  504. CB(3)
  505. #undef CB
  506. }
  507. TEST_F(FALLBACK, GiMlaqLaneFloat32HighHalf) {
  508. GI_FLOAT32_t src0, src1, src2, ret;
  509. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  510. std::vector<float> s1{-9.1f, 34234.6f, 9.0f, 34.1f};
  511. std::vector<float> s2{0.4f, 9.9f, 4.3f, 6.2f};
  512. s0.resize(SIMD_LEN);
  513. s1.resize(SIMD_LEN);
  514. s2.resize(SIMD_LEN);
  515. init((float*)&src0, s0);
  516. init((float*)&src1, s1);
  517. init((float*)&src2, s2);
  518. std::vector<float> naive = {0, 0, 0, 0};
  519. auto compare = [&](const size_t n) {
  520. for (size_t i = 0; i < GI_SIMD_LEN_BYTE / sizeof(float); i++) {
  521. naive[i] = s0[i] + (s1[i] * s2[n + 2]);
  522. }
  523. assert_eq((float*)&ret, naive);
  524. };
  525. #define CB(n) \
  526. ret = GiMlaqLaneFloat32HighHalf(src0, src1, src2, n); \
  527. compare(n);
  528. CB(0)
  529. CB(1)
  530. #undef CB
  531. }
  532. TEST_F(FALLBACK, GiVmlaqLaneFloat32LowHalf) {
  533. GI_FLOAT32_t src0, src1, src2, ret;
  534. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  535. std::vector<float> s1{-9.1f, 34234.6f, 9.0f, 34.1f};
  536. std::vector<float> s2{0.4f, 9.9f, 4.3f, 6.2f};
  537. s0.resize(SIMD_LEN);
  538. s1.resize(SIMD_LEN);
  539. s2.resize(SIMD_LEN);
  540. init((float*)&src0, s0);
  541. init((float*)&src1, s1);
  542. init((float*)&src2, s2);
  543. std::vector<float> naive = {0, 0, 0, 0};
  544. auto compare = [&](const size_t n) {
  545. for (size_t i = 0; i < GI_SIMD_LEN_BYTE / sizeof(float); i++) {
  546. naive[i] = s0[i] + (s1[i] * s2[n]);
  547. }
  548. assert_eq((float*)&ret, naive);
  549. };
  550. #define CB(n) \
  551. ret = GiVmlaqLaneFloat32LowHalf(src0, src1, src2, n); \
  552. compare(n);
  553. CB(0)
  554. CB(1)
  555. #undef CB
  556. }
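// Store tests: full-vector stores, V2 stores, and per-lane store/extract,
// where the CB macro instantiates one check per lane index.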
  557. TEST_F(FALLBACK, GiStoreFloat32) {
  558. GI_FLOAT32_t src0;
  559. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  560. s0.resize(SIMD_LEN);
  561. init((float*)&src0, s0);
  562. std::vector<float> ret{0};
  563. ret.resize(SIMD_LEN);
  564. GiStoreFloat32(ret.data(), src0);
  565. assert_eq(ret.data(), s0);
  566. }
  567. TEST_F(FALLBACK, GiStoreFloat32V2) {
  568. GI_FLOAT32_V2_t src0;
  569. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f, -1.1f, -2.2f, -3.5f, -4.9};
  570. s0.resize(SIMD_LEN * 2);
  571. init((float*)&src0, s0, SIMD_LEN * 2);
  572. std::vector<float> ret{0};
  573. ret.resize(SIMD_LEN * 2);
  574. GiStoreFloat32V2(ret.data(), src0);
  575. assert_eq(ret.data(), s0, SIMD_LEN * 2);
  576. }
  577. TEST_F(FALLBACK, GiStoreLaneXXFloat32) {
  578. GI_FLOAT32_t src0;
  579. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  580. s0.resize(SIMD_LEN);
  581. init((float*)&src0, s0);
  582. float ret{0};
  583. #define CB(n) \
  584. GiStoreLane##n##Float32(&ret, src0); \
  585. ASSERT_EQ(ret, s0[n]);
  586. CB(0)
  587. CB(1)
  588. CB(2)
  589. CB(3)
  590. #undef CB
  591. }
  592. TEST_F(FALLBACK, GiExtractLaneXXFloat32) {
  593. GI_FLOAT32_t src0;
  594. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  595. s0.resize(SIMD_LEN);
  596. init((float*)&src0, s0);
  597. float ret{0};
  598. #define CB(n) \
  599. ret = GiExtractLane##n##Float32(src0); \
  600. ASSERT_EQ(ret, s0[n]);
  601. CB(0)
  602. CB(1)
  603. CB(2)
  604. CB(3)
  605. #undef CB
  606. }
  607. TEST_F(FALLBACK, GiZipqFloat32) {
  608. GI_FLOAT32_t src0, src1;
  609. GI_FLOAT32_V2_t ret;
  610. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  611. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  612. s0.resize(SIMD_LEN);
  613. s1.resize(SIMD_LEN);
  614. init((float*)&src0, s0);
  615. init((float*)&src1, s1);
  616. ret = GiZipqFloat32(src0, src1);
  617. std::vector<float> naive0;
  618. std::vector<float> naive1;
  619. naive0.push_back(s0[0]);
  620. naive0.push_back(s1[0]);
  621. naive0.push_back(s0[1]);
  622. naive0.push_back(s1[1]);
  623. naive1.push_back(s0[2]);
  624. naive1.push_back(s1[2]);
  625. naive1.push_back(s0[3]);
  626. naive1.push_back(s1[3]);
  627. assert_eq((float*)&ret, naive0);
  628. assert_eq((float*)&ret + SIMD_LEN, naive1);
  629. }
  630. TEST_F(FALLBACK, GiInterleaveLowFloat32) {
  631. GI_FLOAT32_t src0, src1, ret;
  632. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  633. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  634. s0.resize(SIMD_LEN);
  635. s1.resize(SIMD_LEN);
  636. init((float*)&src0, s0);
  637. init((float*)&src1, s1);
  638. ret = GiInterleaveLowFloat32(src0, src1);
  639. std::vector<float> naive;
  640. naive.resize(SIMD_LEN);
  641. for (size_t i = 0; i < SIMD_LEN / 2; i++) {
  642. naive[2 * i] = s0[i];
  643. naive[2 * i + 1] = s1[i];
  644. }
  645. assert_eq((float*)&ret, naive);
  646. }
  647. TEST_F(FALLBACK, GiInterleaveHighFloat32) {
  648. GI_FLOAT32_t src0, src1, ret;
  649. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  650. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  651. s0.resize(SIMD_LEN);
  652. s1.resize(SIMD_LEN);
  653. init((float*)&src0, s0);
  654. init((float*)&src1, s1);
  655. ret = GiInterleaveHighFloat32(src0, src1);
  656. std::vector<float> naive;
  657. naive.resize(SIMD_LEN);
  658. for (size_t i = 0; i < SIMD_LEN / 2; i++) {
  659. naive[2 * i] = s0[i + SIMD_LEN / 2];
  660. naive[2 * i + 1] = s1[i + SIMD_LEN / 2];
  661. }
  662. assert_eq((float*)&ret, naive);
  663. }
  664. TEST_F(FALLBACK, GiAddFloat32) {
  665. GI_FLOAT32_t src0, src1, ret;
  666. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  667. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  668. s0.resize(SIMD_LEN);
  669. s1.resize(SIMD_LEN);
  670. init((float*)&src0, s0);
  671. init((float*)&src1, s1);
  672. ret = GiAddFloat32(src0, src1);
  673. std::vector<float> naive;
  674. for (size_t i = 0; i < SIMD_LEN; i++) {
  675. naive.push_back(s0[i] + s1[i]);
  676. }
  677. assert_eq((float*)&ret, naive);
  678. }
  679. TEST_F(FALLBACK, GiSubtractFloat32) {
  680. GI_FLOAT32_t src0, src1, ret;
  681. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  682. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  683. s0.resize(SIMD_LEN);
  684. s1.resize(SIMD_LEN);
  685. init((float*)&src0, s0);
  686. init((float*)&src1, s1);
  687. ret = GiSubtractFloat32(src0, src1);
  688. std::vector<float> naive;
  689. for (size_t i = 0; i < SIMD_LEN; i++) {
  690. naive.push_back(s0[i] - s1[i]);
  691. }
  692. assert_eq((float*)&ret, naive);
  693. }
  694. TEST_F(FALLBACK, GiMultiplyFloat32) {
  695. GI_FLOAT32_t src0, src1, ret;
  696. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  697. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  698. s0.resize(SIMD_LEN);
  699. s1.resize(SIMD_LEN);
  700. init((float*)&src0, s0);
  701. init((float*)&src1, s1);
  702. ret = GiMultiplyFloat32(src0, src1);
  703. std::vector<float> naive;
  704. for (size_t i = 0; i < SIMD_LEN; i++) {
  705. naive.push_back(s0[i] * s1[i]);
  706. }
  707. assert_eq((float*)&ret, naive);
  708. }
  709. TEST_F(FALLBACK, GiMultiplyScalerFloat32) {
  710. GI_FLOAT32_t src0, ret;
  711. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  712. s0.resize(SIMD_LEN);
  713. init((float*)&src0, s0);
  714. float scalar = 3.1415;
  715. ret = GiMultiplyScalerFloat32(src0, scalar);
  716. std::vector<float> naive;
  717. for (size_t i = 0; i < SIMD_LEN; i++) {
  718. naive.push_back(s0[i] * scalar);
  719. }
  720. assert_eq((float*)&ret, naive);
  721. }
  722. TEST_F(FALLBACK, GiMultiplyAddFloat32) {
  723. GI_FLOAT32_t src0, src1, src2, ret;
  724. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  725. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  726. std::vector<float> s2{12.1f, 35.244f, 23.59f, -112.8f};
  727. s0.resize(SIMD_LEN);
  728. s1.resize(SIMD_LEN);
  729. s2.resize(SIMD_LEN);
  730. init((float*)&src0, s0);
  731. init((float*)&src1, s1);
  732. init((float*)&src2, s2);
  733. ret = GiMultiplyAddFloat32(src0, src1, src2);
  734. std::vector<float> naive;
  735. for (size_t i = 0; i < SIMD_LEN; i++) {
  736. naive.push_back(s1[i] * s2[i] + s0[i]);
  737. }
  738. assert_lt((float*)&ret, naive, 1e-3);
  739. }
  740. TEST_F(FALLBACK, GiMultiplyAddScalarFloat32) {
  741. GI_FLOAT32_t src0, src1, ret;
  742. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  743. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  744. s0.resize(SIMD_LEN);
  745. s1.resize(SIMD_LEN);
  746. init((float*)&src0, s0);
  747. init((float*)&src1, s1);
  748. float scalar = 3.1415;
  749. ret = GiMultiplyAddScalarFloat32(src0, src1, scalar);
  750. std::vector<float> naive;
  751. for (size_t i = 0; i < SIMD_LEN; i++) {
  752. naive.push_back(s1[i] * scalar + s0[i]);
  753. }
  754. assert_eq((float*)&ret, naive);
  755. }
  756. TEST_F(FALLBACK, GiMultiplyAddLanXXFloat32) {
  757. GI_FLOAT32_t src0, src1, src2, ret;
  758. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  759. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  760. std::vector<float> s2{12.1f, 35.244f, 23.59f, -112.8f};
  761. s0.resize(SIMD_LEN);
  762. s1.resize(SIMD_LEN);
  763. s2.resize(SIMD_LEN);
  764. init((float*)&src0, s0);
  765. init((float*)&src1, s1);
  766. init((float*)&src2, s2);
  767. std::vector<float> naive = {0, 0, 0, 0};
  768. auto compare = [&](const size_t n) {
  769. for (size_t i = 0; i < GI_SIMD_LEN_BYTE / sizeof(float); i++) {
  770. naive[i] = s0[i] + (s1[i] * s2[n]);
  771. }
  772. assert_eq((float*)&ret, naive);
  773. };
  774. #define CB(n) \
  775. ret = GiMultiplyAddLan##n##Float32(src0, src1, src2); \
  776. compare(n);
  777. CB(0)
  778. CB(1)
  779. CB(2)
  780. CB(3)
  781. #undef CB
  782. }
  783. TEST_F(FALLBACK, GiDivideFloat32) {
  784. GI_FLOAT32_t src0, src1, ret;
  785. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  786. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  787. s0.resize(SIMD_LEN);
  788. s1.resize(SIMD_LEN);
  789. init((float*)&src0, s0);
  790. init((float*)&src1, s1);
  791. ret = GiDivideFloat32(src0, src1);
  792. std::vector<float> naive;
  793. for (size_t i = 0; i < SIMD_LEN; i++) {
  794. naive.push_back(s0[i] / s1[i]);
  795. }
  796. assert_lt((float*)&ret, naive, 1e-3);
  797. }
  798. TEST_F(FALLBACK, GiRecpeSFloat32) {
  799. GI_FLOAT32_t src0, src1, ret;
  800. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  801. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  802. s0.resize(SIMD_LEN);
  803. s1.resize(SIMD_LEN);
  804. init((float*)&src0, s0);
  805. init((float*)&src1, s1);
  806. ret = GiRecpeSFloat32(src0, src1);
  807. std::vector<float> naive;
  808. for (size_t i = 0; i < SIMD_LEN; i++) {
  809. naive.push_back(2.0f - s0[i] * s1[i]);
  810. }
  811. assert_eq((float*)&ret, naive);
  812. }
  813. TEST_F(FALLBACK, GiRecpeFloat32) {
  814. GI_FLOAT32_t src0, ret;
  815. std::vector<float> s0{100.1f, 2.2f, 3.5f, 4.9f};
  816. s0.resize(SIMD_LEN);
  817. init((float*)&src0, s0);
  818. ret = GiRecpeFloat32(src0);
  819. std::vector<float> naive;
  820. for (size_t i = 0; i < SIMD_LEN; i++) {
  821. naive.push_back(1.0f / s0[i]);
  822. }
  823. assert_lt((float*)&ret, naive, 1e-3);
  824. }
  825. TEST_F(FALLBACK, GiNegFloat32) {
  826. GI_FLOAT32_t src0, ret;
  827. std::vector<float> s0{-1.1f, 2.2f, 3.5f, 4.9f};
  828. s0.resize(SIMD_LEN);
  829. init((float*)&src0, s0);
  830. ret = GiNegFloat32(src0);
  831. std::vector<float> naive;
  832. for (size_t i = 0; i < SIMD_LEN; i++) {
  833. naive.push_back(-s0[i]);
  834. }
  835. assert_eq((float*)&ret, naive);
  836. }
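// Comparison tests: the scalar reference encodes "true" as an all-ones lane
// (0xFFFFFFFF) and "false" as zero, matching the intrinsic mask convention.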
  837. TEST_F(FALLBACK, GiGreaterThanFloat32) {
  838. GI_FLOAT32_t src0, src1;
  839. GI_UINT32_t ret;
  840. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  841. std::vector<float> s1{2312.1f, 0.1f, 3.59f, -12.8f};
  842. s0.resize(SIMD_LEN);
  843. s1.resize(SIMD_LEN);
  844. init((float*)&src0, s0);
  845. init((float*)&src1, s1);
  846. ret = GiGreaterThanFloat32(src0, src1);
  847. std::vector<int32_t> naive;
  848. for (size_t i = 0; i < SIMD_LEN; i++) {
  849. naive.push_back(s0[i] > s1[i] ? 0xFFFFFFFF : 0);
  850. }
  851. assert_eq((int32_t*)&ret, naive);
  852. }
  853. TEST_F(FALLBACK, GiLessThanEqFloat32) {
  854. GI_FLOAT32_t src0, src1;
  855. GI_UINT32_t ret;
  856. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  857. std::vector<float> s1{2312.1f, 0.1f, 3.59f, -12.8f};
  858. s0.resize(SIMD_LEN);
  859. s1.resize(SIMD_LEN);
  860. init((float*)&src0, s0);
  861. init((float*)&src1, s1);
  862. ret = GiLessThanEqFloat32(src0, src1);
  863. std::vector<int32_t> naive;
  864. for (size_t i = 0; i < SIMD_LEN; i++) {
  865. naive.push_back(s0[i] <= s1[i] ? 0xFFFFFFFF : 0);
  866. }
  867. assert_eq((int32_t*)&ret, naive);
  868. }
  869. TEST_F(FALLBACK, GiLessThanFloat32) {
  870. GI_FLOAT32_t src0, src1;
  871. GI_UINT32_t ret;
  872. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  873. std::vector<float> s1{1.1f, 0.1f, 3.59f, -12.8f};
  874. s0.resize(SIMD_LEN);
  875. s1.resize(SIMD_LEN);
  876. init((float*)&src0, s0);
  877. init((float*)&src1, s1);
  878. ret = GiLessThanFloat32(src0, src1);
  879. std::vector<int32_t> naive;
  880. for (size_t i = 0; i < SIMD_LEN; i++) {
  881. naive.push_back(s0[i] < s1[i] ? 0xFFFFFFFF : 0);
  882. }
  883. assert_eq((int32_t*)&ret, naive);
  884. }
  885. TEST_F(FALLBACK, GiAndFloat32) {
  886. GI_FLOAT32_t src0, src1, ret;
  887. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  888. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  889. s0.resize(SIMD_LEN);
  890. s1.resize(SIMD_LEN);
  891. init((float*)&src0, s0);
  892. init((float*)&src1, s1);
  893. ret = GiAndFloat32(src0, src1);
  894. std::vector<float> naive;
  895. for (size_t i = 0; i < SIMD_LEN; i++) {
  896. int32_t tmp0, tmp1, tmp;
  897. float tmp2;
  898. memcpy(&tmp0, &s0[i], sizeof(int32_t));
  899. memcpy(&tmp1, &s1[i], sizeof(int32_t));
  900. tmp = tmp0 & tmp1;
  901. memcpy(&tmp2, &tmp, sizeof(float));
  902. naive.push_back(tmp2);
  903. }
  904. assert_eq((float*)&ret, naive);
  905. }
  906. TEST_F(FALLBACK, GiOrFloat32) {
  907. GI_FLOAT32_t src0, src1, ret;
  908. std::vector<float> s0{2, 2, 3, 4};
  909. std::vector<float> s1{6, 6, 7, 8};
  910. s0.resize(SIMD_LEN);
  911. s1.resize(SIMD_LEN);
  912. init((float*)&src0, s0);
  913. init((float*)&src1, s1);
  914. ret = GiOrFloat32(src0, src1);
  915. std::vector<float> naive;
  916. for (size_t i = 0; i < SIMD_LEN; i++) {
  917. int32_t tmp0, tmp1, tmp;
  918. float tmp2;
  919. memcpy(&tmp0, &s0[i], sizeof(int32_t));
  920. memcpy(&tmp1, &s1[i], sizeof(int32_t));
  921. tmp = tmp0 | tmp1;
  922. memcpy(&tmp2, &tmp, sizeof(float));
  923. naive.push_back(tmp2);
  924. }
  925. assert_eq((float*)&ret, naive);
  926. }
  927. TEST_F(FALLBACK, GiAndNotFloat32) {
  928. GI_FLOAT32_t src0, src1, ret;
  929. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  930. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  931. s0.resize(SIMD_LEN);
  932. s1.resize(SIMD_LEN);
  933. init((float*)&src0, s0);
  934. init((float*)&src1, s1);
  935. ret = GiAndNotFloat32(src0, src1);
  936. std::vector<float> naive;
  937. for (size_t i = 0; i < SIMD_LEN; i++) {
  938. int32_t tmp0, tmp1, tmp;
  939. float tmp2;
  940. memcpy(&tmp0, &s0[i], sizeof(int32_t));
  941. memcpy(&tmp1, &s1[i], sizeof(int32_t));
  942. tmp = ~tmp0 & tmp1;
  943. memcpy(&tmp2, &tmp, sizeof(float));
  944. naive.push_back(tmp2);
  945. }
  946. assert_eq((float*)&ret, naive);
  947. }
  948. TEST_F(FALLBACK, GiXorFloat32) {
  949. GI_FLOAT32_t src0, src1, ret;
  950. std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
  951. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  952. s0.resize(SIMD_LEN);
  953. s1.resize(SIMD_LEN);
  954. init((float*)&src0, s0);
  955. init((float*)&src1, s1);
  956. ret = GiXorFloat32(src0, src1);
  957. std::vector<float> naive;
  958. for (size_t i = 0; i < SIMD_LEN; i++) {
  959. int32_t tmp0, tmp1, tmp;
  960. float tmp2;
  961. memcpy(&tmp0, &s0[i], sizeof(int32_t));
  962. memcpy(&tmp1, &s1[i], sizeof(int32_t));
  963. tmp = tmp0 ^ tmp1;
  964. memcpy(&tmp2, &tmp, sizeof(float));
  965. naive.push_back(tmp2);
  966. }
  967. assert_eq((float*)&ret, naive);
  968. }
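// Select and min/max tests, including the NaN-propagating variants, which are
// compared with assert_eq_and_nan so matching NaN lanes count as equal.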
  969. TEST_F(FALLBACK, GiBSLFloat32) {
  970. GI_FLOAT32_t src0, src1, ret, na;
  971. GI_UINT32_t mask;
  972. std::vector<float> s0{1.1f, 2.2f, 4.5f, 4.9f};
  973. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  974. std::vector<std::vector<uint32_t>> s2s = {
  975. {1, 2, 3, 0}, {0u, 0u, 0u, 0u}, {~0u, 0u, 0u, 0u},
  976. {~0u, ~0u, 0u, 0u}, {~0u, ~0u, ~0u, 0u}, {~0u, ~0u, ~0u, ~0u}};
  977. s0.resize(SIMD_LEN);
  978. s1.resize(SIMD_LEN);
  979. init((float*)&src0, s0);
  980. init((float*)&src1, s1);
  981. for (auto& s2 : s2s) {
  982. init((uint32_t*)&mask, s2);
  983. ret = GiBSLFloat32(mask, src0, src1);
  984. na = GiBlendFloat32(src0, src1, GiReintUint32ToFloat32(mask));
  985. std::vector<float> naive;
  986. naive.resize(SIMD_LEN);
  987. memcpy(naive.data(), &na, sizeof(GI_FLOAT32_t));
  988. assert_eq_and_nan((float*)&ret, naive);
  989. }
  990. }
  991. TEST_F(FALLBACK, GiMaximumFloat32) {
  992. GI_FLOAT32_t src0, src1, ret;
  993. std::vector<float> s0{1.1f, 2.2f, 4.5f, 4.9f};
  994. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  995. s0.resize(SIMD_LEN);
  996. s1.resize(SIMD_LEN);
  997. init((float*)&src0, s0);
  998. init((float*)&src1, s1);
  999. ret = GiMaximumFloat32(src0, src1);
  1000. std::vector<float> naive;
  1001. for (size_t i = 0; i < SIMD_LEN; i++) {
  1002. naive.push_back(Max(s0[i], s1[i]));
  1003. }
  1004. assert_eq((float*)&ret, naive);
  1005. }
  1006. TEST_F(FALLBACK, GiMinimumFloat32) {
  1007. GI_FLOAT32_t src0, src1, ret;
  1008. std::vector<float> s0{1.1f, 2.2f, 4.5f, 4.9f};
  1009. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  1010. s0.resize(SIMD_LEN);
  1011. s1.resize(SIMD_LEN);
  1012. init((float*)&src0, s0);
  1013. init((float*)&src1, s1);
  1014. ret = GiMinimumFloat32(src0, src1);
  1015. std::vector<float> naive;
  1016. for (size_t i = 0; i < SIMD_LEN; i++) {
  1017. naive.push_back(Min(s0[i], s1[i]));
  1018. }
  1019. assert_eq((float*)&ret, naive);
  1020. }
  1021. TEST_F(FALLBACK, GiMaxNanFloat32) {
  1022. GI_FLOAT32_t src0, src1, ret;
  1023. std::vector<float> s0{1.1f, 2.2f, 4.5f, NAN};
  1024. std::vector<float> s1{2312.1f, 345.244f, NAN, -12.8f};
  1025. s0.resize(SIMD_LEN);
  1026. s1.resize(SIMD_LEN);
  1027. init((float*)&src0, s0);
  1028. init((float*)&src1, s1);
  1029. ret = GiMaxNanFloat32(src0, src1);
  1030. std::vector<float> naive;
  1031. for (size_t i = 0; i < SIMD_LEN; i++) {
  1032. auto t = MAX_NAN(s0[i], s1[i]);
  1033. naive.push_back(t);
  1034. }
  1035. assert_eq_and_nan((float*)&ret, naive);
  1036. }
  1037. TEST_F(FALLBACK, GiMinNanFloat32) {
  1038. GI_FLOAT32_t src0, src1, ret;
  1039. std::vector<float> s0{NAN, 2.2f, NAN, 4.9f};
  1040. std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
  1041. s0.resize(SIMD_LEN);
  1042. s1.resize(SIMD_LEN);
  1043. init((float*)&src0, s0);
  1044. init((float*)&src1, s1);
  1045. ret = GiMinNanFloat32(src0, src1);
  1046. std::vector<float> naive;
  1047. for (size_t i = 0; i < SIMD_LEN; i++) {
  1048. auto t = MIN_NAN(s0[i], s1[i]);
  1049. naive.push_back(t);
  1050. }
  1051. assert_eq_and_nan((float*)&ret, naive);
  1052. }
  1053. TEST_F(FALLBACK, GiClampFloat32) {
  1054. GI_FLOAT32_t src0, src1, ret, na;
  1055. std::vector<float> s0{1.1f, 2.2f, 4.5f, 4.9f};
  1056. std::vector<float> s1{1.1f, 2.2f, 4.5f, 4.9f};
  1057. s0.resize(SIMD_LEN);
  1058. s1.resize(SIMD_LEN);
  1059. init((float*)&src0, s0);
  1060. init((float*)&src1, s1);
  1061. float LowerRange = 3.1415;
  1062. float UpperRange = 4.876;
  1063. auto naive_c = [](GI_FLOAT32_t Value, float LowerRange,
  1064. float UpperRange) -> GI_FLOAT32_t {
  1065. Value = GiMaximumFloat32(GiBroadcastFloat32(LowerRange), Value);
  1066. Value = GiMinimumFloat32(GiBroadcastFloat32(UpperRange), Value);
  1067. return Value;
  1068. };
  1069. ret = GiClampFloat32(src0, LowerRange, UpperRange);
  1070. na = naive_c(src1, LowerRange, UpperRange);
  1071. std::vector<float> naive;
  1072. naive.resize(SIMD_LEN);
  1073. memcpy(naive.data(), &na, sizeof(GI_FLOAT32_t));
  1074. assert_eq((float*)&ret, naive);
  1075. }
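// Horizontal reductions: the add/multiply reductions allow a small absolute
// error, while the NaN-aware max/min reductions are checked exactly and with
// a NaN input.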
  1076. TEST_F(FALLBACK, GiReduceAddFloat32) {
  1077. GI_FLOAT32_t src0;
  1078. float ret{0};
  1079. std::vector<float> s0{1.1f, 2.2f, 4.5f, -4.9f};
  1080. s0.resize(SIMD_LEN);
  1081. init((float*)&src0, s0);
  1082. ret = GiReduceAddFloat32(src0);
  1083. float naive{0};
  1084. for (size_t i = 0; i < SIMD_LEN; i++) {
  1085. naive += s0[i];
  1086. }
  1087. ASSERT_LT(std::abs(ret - naive), 1e-3);
  1088. }
  1089. TEST_F(FALLBACK, GiReduceMultiplyFloat32) {
  1090. GI_FLOAT32_t src0;
  1091. float ret{0};
  1092. std::vector<float> s0{1.1f, 2.2f, 4.5f, -4.9f};
  1093. s0.resize(SIMD_LEN);
  1094. init((float*)&src0, s0);
  1095. ret = GiReduceMultiplyFloat32(src0);
  1096. float naive{1};
  1097. for (size_t i = 0; i < SIMD_LEN; i++) {
  1098. naive *= s0[i];
  1099. }
  1100. ASSERT_LT(std::abs(ret - naive), 1e-3);
  1101. }
TEST_F(FALLBACK, GiReduceMaxNanFloat32) {
    GI_FLOAT32_t src0;
    float ret{0};
    std::vector<float> s0{1.1f, 2.2f, 4.9f, -4.9f};
    s0.resize(SIMD_LEN);
    init((float*)&src0, s0);
    ret = GiReduceMaxNanFloat32(src0);
    float naive = s0[0];
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive = MAX_NAN(naive, s0[i]);
    }
    ASSERT_EQ(ret, naive);
    ret = 0;
    s0 = {1.1f, 2.2f, 4.9f, NAN};
    init((float*)&src0, s0);
    ret = GiReduceMaxNanFloat32(src0);
    ASSERT_TRUE(std::isnan(ret));
}

TEST_F(FALLBACK, GiReduceMinNanFloat32) {
    GI_FLOAT32_t src0;
    float ret{0};
    std::vector<float> s0{1.1f, 2.2f, 4.5f, -4.9f};
    s0.resize(SIMD_LEN);
    init((float*)&src0, s0);
    ret = GiReduceMinNanFloat32(src0);
    float naive = s0[0];
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive = MIN_NAN(naive, s0[i]);
    }
    ASSERT_EQ(ret, naive);
    ret = 0;
    s0 = {-1.1f, 2.2f, 4.9f, NAN};
    init((float*)&src0, s0);
    // The NaN input should also propagate through the min reduction.
    ret = GiReduceMinNanFloat32(src0);
    ASSERT_TRUE(std::isnan(ret));
}
  1138. TEST_F(FALLBACK, GiAbsFloat32) {
  1139. GI_FLOAT32_t src0, ret;
  1140. std::vector<float> s0{2312.1f, 345.244f, 3.59f, -12.8f};
  1141. s0.resize(SIMD_LEN);
  1142. init((float*)&src0, s0);
  1143. ret = GiAbsFloat32(src0);
  1144. std::vector<float> naive;
  1145. for (size_t i = 0; i < SIMD_LEN; i++) {
  1146. naive.push_back(s0[i] > 0 ? s0[i] : -s0[i]);
  1147. }
  1148. assert_eq((float*)&ret, naive);
  1149. }
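// 64-bit lane tests: zip of the low/high int64 lanes and reinterprets between
// GI_INT64_t and GI_FLOAT32_t.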
  1150. TEST_F(FALLBACK, GiZip1qS64) {
  1151. GI_INT64_t src0, src1, ret;
  1152. std::vector<int64_t> s0{234242423424245, 42342342422323};
  1153. std::vector<int64_t> s1{23424245, -4234234242232};
  1154. s0.resize(SIMD_LEN / 2);
  1155. s1.resize(SIMD_LEN / 2);
  1156. memcpy(&src0, s0.data(), sizeof(GI_INT64_t));
  1157. memcpy(&src1, s1.data(), sizeof(GI_INT64_t));
  1158. ret = GiZip1qS64(src0, src1);
  1159. std::vector<int64_t> naive;
  1160. naive.push_back(s0[0]);
  1161. naive.push_back(s1[0]);
  1162. auto p = (int64_t*)&ret;
  1163. ASSERT_EQ(naive[0], p[0]);
  1164. ASSERT_EQ(naive[1], p[1]);
  1165. }
  1166. TEST_F(FALLBACK, GiZip2qS64) {
  1167. GI_INT64_t src0, src1, ret;
  1168. std::vector<int64_t> s0{234242423424245, 42342342422323};
  1169. std::vector<int64_t> s1{23424245, -4234234242232};
  1170. s0.resize(SIMD_LEN / 2);
  1171. s1.resize(SIMD_LEN / 2);
  1172. memcpy(&src0, s0.data(), sizeof(GI_INT64_t));
  1173. memcpy(&src1, s1.data(), sizeof(GI_INT64_t));
  1174. ret = GiZip2qS64(src0, src1);
  1175. std::vector<int64_t> naive;
  1176. naive.push_back(s0[1]);
  1177. naive.push_back(s1[1]);
  1178. auto p = (int64_t*)&ret;
  1179. ASSERT_EQ(naive[0], p[0]);
  1180. ASSERT_EQ(naive[1], p[1]);
  1181. }
  1182. TEST_F(FALLBACK, GiReinterpretqS64ToFloat32) {
  1183. GI_INT64_t src0;
  1184. GI_FLOAT32_t ret;
  1185. std::vector<int64_t> s0{234242423424245, 42342342422323};
  1186. s0.resize(SIMD_LEN / 2);
  1187. memcpy(&src0, s0.data(), sizeof(GI_INT64_t));
  1188. ret = GiReinterpretqS64ToFloat32(src0);
  1189. std::vector<float> naive;
  1190. naive.resize(SIMD_LEN);
  1191. memcpy(naive.data(), s0.data(), sizeof(GI_FLOAT32_t));
  1192. assert_eq((float*)&ret, naive);
  1193. }
  1194. TEST_F(FALLBACK, GiReinterpretqFloat32ToS64) {
  1195. GI_FLOAT32_t src0;
  1196. GI_INT64_t ret;
  1197. std::vector<float> s0{2312.1f, 345.244f, 3.59f, -12.8f};
  1198. s0.resize(SIMD_LEN);
  1199. init((float*)&src0, s0);
  1200. ret = GiReinterpretqFloat32ToS64(src0);
  1201. std::vector<float> naive;
  1202. naive.resize(SIMD_LEN);
  1203. memcpy(naive.data(), s0.data(), sizeof(GI_INT64_t));
  1204. assert_eq((float*)&ret, naive);
  1205. }
TEST_F(FALLBACK, GiSimdFmaLane) {
    GI_FLOAT32_t src0, src1, src2, ret;
    std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
    std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
    std::vector<float> s2{12.1f, 2.2f, 89.0f, -112.8f};
    s0.resize(SIMD_LEN);
    s1.resize(SIMD_LEN);
    s2.resize(SIMD_LEN);
    init((float*)&src0, s0);
    init((float*)&src1, s1);
    init((float*)&src2, s2);
    std::vector<float> naive = {0, 0, 0, 0};
    auto compare = [&](const size_t n) {
        for (size_t i = 0; i < GI_SIMD_LEN_BYTE / sizeof(float); i++) {
            naive[i] = s0[i] + (s1[i] * s2[n]);
        }
        assert_eq((float*)&ret, naive);
    };
#define CB(n)                                 \
    ret = GiSimdFmaLane(src0, src1, src2, n); \
    compare(n);
    CB(0)
    CB(1)
    CB(2)
    CB(3)
#undef CB
}
TEST_F(FALLBACK, GiMlaqLowLaneFloat32) {
    GI_FLOAT32_t src0, src1, src2, ret;
    std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
    std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
    std::vector<float> s2{12.1f, 2.2f, 89.0f, -112.8f};
    s0.resize(SIMD_LEN);
    s1.resize(SIMD_LEN);
    s2.resize(SIMD_LEN);
    init((float*)&src0, s0);
    init((float*)&src1, s1);
    init((float*)&src2, s2);
    std::vector<float> naive = {0, 0, 0, 0};
    auto compare = [&](const size_t n) {
        for (size_t i = 0; i < GI_SIMD_LEN_BYTE / sizeof(float); i++) {
            naive[i] = s0[i] + (s1[i] * s2[n]);
        }
        assert_eq((float*)&ret, naive);
    };
#define CB(n)                                        \
    ret = GiMlaqLowLaneFloat32(src0, src1, src2, n); \
    compare(n);
    CB(0)
    CB(1)
#undef CB
}
TEST_F(FALLBACK, GiMlaqHighLaneFloat32) {
    GI_FLOAT32_t src0, src1, src2, ret;
    std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
    std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
    std::vector<float> s2{12.1f, 2.2f, 89.0f, -112.8f};
    s0.resize(SIMD_LEN);
    s1.resize(SIMD_LEN);
    s2.resize(SIMD_LEN);
    init((float*)&src0, s0);
    init((float*)&src1, s1);
    init((float*)&src2, s2);
    std::vector<float> naive = {0, 0, 0, 0};
    auto compare = [&](const size_t n) {
        for (size_t i = 0; i < GI_SIMD_LEN_BYTE / sizeof(float); i++) {
            naive[i] = s0[i] + (s1[i] * s2[n]);
        }
        assert_eq((float*)&ret, naive);
    };
#define CB(n)                                         \
    ret = GiMlaqHighLaneFloat32(src0, src1, src2, n); \
    compare(n);
    CB(2)
    CB(3)
#undef CB
}
TEST_F(FALLBACK, GiFmsqLaneQFloat32) {
    GI_FLOAT32_t src0, src1, src2, ret;
    std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f};
    std::vector<float> s1{2312.1f, 345.244f, 3.59f, -12.8f};
    std::vector<float> s2{12.1f, 2.2f, 89.0f, -112.8f};
    s0.resize(SIMD_LEN);
    s1.resize(SIMD_LEN);
    s2.resize(SIMD_LEN);
    init((float*)&src0, s0);
    init((float*)&src1, s1);
    init((float*)&src2, s2);
    std::vector<float> naive = {0, 0, 0, 0};
    auto compare = [&](const size_t n) {
        for (size_t i = 0; i < GI_SIMD_LEN_BYTE / sizeof(float); i++) {
            naive[i] = s0[i] - (s1[i] * s2[n]);
        }
        assert_eq((float*)&ret, naive);
    };
#define CB(n)                                      \
    ret = GiFmsqLaneQFloat32(src0, src1, src2, n); \
    compare(n);
    CB(0)
    CB(1)
    CB(2)
    CB(3)
#undef CB
}
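// Broadcast, load and store round trips: write known values through the GI vector type
// and check them element by element against the source data.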
TEST_F(FALLBACK, GiBroadcastUint32) {
    uint32_t src0 = 20220422;
    GI_UINT32_t ret;
    ret = GiBroadcastUint32(src0);
    std::vector<uint32_t> naive;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back(src0);
    }
    assert_eq((uint32_t*)&ret, naive);
}
TEST_F(FALLBACK, GiLoadInt32) {
    std::vector<int32_t> s0{1, 2, -200, 999};
    GI_INT32_t ret;
    ret = GiLoadInt32(s0.data());
    std::vector<uint32_t> naive;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back(s0[i]);
    }
    assert_eq((uint32_t*)&ret, naive);
}
TEST_F(FALLBACK, GiLoadInt16) {
    std::vector<int16_t> s0{1, 2, -200, 32767, -32768, 45, 3, 0};
    GI_INT16_t ret;
    ret = GiLoadInt16(s0.data());
    auto p = (int16_t*)&ret;
    for (size_t i = 0; i < SIMD_LEN_16; i++) {
        ASSERT_EQ(p[i], s0[i]);
    }
}
TEST_F(FALLBACK, GiLoadInt8) {
    std::vector<int8_t> s0{9,  2, -128, 127, 2, 45, 3, 0,
                           11, 2, -128, 127, 2, 55, 3, -1};
    GI_INT8_t ret;
    ret = GiLoadInt8(s0.data());
    auto p = (int8_t*)&ret;
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        ASSERT_EQ(p[i], s0[i]);
    }
}
TEST_F(FALLBACK, GiStoreInt32) {
    GI_INT32_t src0;
    std::vector<int32_t> s0{1, 2, -200, 999};
    s0.resize(SIMD_LEN);
    init((int32_t*)&src0, s0);
    std::vector<int32_t> ret;
    ret.resize(SIMD_LEN);
    GiStoreInt32(ret.data(), src0);
    assert_eq<int32_t>(ret.data(), s0);
}
TEST_F(FALLBACK, GiStoreLaneXXInt32) {
    GI_INT32_t src0;
    std::vector<int32_t> s0{1, 2, -200, 999};
    s0.resize(SIMD_LEN);
    init((int32_t*)&src0, s0);
    int32_t ret = 8888;
#define CB(n)                          \
    GiStoreLane##n##Int32(&ret, src0); \
    ASSERT_EQ(s0[n], ret);
    CB(0)
    CB(1)
    CB(2)
    CB(3)
#undef CB
}
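// GiReinterInt32ToInt8 is a pure bit reinterpretation, so the test memcmp()s the raw
// bytes of the result against a C-style cast of the source register.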
TEST_F(FALLBACK, GiReinterInt32ToInt8) {
    GI_INT32_t src0;
    GI_INT8_t ret, naive;
    std::vector<int32_t> s0{65536, 2, -200, 999};
    s0.resize(SIMD_LEN);
    init((int32_t*)&src0, s0);
    ret = GiReinterInt32ToInt8(src0);
    naive = (GI_INT8_t)src0;
    ASSERT_FALSE(memcmp(&ret, &naive, sizeof(GI_INT8_t)));
}
TEST_F(FALLBACK, GiStoreInt16) {
    GI_INT16_t src0;
    std::vector<int16_t> s0{32767, 2, -200, -32768, 1, 2, 3, 4};
    s0.resize(SIMD_LEN_16);
    init((int16_t*)&src0, s0, SIMD_LEN_16);
    std::vector<int16_t> ret;
    ret.resize(SIMD_LEN_16);
    GiStoreInt16(ret.data(), src0);
    assert_eq<int16_t>(ret.data(), s0, SIMD_LEN_16);
}
TEST_F(FALLBACK, GiStoreInt8) {
    GI_INT8_t src0;
    std::vector<int8_t> s0{127, 2, 56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    s0.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    std::vector<int8_t> ret;
    ret.resize(SIMD_LEN_8);
    GiStoreInt8(ret.data(), src0);
    assert_eq<int8_t>(ret.data(), s0, SIMD_LEN_8);
}
TEST_F(FALLBACK, GiStoreLowInt8) {
    GI_INT8_t src0;
    std::vector<int8_t> s0{127, 2, 56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    s0.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    std::vector<int8_t> ret;
    ret.resize(SIMD_LEN_8 / 2);
    GiStoreLowInt8(ret.data(), src0);
    assert_eq<int8_t>(ret.data(), s0, SIMD_LEN_8 / 2);
}
TEST_F(FALLBACK, GiStoreHihgInt8) {
    GI_INT8_t src0;
    std::vector<int8_t> s0{127, 2, 56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    s0.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    std::vector<int8_t> ret;
    ret.resize(SIMD_LEN_8 / 2);
    GiStoreHihgInt8(ret.data(), src0);
    std::vector<int8_t> naive;
    for (size_t i = 0; i < SIMD_LEN_8 / 2; i++) {
        naive.push_back(s0[SIMD_LEN_8 / 2 + i]);
    }
    assert_eq<int8_t>(ret.data(), naive, SIMD_LEN_8 / 2);
}
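// Negation tests: the reference simply pushes -s0[i] for every lane, with the type's
// minimum value included in the inputs as an edge case.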
TEST_F(FALLBACK, GiNegInt32) {
    GI_INT32_t src0, ret;
    std::vector<int32_t> s0{
            std::numeric_limits<int32_t>::max(), std::numeric_limits<int32_t>::min(),
            -3, 4};
    s0.resize(SIMD_LEN);
    init((int32_t*)&src0, s0);
    ret = GiNegInt32(src0);
    std::vector<int32_t> naive;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back(-s0[i]);
    }
    assert_eq((int32_t*)&ret, naive);
}
TEST_F(FALLBACK, GiNegInt8) {
    GI_INT8_t src0, ret;
    std::vector<int8_t> s0{
            std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    s0.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    ret = GiNegInt8(src0);
    std::vector<int8_t> naive;
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        naive.push_back(-s0[i]);
    }
    assert_eq<int8_t>((int8_t*)&ret, naive, SIMD_LEN_8);
}
TEST_F(FALLBACK, GiTestAndSetUint32) {
    GI_UINT32_t src0, src1, ret;
    std::vector<uint32_t> s0{
            8, 2, std::numeric_limits<uint32_t>::max(),
            std::numeric_limits<uint32_t>::min()};
    std::vector<uint32_t> s1{
            8, 4, std::numeric_limits<uint32_t>::max(),
            std::numeric_limits<uint32_t>::max()};
    s0.resize(SIMD_LEN);
    s1.resize(SIMD_LEN);
    init((uint32_t*)&src0, s0);
    init((uint32_t*)&src1, s1);
    ret = GiTestAndSetUint32(src0, src1);
    std::vector<uint32_t> naive;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back(s0[i] & s1[i] ? 0xFFFFFFFF : 0);
    }
    assert_eq<uint32_t>((uint32_t*)&ret, naive);
}
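// Elementwise add/subtract for int32/uint32/int16/int8: the reference result is built
// with the plain scalar + and - operators over the same inputs.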
TEST_F(FALLBACK, GiAddInt32) {
    GI_INT32_t src0, src1, ret;
    std::vector<int32_t> s0{127, 2, std::numeric_limits<int32_t>::max(), 9999};
    std::vector<int32_t> s1{1, 2, std::numeric_limits<int32_t>::max(), -9};
    s0.resize(SIMD_LEN);
    s1.resize(SIMD_LEN);
    init((int32_t*)&src0, s0);
    init((int32_t*)&src1, s1);
    ret = GiAddInt32(src0, src1);
    std::vector<int32_t> naive;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back(s0[i] + s1[i]);
    }
    assert_eq((int32_t*)&ret, naive);
}
TEST_F(FALLBACK, GiAddUint32) {
    GI_UINT32_t src0, src1, ret;
    std::vector<uint32_t> s0{127, 2, std::numeric_limits<uint32_t>::max(), 9999};
    std::vector<uint32_t> s1{1, 2, std::numeric_limits<uint32_t>::max(), 9};
    s0.resize(SIMD_LEN);
    s1.resize(SIMD_LEN);
    init((uint32_t*)&src0, s0);
    init((uint32_t*)&src1, s1);
    ret = GiAddUint32(src0, src1);
    std::vector<uint32_t> naive;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back(s0[i] + s1[i]);
    }
    assert_eq((uint32_t*)&ret, naive);
}
TEST_F(FALLBACK, GiAddInt16) {
    GI_INT16_t src0, src1, ret;
    std::vector<int16_t> s0{-127, 2, std::numeric_limits<int16_t>::max(), 9999, 1, 2,
                            3, 4};
    std::vector<int16_t> s1{
            1, 2, std::numeric_limits<int16_t>::max(),
            std::numeric_limits<int16_t>::min(), -1, 23, -3, -5};
    s0.resize(SIMD_LEN_16);
    s1.resize(SIMD_LEN_16);
    init((int16_t*)&src0, s0, SIMD_LEN_16);
    init((int16_t*)&src1, s1, SIMD_LEN_16);
    ret = GiAddInt16(src0, src1);
    std::vector<int16_t> naive;
    for (size_t i = 0; i < SIMD_LEN_16; i++) {
        naive.push_back(s0[i] + s1[i]);
    }
    assert_eq<int16_t>((int16_t*)&ret, naive, SIMD_LEN_16);
}
TEST_F(FALLBACK, GiAddInt8) {
    GI_INT8_t src0, src1, ret;
    std::vector<int8_t> s0{
            std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    std::vector<int8_t> s1{
            3, std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 4};
    s0.resize(SIMD_LEN_8);
    s1.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    init((int8_t*)&src1, s1, SIMD_LEN_8);
    ret = GiAddInt8(src0, src1);
    std::vector<int8_t> naive;
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        naive.push_back(s0[i] + s1[i]);
    }
    assert_eq<int8_t>((int8_t*)&ret, naive, SIMD_LEN_8);
}
TEST_F(FALLBACK, GiSubtractInt32) {
    GI_INT32_t src0, src1, ret;
    std::vector<int32_t> s0{127, 2, std::numeric_limits<int32_t>::max(), 9999};
    std::vector<int32_t> s1{1, 2, std::numeric_limits<int32_t>::max(), -9};
    s0.resize(SIMD_LEN);
    s1.resize(SIMD_LEN);
    init((int32_t*)&src0, s0);
    init((int32_t*)&src1, s1);
    ret = GiSubtractInt32(src0, src1);
    std::vector<int32_t> naive;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back(s0[i] - s1[i]);
    }
    assert_eq((int32_t*)&ret, naive);
}
TEST_F(FALLBACK, GiSubtractUint32) {
    GI_UINT32_t src0, src1, ret;
    std::vector<uint32_t> s0{127, 2, std::numeric_limits<uint32_t>::max(), 9999};
    std::vector<uint32_t> s1{1, 2, std::numeric_limits<uint32_t>::max(), 9};
    s0.resize(SIMD_LEN);
    s1.resize(SIMD_LEN);
    init((uint32_t*)&src0, s0);
    init((uint32_t*)&src1, s1);
    ret = GiSubtractUint32(src0, src1);
    std::vector<uint32_t> naive;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back(s0[i] - s1[i]);
    }
    assert_eq((uint32_t*)&ret, naive);
}
TEST_F(FALLBACK, GiSubtractInt8) {
    GI_INT8_t src0, src1, ret;
    std::vector<int8_t> s0{
            std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    std::vector<int8_t> s1{
            3, std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 4};
    s0.resize(SIMD_LEN_8);
    s1.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    init((int8_t*)&src1, s1, SIMD_LEN_8);
    ret = GiSubtractInt8(src0, src1);
    std::vector<int8_t> naive;
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        naive.push_back(s0[i] - s1[i]);
    }
    assert_eq<int8_t>((int8_t*)&ret, naive, SIMD_LEN_8);
}
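// Multiply and multiply-add: GiMultiplyAddXXX is expected to return src0 + src1 * src2
// per lane, mirrored below by the scalar expression s0[i] + s1[i] * s2[i].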
TEST_F(FALLBACK, GiMultiplyInt32) {
    GI_INT32_t src0, src1, ret;
    std::vector<int32_t> s0{127, 2, 202204, 99};
    std::vector<int32_t> s1{1, 2, -4, -9};
    s0.resize(SIMD_LEN);
    s1.resize(SIMD_LEN);
    init((int32_t*)&src0, s0);
    init((int32_t*)&src1, s1);
    ret = GiMultiplyInt32(src0, src1);
    std::vector<int32_t> naive;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back(s0[i] * s1[i]);
    }
    assert_eq((int32_t*)&ret, naive);
}
TEST_F(FALLBACK, GiMultiplyInt8) {
    GI_INT8_t src0, src1, ret;
    std::vector<int8_t> s0{
            std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    std::vector<int8_t> s1{
            3, std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 4};
    s0.resize(SIMD_LEN_8);
    s1.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    init((int8_t*)&src1, s1, SIMD_LEN_8);
    ret = GiMultiplyInt8(src0, src1);
    std::vector<int8_t> naive;
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        naive.push_back(s0[i] * s1[i]);
    }
    assert_eq<int8_t>((int8_t*)&ret, naive, SIMD_LEN_8);
}
TEST_F(FALLBACK, GiMultiplyAddInt32) {
    GI_INT32_t src0, src1, src2, ret;
    std::vector<int32_t> s0{127, 2, 67, 9999};
    std::vector<int32_t> s1{1, 2, 90, -9};
    std::vector<int32_t> s2{-1, 12, 4, -9};
    s0.resize(SIMD_LEN);
    s1.resize(SIMD_LEN);
    s2.resize(SIMD_LEN);
    init((int32_t*)&src0, s0);
    init((int32_t*)&src1, s1);
    init((int32_t*)&src2, s2);
    ret = GiMultiplyAddInt32(src0, src1, src2);
    std::vector<int32_t> naive;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back(s0[i] + s1[i] * s2[i]);
    }
    assert_eq((int32_t*)&ret, naive);
}
TEST_F(FALLBACK, GiMultiplyAddInt8) {
    GI_INT8_t src0, src1, src2, ret;
    std::vector<int8_t> s0{
            std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    std::vector<int8_t> s1{
            3, std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 4};
    std::vector<int8_t> s2{
            std::numeric_limits<int8_t>::min(), 56, -128, 1, 2, 3, 4, 127,
            2, 56, -128, 1, 2, 5, 8, 4};
    s0.resize(SIMD_LEN_8);
    s1.resize(SIMD_LEN_8);
    s2.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    init((int8_t*)&src1, s1, SIMD_LEN_8);
    init((int8_t*)&src2, s2, SIMD_LEN_8);
    ret = GiMultiplyAddInt8(src0, src1, src2);
    std::vector<int8_t> naive;
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        naive.push_back(s0[i] + s1[i] * s2[i]);
    }
    assert_eq<int8_t>((int8_t*)&ret, naive, SIMD_LEN_8);
}
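// Bitwise tests: And/Or/Xor map to the scalar &, | and ^ operators, GiAndNotInt8(a, b)
// is expected to return (~a) & b, and GiEOrUint32 is exclusive-or on 32-bit lanes.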
TEST_F(FALLBACK, GiAndInt8) {
    GI_INT8_t src0, src1, ret;
    std::vector<int8_t> s0{
            std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    std::vector<int8_t> s1{
            3, std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 4};
    s0.resize(SIMD_LEN_8);
    s1.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    init((int8_t*)&src1, s1, SIMD_LEN_8);
    ret = GiAndInt8(src0, src1);
    std::vector<int8_t> naive;
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        naive.push_back(s0[i] & s1[i]);
    }
    assert_eq<int8_t>((int8_t*)&ret, naive, SIMD_LEN_8);
}
TEST_F(FALLBACK, GiEOrUint32) {
    GI_UINT32_t src0, src1, ret;
    std::vector<uint32_t> s0{127, 2, std::numeric_limits<uint32_t>::max(), 9999};
    std::vector<uint32_t> s1{1, 2, std::numeric_limits<uint32_t>::max(), 9};
    s0.resize(SIMD_LEN);
    s1.resize(SIMD_LEN);
    init((uint32_t*)&src0, s0);
    init((uint32_t*)&src1, s1);
    ret = GiEOrUint32(src0, src1);
    std::vector<uint32_t> naive;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back(s0[i] ^ s1[i]);
    }
    assert_eq((uint32_t*)&ret, naive);
}
TEST_F(FALLBACK, GiOrInt8) {
    GI_INT8_t src0, src1, ret;
    std::vector<int8_t> s0{
            std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    std::vector<int8_t> s1{
            3, std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 4};
    s0.resize(SIMD_LEN_8);
    s1.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    init((int8_t*)&src1, s1, SIMD_LEN_8);
    ret = GiOrInt8(src0, src1);
    std::vector<int8_t> naive;
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        naive.push_back(s0[i] | s1[i]);
    }
    assert_eq<int8_t>((int8_t*)&ret, naive, SIMD_LEN_8);
}
TEST_F(FALLBACK, GiAndNotInt8) {
    GI_INT8_t src0, src1, ret;
    std::vector<int8_t> s0{
            std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    std::vector<int8_t> s1{
            3, std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 4};
    s0.resize(SIMD_LEN_8);
    s1.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    init((int8_t*)&src1, s1, SIMD_LEN_8);
    ret = GiAndNotInt8(src0, src1);
    std::vector<int8_t> naive;
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        naive.push_back((~s0[i]) & s1[i]);
    }
    assert_eq<int8_t>((int8_t*)&ret, naive, SIMD_LEN_8);
}
TEST_F(FALLBACK, GiXorInt8) {
    GI_INT8_t src0, src1, ret;
    std::vector<int8_t> s0{
            std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    std::vector<int8_t> s1{
            3, std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 4};
    s0.resize(SIMD_LEN_8);
    s1.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    init((int8_t*)&src1, s1, SIMD_LEN_8);
    ret = GiXorInt8(src0, src1);
    std::vector<int8_t> naive;
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        naive.push_back(s0[i] ^ s1[i]);
    }
    assert_eq<int8_t>((int8_t*)&ret, naive, SIMD_LEN_8);
}
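// GiShiftRight23Int32 is checked against an arithmetic right shift by 23 bits, the
// shift amount typically used to reach the exponent field of a float stored in a lane.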
TEST_F(FALLBACK, GiShiftRight23Int32) {
    GI_INT32_t src0, ret;
    std::vector<int32_t> s0{1, 2, 3, -4};
    s0.resize(SIMD_LEN);
    init((int32_t*)&src0, s0);
    ret = GiShiftRight23Int32(src0);
    std::vector<int32_t> naive;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back(s0[i] >> 23);
    }
    assert_eq((int32_t*)&ret, naive);
}
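// Blend tests: GiBlendXXX selects bits from src1 where the mask src2 is set and from
// src0 elsewhere, so the reference is GiOrXXX(GiAndXXX(src1, src2), GiAndNotXXX(src2, src0)).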
TEST_F(FALLBACK, GiBlendInt32) {
    GI_INT32_t src0, src1, src2, ret, na;
    std::vector<int32_t> s0{1, 2, 3, -4};
    std::vector<int32_t> s1{12, 22, 32, -43};
    std::vector<int32_t> s2{-1, 21, 34, 4};
    s0.resize(SIMD_LEN);
    s1.resize(SIMD_LEN);
    s2.resize(SIMD_LEN);
    init((int32_t*)&src0, s0);
    init((int32_t*)&src1, s1);
    init((int32_t*)&src2, s2);
    ret = GiBlendInt32(src0, src1, src2);
    na = GiOrInt32(GiAndInt32(src1, src2), GiAndNotInt32(src2, src0));
    std::vector<int32_t> naive;
    auto p = (int32_t*)&na;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back(p[i]);
    }
    assert_eq((int32_t*)&ret, naive);
}
TEST_F(FALLBACK, GiBlendInt8) {
    GI_INT8_t src0, src1, src2, ret, na;
    std::vector<int8_t> s0{
            std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    std::vector<int8_t> s1{
            3, std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 4};
    std::vector<int8_t> s2{
            std::numeric_limits<int8_t>::min(), 56, -128, 1, 2, 3, 4, 127,
            2, 56, -128, 1, 2, 5, 8, 4};
    s0.resize(SIMD_LEN_8);
    s1.resize(SIMD_LEN_8);
    s2.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    init((int8_t*)&src1, s1, SIMD_LEN_8);
    init((int8_t*)&src2, s2, SIMD_LEN_8);
    ret = GiBlendInt8(src0, src1, src2);
    na = GiOrInt8(GiAndInt8(src1, src2), GiAndNotInt8(src2, src0));
    std::vector<int8_t> naive;
    auto p = (int8_t*)&na;
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        naive.push_back(p[i]);
    }
    assert_eq<int8_t>((int8_t*)&ret, naive, SIMD_LEN_8);
}
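// Absolute-value tests compare each lane against the scalar ternary
// s0[i] > 0 ? s0[i] : -s0[i].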
TEST_F(FALLBACK, GiAbsInt32) {
    GI_INT32_t src0, ret;
    std::vector<int32_t> s0{-1, 2, -3, 4};
    s0.resize(SIMD_LEN);
    init((int32_t*)&src0, s0);
    ret = GiAbsInt32(src0);
    std::vector<int32_t> naive;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back(s0[i] > 0 ? s0[i] : -s0[i]);
    }
    assert_eq((int32_t*)&ret, naive);
}
TEST_F(FALLBACK, GiAbsInt16) {
    GI_INT16_t src0, ret;
    std::vector<int16_t> s0{-127, 2, std::numeric_limits<int16_t>::max(), 9999, 1, 2,
                            3, 4};
    s0.resize(SIMD_LEN_16);
    init((int16_t*)&src0, s0, SIMD_LEN_16);
    ret = GiAbsInt16(src0);
    std::vector<int16_t> naive;
    for (size_t i = 0; i < SIMD_LEN_16; i++) {
        naive.push_back(s0[i] > 0 ? s0[i] : -s0[i]);
    }
    assert_eq<int16_t>((int16_t*)&ret, naive, SIMD_LEN_16);
}
TEST_F(FALLBACK, GiAbsInt8) {
    GI_INT8_t src0, ret;
    std::vector<int8_t> s0{
            std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    s0.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    ret = GiAbsInt8(src0);
    std::vector<int8_t> naive;
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        naive.push_back(s0[i] > 0 ? s0[i] : -s0[i]);
    }
    assert_eq<int8_t>((int8_t*)&ret, naive, SIMD_LEN_8);
}
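// Maximum/Minimum tests build an explicit comparison mask and reuse GiBlendXXX as the
// reference, so they also cross-check the blend primitives above.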
TEST_F(FALLBACK, GiMaximumInt32) {
    GI_INT32_t src0, src1, src2, ret, na;
    std::vector<int32_t> s0{1, -2, 3, 4};
    s0.resize(SIMD_LEN);
    std::vector<int32_t> s1{5, 6, 7, -8};
    s1.resize(SIMD_LEN);
    init((int32_t*)&src0, s0);
    init((int32_t*)&src1, s1);
    std::vector<int32_t> s2;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        s2.push_back(s0[i] > s1[i] ? 0xFFFFFFFF : 0);
    }
    s2.resize(SIMD_LEN);
    init((int32_t*)&src2, s2);
    ret = GiMaximumInt32(src0, src1);
    na = GiBlendInt32(src1, src0, src2);
    std::vector<int32_t> naive;
    auto p = (int32_t*)&na;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back(p[i]);
    }
    assert_eq((int32_t*)&ret, naive);
}
TEST_F(FALLBACK, GiMinimumInt32) {
    GI_INT32_t src0, src1, src2, ret, na;
    std::vector<int32_t> s0{1, -2, 3, 4};
    s0.resize(SIMD_LEN);
    std::vector<int32_t> s1{5, 6, 7, -8};
    s1.resize(SIMD_LEN);
    init((int32_t*)&src0, s0);
    init((int32_t*)&src1, s1);
    std::vector<int32_t> s2;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        s2.push_back(s1[i] > s0[i] ? 0xFFFFFFFF : 0);
    }
    s2.resize(SIMD_LEN);
    init((int32_t*)&src2, s2);
    ret = GiMinimumInt32(src0, src1);
    na = GiBlendInt32(src1, src0, src2);
    std::vector<int32_t> naive;
    auto p = (int32_t*)&na;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        naive.push_back(p[i]);
    }
    assert_eq((int32_t*)&ret, naive);
}
TEST_F(FALLBACK, GiBlendInt8x16) {
    GI_INT8_t src0, src1, src2, ret, na;
    std::vector<int8_t> s0{
            std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    std::vector<int8_t> s1{
            3, std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 4};
    std::vector<int8_t> s2{
            std::numeric_limits<int8_t>::min(), 56, -128, 1, 2, 3, 4, 127,
            2, 56, -128, 1, 2, 5, 8, 4};
    s0.resize(SIMD_LEN_8);
    s1.resize(SIMD_LEN_8);
    s2.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    init((int8_t*)&src1, s1, SIMD_LEN_8);
    init((int8_t*)&src2, s2, SIMD_LEN_8);
    ret = GiBlendInt8x16(src0, src1, src2);
    na = GiOrInt8(GiAndInt8(src1, src2), GiAndNotInt8(src2, src0));
    std::vector<int8_t> naive;
    auto p = (int8_t*)&na;
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        naive.push_back(p[i]);
    }
    assert_eq<int8_t>((int8_t*)&ret, naive, SIMD_LEN_8);
}
TEST_F(FALLBACK, GiMaximumInt8) {
    GI_INT8_t src0, src1, src2, ret, na;
    std::vector<int8_t> s0{
            std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    std::vector<int8_t> s1{
            3, std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 4};
    s0.resize(SIMD_LEN_8);
    s1.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    init((int8_t*)&src1, s1, SIMD_LEN_8);
    std::vector<int8_t> s2;
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        s2.push_back(s1[i] < s0[i] ? 0xFF : 0);
    }
    s2.resize(SIMD_LEN_8);
    init((int8_t*)&src2, s2, SIMD_LEN_8);
    ret = GiMaximumInt8(src0, src1);
    na = GiBlendInt8(src1, src0, src2);
    std::vector<int8_t> naive;
    auto p = (int8_t*)&na;
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        naive.push_back(p[i]);
    }
    assert_eq<int8_t>((int8_t*)&ret, naive, SIMD_LEN_8);
}
TEST_F(FALLBACK, GiMinimumInt8) {
    GI_INT8_t src0, src1, src2, ret, na;
    std::vector<int8_t> s0{
            std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    std::vector<int8_t> s1{
            3, std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 4};
    s0.resize(SIMD_LEN_8);
    s1.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    init((int8_t*)&src1, s1, SIMD_LEN_8);
    std::vector<int8_t> s2;
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        s2.push_back(s1[i] > s0[i] ? 0xFF : 0);
    }
    s2.resize(SIMD_LEN_8);
    init((int8_t*)&src2, s2, SIMD_LEN_8);
    ret = GiMinimumInt8(src0, src1);
    na = GiBlendInt8(src1, src0, src2);
    std::vector<int8_t> naive;
    auto p = (int8_t*)&na;
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        naive.push_back(p[i]);
    }
    assert_eq<int8_t>((int8_t*)&ret, naive, SIMD_LEN_8);
}
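// Widening moves: GiMoveHighLongXXX / GiMoveLowLongXXX extend the upper or lower half
// of the vector into lanes of twice the width; the reference relies on the implicit
// sign extension of the scalar push_back.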
TEST_F(FALLBACK, GiMoveHighLongInt8) {
    GI_INT8_t src0;
    GI_INT16_t ret;
    std::vector<int8_t> s0{
            std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128,
            std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            3, 4};
    s0.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    ret = GiMoveHighLongInt8(src0);
    std::vector<int16_t> naive;
    for (size_t i = 0; i < SIMD_LEN_8 / 2; i++) {
        naive.push_back(s0[i + SIMD_LEN_8 / 2]);
    }
    assert_eq<int16_t>((int16_t*)&ret, naive, SIMD_LEN_16);
}
TEST_F(FALLBACK, GiMoveLowLongInt8) {
    GI_INT8_t src0;
    GI_INT16_t ret;
    std::vector<int8_t> s0{
            std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            56, -128, 1, 2, 3, 4, 127, 2, 56, -128,
            std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::min(),
            3, 4};
    s0.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    ret = GiMoveLowLongInt8(src0);
    std::vector<int16_t> naive;
    for (size_t i = 0; i < SIMD_LEN_8 / 2; i++) {
        naive.push_back(s0[i]);
    }
    assert_eq<int16_t>((int16_t*)&ret, naive, SIMD_LEN_16);
}
TEST_F(FALLBACK, GiMoveHighLongInt16) {
    GI_INT16_t src0;
    GI_INT32_t ret;
    std::vector<int16_t> s0{-127, 2, std::numeric_limits<int16_t>::max(), 9999, 1, 2,
                            3, 4};
    s0.resize(SIMD_LEN_16);
    init((int16_t*)&src0, s0, SIMD_LEN_16);
    ret = GiMoveHighLongInt16(src0);
    std::vector<int32_t> naive;
    for (size_t i = 0; i < SIMD_LEN_16 / 2; i++) {
        naive.push_back(s0[i + SIMD_LEN_16 / 2]);
    }
    assert_eq<int32_t>((int32_t*)&ret, naive, SIMD_LEN);
}
TEST_F(FALLBACK, GiMoveLowLongInt16) {
    GI_INT16_t src0;
    GI_INT32_t ret;
    std::vector<int16_t> s0{-127, 2, std::numeric_limits<int16_t>::max(), 9999, 1, 2,
                            3, 4};
    s0.resize(SIMD_LEN_16);
    init((int16_t*)&src0, s0, SIMD_LEN_16);
    ret = GiMoveLowLongInt16(src0);
    std::vector<int32_t> naive;
    for (size_t i = 0; i < SIMD_LEN_16 / 2; i++) {
        naive.push_back(s0[i]);
    }
    assert_eq<int32_t>((int32_t*)&ret, naive, SIMD_LEN);
}
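// Horizontal reductions over the int8 lanes: sum, maximum and minimum, each checked
// against a plain scalar loop.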
TEST_F(FALLBACK, GiReduceAddInt8) {
    GI_INT8_t src0;
    int32_t ret{0};
    std::vector<int8_t> s0{127, 2, 56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    s0.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    ret = GiReduceAddInt8(src0);
    int32_t naive{0};
    for (auto i : s0) {
        naive += i;
    }
    ASSERT_EQ(ret, naive);
}
TEST_F(FALLBACK, GiReduceMaxInt8) {
    GI_INT8_t src0;
    int8_t ret{0};
    std::vector<int8_t> s0{127, 2, 56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    s0.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    ret = GiReduceMaxInt8(src0);
    int8_t naive{s0[0]};
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        naive = Max(naive, s0[i]);
    }
    ASSERT_EQ(ret, naive);
}
TEST_F(FALLBACK, GiReduceMinInt8) {
    GI_INT8_t src0;
    int8_t ret{0};
    std::vector<int8_t> s0{127, 2, 56, -128, 1, 2, 3, 4, 127, 2, 56, -128, 1, 2, 3, 4};
    s0.resize(SIMD_LEN_8);
    init((int8_t*)&src0, s0, SIMD_LEN_8);
    ret = GiReduceMinInt8(src0);
    int8_t naive{s0[0]};
    for (size_t i = 0; i < SIMD_LEN_8; i++) {
        naive = Min(naive, s0[i]);
    }
    ASSERT_EQ(ret, naive);
}
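// Float-to-int8 conversions: every lane is rounded and saturated to [-128, 127]; the
// single-vector form replicates its four results across the output (per the reference
// below), while the V2/V4 variants convert two or four float vectors.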
TEST_F(FALLBACK, GiCvtFromFloat32ToInt8) {
    GI_INT8_t ret;
    GI_FLOAT32_t src0;
    std::vector<float> s0{
            1.0f, -2.2f, std::numeric_limits<float>::max(),
            std::numeric_limits<float>::min()};
    s0.resize(SIMD_LEN);
    init((float*)&src0, s0);
    ret = GiCvtFromFloat32ToInt8(src0);
    std::vector<int8_t> naive;
    naive.resize(SIMD_LEN_8);
    for (size_t i = 0; i < SIMD_LEN; i++) {
        int8_t data = Saturate(round(s0[i]), -128, 127);
        naive[i] = data;
        naive[SIMD_LEN + i] = data;
        naive[2 * SIMD_LEN + i] = data;
        naive[3 * SIMD_LEN + i] = data;
    }
    assert_eq<int8_t>((int8_t*)&ret, naive, SIMD_LEN_8);
}
TEST_F(FALLBACK, GiCvtFromFloat32V2ToInt8) {
    GI_INT8_t ret;
    GI_FLOAT32_V2_t src0;
    std::vector<float> s0{
            1.0f, -2.2f, std::numeric_limits<float>::max(),
            std::numeric_limits<float>::min(), 1.1f, 2.2f, -9.0f, 899999.0f};
    s0.resize(SIMD_LEN * 2);
    init((float*)&src0, s0, SIMD_LEN * 2);
    ret = GiCvtFromFloat32V2ToInt8(src0);
    std::vector<int8_t> naive;
    for (size_t i = 0; i < SIMD_LEN * 2; i++) {
        naive.push_back(Saturate(round(s0[i]), -128, 127));
    }
    for (size_t i = 0; i < SIMD_LEN * 2; i++) {
        naive.push_back(Saturate(round(s0[i]), -128, 127));
    }
    assert_eq<int8_t>((int8_t*)&ret, naive, SIMD_LEN_8);
}
TEST_F(FALLBACK, GiCvtFromFloat32V4ToInt8) {
    GI_INT8_t ret;
    GI_FLOAT32_V4_t src0;
    std::vector<float> s0{
            std::numeric_limits<float>::max(), std::numeric_limits<float>::min(),
            1.0f, -2.2f, 3.1f, 4.2f, -5.0f, 6.0f, 7.0f, 8.0f, -9.9f, 10.9f, -11.9f,
            12.9f, 13.9f, -14.9f};
    s0.resize(SIMD_LEN * 4);
    init((float*)&src0, s0, SIMD_LEN * 4);
    ret = GiCvtFromFloat32V4ToInt8(src0);
    std::vector<int8_t> naive;
    for (size_t i = 0; i < SIMD_LEN * 4; i++) {
        naive.push_back(Saturate(round(s0[i]), -128, 127));
    }
    assert_eq<int8_t>((int8_t*)&ret, naive, SIMD_LEN_8);
}
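// float32x2_t helpers: combine/get-low/get-high split and join 2-lane halves, while
// GiPaddFloat32 and GiPmaxFloat32 are pairwise add and pairwise max (the reference
// uses the MAX_NAN helper).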
TEST_F(FALLBACK, GiCombineFloat32) {
    float32x2_t src0, src1;
    GI_FLOAT32_t ret;
    std::vector<float> s0{1.1f, -3.1415f};
    std::vector<float> s1{2.3f, 3.14777f};
    memcpy(&src0, s0.data(), sizeof(float32x2_t));
    memcpy(&src1, s1.data(), sizeof(float32x2_t));
    ret = GiCombineFloat32(src0, src1);
    std::vector<float> naive;
    naive.push_back(s0[0]);
    naive.push_back(s0[1]);
    naive.push_back(s1[0]);
    naive.push_back(s1[1]);
    assert_eq<float>((float*)&ret, naive);
}
TEST_F(FALLBACK, GiGetLowFloat32) {
    float32x2_t ret;
    GI_FLOAT32_t src0;
    std::vector<float> s0{1.0f, 2.2f, 3.4f, 4.5f};
    s0.resize(SIMD_LEN);
    init((float*)&src0, s0);
    ret = GiGetLowFloat32(src0);
    auto r = (float*)&ret;
    ASSERT_EQ(*r, s0[0]);
    ASSERT_EQ(*(r + 1), s0[1]);
}
TEST_F(FALLBACK, GiGetHighFloat32) {
    float32x2_t ret;
    GI_FLOAT32_t src0;
    std::vector<float> s0{1.0f, 2.2f, 3.4f, 4.5f};
    s0.resize(SIMD_LEN);
    init((float*)&src0, s0);
    ret = GiGetHighFloat32(src0);
    auto r = (float*)&ret;
    ASSERT_EQ(*r, s0[2]);
    ASSERT_EQ(*(r + 1), s0[3]);
}
TEST_F(FALLBACK, GiPaddFloat32) {
    float32x2_t src0, src1, ret;
    std::vector<float> s0{1.1f, -3.1415f};
    std::vector<float> s1{2.3f, 3.14777f};
    memcpy(&src0, s0.data(), sizeof(float32x2_t));
    memcpy(&src1, s1.data(), sizeof(float32x2_t));
    ret = GiPaddFloat32(src0, src1);
    std::vector<float> naive;
    naive.push_back(s0[0] + s0[1]);
    naive.push_back(s1[0] + s1[1]);
    auto r = (float*)&ret;
    ASSERT_LT(std::abs(naive[0] - r[0]), 1e-3);
    ASSERT_LT(std::abs(naive[1] - r[1]), 1e-3);
}
TEST_F(FALLBACK, GiPmaxFloat32) {
    float32x2_t src0, src1, ret;
    std::vector<float> s0{1.1f, -3.1415f};
    std::vector<float> s1{2.3f, 3.14777f};
    memcpy(&src0, s0.data(), sizeof(float32x2_t));
    memcpy(&src1, s1.data(), sizeof(float32x2_t));
    ret = GiPmaxFloat32(src0, src1);
    std::vector<float> naive;
    auto t0 = MAX_NAN(s0[0], s0[1]);
    auto t1 = MAX_NAN(s1[0], s1[1]);
    naive.push_back(t0);
    naive.push_back(t1);
    auto r = (float*)&ret;
    ASSERT_LT(std::abs(naive[0] - r[0]), 1e-3);
    ASSERT_LT(std::abs(naive[1] - r[1]), 1e-3);
}
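// Zip/unzip load-store tests: the interleaving store is checked against GiZipqFloat32,
// and GiLoadUzipFloat32V3 against a manual stride-3 deinterleave of the input.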
TEST_F(FALLBACK, GiStoreZipFloat32V2) {
    GI_FLOAT32_V2_t src0;
    std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f, 2312.1f, 345.244f, 3.59f, -12.8f};
    s0.resize(SIMD_LEN * 2);
    init((float*)&src0, s0, SIMD_LEN * 2);
    std::vector<float> ret;
    ret.resize(SIMD_LEN * 2);
    std::vector<float> ret_cmp;
    ret_cmp.resize(SIMD_LEN * 2);
    GiStoreZipFloat32V2(ret.data(), src0);
    GI_FLOAT32_V2_t tmp;
    tmp = GiZipqFloat32(src0.val[0], src0.val[1]);
    GiStoreFloat32(ret_cmp.data(), tmp.val[0]);
    GiStoreFloat32(ret_cmp.data() + SIMD_LEN, tmp.val[1]);
    assert_eq(ret.data(), ret_cmp, SIMD_LEN * 2);
}
TEST_F(FALLBACK, GiLoadUzipFloat32V3) {
    GI_FLOAT32_V3_t ret;
    std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f, 2312.1f, 345.244f,
                          3.59f, -12.8f, 2.2f, 6.0f, 90.0f, 89.3f};
    s0.resize(SIMD_LEN * 3);
    ret = GiLoadUzipFloat32V3(s0.data());
    std::vector<float> naive;
    for (size_t i = 0; i < 3; i++) {
        naive.push_back(s0[0 + i]);
        naive.push_back(s0[3 + i]);
        naive.push_back(s0[6 + i]);
        naive.push_back(s0[9 + i]);
    }
    assert_eq((float*)&ret, naive);
}
TEST_F(FALLBACK, GiStoreZipFloat32V3) {
    GI_FLOAT32_V3_t src0;
    std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f, 2312.1f, 345.244f,
                          3.59f, -12.8f, 3.59f, -12.8f, 2.2f, 6.0f};
    s0.resize(SIMD_LEN * 3);
    init((float*)&src0, s0, SIMD_LEN * 3);
    std::vector<float> ret;
    ret.resize(SIMD_LEN * 3);
    GiStoreZipFloat32V3(ret.data(), src0);
    std::vector<float> ret_cmp;
    for (size_t i = 0; i < SIMD_LEN; i++) {
        ret_cmp.push_back(s0[0 + i]);
        ret_cmp.push_back(s0[4 + i]);
        ret_cmp.push_back(s0[8 + i]);
    }
    assert_eq(ret.data(), ret_cmp, SIMD_LEN * 3);
}
}  // namespace test
}  // namespace megdnn
// vim: syntax=cpp.doxygen