Keine Beschreibung
Du kannst nicht mehr als 25 Themen auswählen Themen müssen mit entweder einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

Sse2.cs 127KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132
  1. using System;
  2. using System.Diagnostics;
  3. namespace Unity.Burst.Intrinsics
  4. {
  5. public unsafe static partial class X86
  6. {
  7. /// <summary>
  8. /// SSE2 intrinsics
  9. /// </summary>
  10. public static class Sse2
  11. {
  /// <summary>
  /// Reports whether SSE2 intrinsics are supported. The accessor visible here unconditionally yields false.
  /// </summary>
  /// <remarks>
  /// NOTE(review): the original summary says this evaluates to true at compile time when SSE2 is
  /// supported; presumably the compiler substitutes the value - confirm.
  /// </remarks>
  public static bool IsSse2Supported
  {
      get
      {
          return false;
      }
  }
  // _MM_SHUFFLE2 macro
  /// <summary>
  /// Build a shuffle immediate by shifting "x" left by one bit and OR-ing it with "y".
  /// </summary>
  /// <remarks>
  /// The arguments are not masked, so any bits above bit 0 in either input flow into the result.
  /// NOTE(review): the original comment named _mm_shuffle_ps; the _MM_SHUFFLE2 macro is normally
  /// paired with the two-lane _mm_shuffle_pd - confirm.
  /// </remarks>
  /// <param name="x">Integer x (placed in bit 1 and above)</param>
  /// <param name="y">Integer y (placed in bit 0 and above)</param>
  /// <returns>Shuffle immediate for use with shuffle instructions</returns>
  [DebuggerStepThrough]
  public static int SHUFFLE2(int x, int y)
  {
      int upper = x << 1;
      return upper | y;
  }
  /// <summary>
  /// Store 32-bit integer "a" at "mem_addr". The corresponding instruction uses a non-temporal hint to
  /// minimize cache pollution; this implementation performs a plain store through the pointer.
  /// </summary>
  /// <param name="mem_addr">Memory address</param>
  /// <param name="a">32-bit integer</param>
  [DebuggerStepThrough]
  public static void stream_si32(int* mem_addr, int a)
  {
      mem_addr[0] = a;
  }
  /// <summary>
  /// Store 64-bit integer "a" at "mem_addr". The corresponding instruction uses a non-temporal hint to
  /// minimize cache pollution; this implementation performs a plain store through the pointer.
  /// </summary>
  /// <param name="mem_addr">Memory address</param>
  /// <param name="a">64-bit integer</param>
  [DebuggerStepThrough]
  public static void stream_si64(long* mem_addr, long a)
  {
      mem_addr[0] = a;
  }
  /// <summary>
  /// Store the 128 bits of "a" (2 packed double-precision (64-bit) floating-point elements) at "mem_addr".
  /// The store is delegated to GenericCSharpStore. Per the intrinsic's contract, "mem_addr" must be aligned
  /// on a 16-byte boundary or a general-protection exception will be generated.
  /// </summary>
  /// <param name="mem_addr">Memory address</param>
  /// <param name="a">Vector a</param>
  [DebuggerStepThrough]
  public static void stream_pd(void* mem_addr, v128 a)
  {
      void* destination = mem_addr;
      GenericCSharpStore(destination, a);
  }
  /// <summary>
  /// Store the 128 bits of integer data in "a" at "mem_addr". The store is delegated to GenericCSharpStore.
  /// Per the intrinsic's contract, "mem_addr" must be aligned on a 16-byte boundary or a general-protection
  /// exception may be generated.
  /// </summary>
  /// <param name="mem_addr">Memory address</param>
  /// <param name="a">Vector a</param>
  [DebuggerStepThrough]
  public static void stream_si128(void* mem_addr, v128 a)
  {
      void* destination = mem_addr;
      GenericCSharpStore(destination, a);
  }
  // _mm_add_epi8
  /// <summary> Add the sixteen packed 8-bit integers of "a" and "b" lane by lane and return the sums. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector holding the per-lane sums, each narrowed back to 8 bits</returns>
  [DebuggerStepThrough]
  public static v128 add_epi8(v128 a, v128 b)
  {
      v128 result = default(v128);
      sbyte* lhs = &a.SByte0;
      sbyte* rhs = &b.SByte0;
      sbyte* lanes = &result.SByte0;
      for (int lane = 0; lane < 16; ++lane)
      {
          // The operands are widened to int for the addition, then narrowed.
          int sum = lhs[lane] + rhs[lane];
          lanes[lane] = (sbyte)sum;
      }
      return result;
  }
  // _mm_add_epi16
  /// <summary> Add the eight packed 16-bit integers of "a" and "b" lane by lane and return the sums. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector holding the per-lane sums, each narrowed back to 16 bits</returns>
  [DebuggerStepThrough]
  public static v128 add_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      short* lhs = &a.SShort0;
      short* rhs = &b.SShort0;
      short* lanes = &result.SShort0;
      for (int lane = 0; lane < 8; ++lane)
      {
          // The operands are widened to int for the addition, then narrowed.
          int sum = lhs[lane] + rhs[lane];
          lanes[lane] = (short)sum;
      }
      return result;
  }
  // _mm_add_epi32
  /// <summary> Add the four packed 32-bit integers of "a" and "b" lane by lane and return the sums. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector holding the per-lane sums</returns>
  [DebuggerStepThrough]
  public static v128 add_epi32(v128 a, v128 b)
  {
      v128 result = default(v128);
      int* lhs = &a.SInt0;
      int* rhs = &b.SInt0;
      int* lanes = &result.SInt0;
      for (int lane = 0; lane < 4; ++lane)
      {
          lanes[lane] = lhs[lane] + rhs[lane];
      }
      return result;
  }
  // _mm_add_epi64
  /// <summary> Add the two packed 64-bit integers of "a" and "b" lane by lane and return the sums. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector holding the per-lane sums</returns>
  [DebuggerStepThrough]
  public static v128 add_epi64(v128 a, v128 b)
  {
      v128 result = default(v128);
      long* lhs = &a.SLong0;
      long* rhs = &b.SLong0;
      long* lanes = &result.SLong0;
      for (int lane = 0; lane < 2; ++lane)
      {
          lanes[lane] = lhs[lane] + rhs[lane];
      }
      return result;
  }
  // _mm_adds_epi8
  /// <summary> Add the sixteen packed 8-bit integers of "a" and "b" using saturation and return the results. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector whose lanes are the int-wide sums passed through Saturate_To_Int8</returns>
  [DebuggerStepThrough]
  public static v128 adds_epi8(v128 a, v128 b)
  {
      v128 result = default(v128);
      sbyte* lhs = &a.SByte0;
      sbyte* rhs = &b.SByte0;
      sbyte* lanes = &result.SByte0;
      for (int lane = 0; lane < 16; ++lane)
      {
          int wideSum = lhs[lane] + rhs[lane];
          lanes[lane] = Saturate_To_Int8(wideSum);
      }
      return result;
  }
  // _mm_adds_epi16
  /// <summary> Add the eight packed 16-bit integers of "a" and "b" using saturation and return the results. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector whose lanes are the int-wide sums passed through Saturate_To_Int16</returns>
  [DebuggerStepThrough]
  public static v128 adds_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      short* lhs = &a.SShort0;
      short* rhs = &b.SShort0;
      short* lanes = &result.SShort0;
      for (int lane = 0; lane < 8; ++lane)
      {
          int wideSum = lhs[lane] + rhs[lane];
          lanes[lane] = Saturate_To_Int16(wideSum);
      }
      return result;
  }
  // _mm_adds_epu8
  /// <summary> Add the sixteen packed unsigned 8-bit integers of "a" and "b" using saturation and return the results. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector whose lanes are the int-wide sums passed through Saturate_To_UnsignedInt8</returns>
  [DebuggerStepThrough]
  public static v128 adds_epu8(v128 a, v128 b)
  {
      v128 result = default(v128);
      byte* lhs = &a.Byte0;
      byte* rhs = &b.Byte0;
      byte* lanes = &result.Byte0;
      for (int lane = 0; lane < 16; ++lane)
      {
          int wideSum = lhs[lane] + rhs[lane];
          lanes[lane] = Saturate_To_UnsignedInt8(wideSum);
      }
      return result;
  }
  // _mm_adds_epu16
  /// <summary> Add the eight packed unsigned 16-bit integers of "a" and "b" using saturation and return the results. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector whose lanes are the int-wide sums passed through Saturate_To_UnsignedInt16</returns>
  [DebuggerStepThrough]
  public static v128 adds_epu16(v128 a, v128 b)
  {
      v128 result = default(v128);
      ushort* lhs = &a.UShort0;
      ushort* rhs = &b.UShort0;
      ushort* lanes = &result.UShort0;
      for (int lane = 0; lane < 8; ++lane)
      {
          int wideSum = lhs[lane] + rhs[lane];
          lanes[lane] = Saturate_To_UnsignedInt16(wideSum);
      }
      return result;
  }
  // _mm_avg_epu8
  /// <summary> Average the sixteen packed unsigned 8-bit integers of "a" and "b", rounding halves up, and return the results. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector whose lanes are (a + b + 1) shifted right by one bit</returns>
  [DebuggerStepThrough]
  // This was not added until LLVM => 9.0
  public static v128 avg_epu8(v128 a, v128 b)
  {
      v128 result = default(v128);
      byte* lhs = &a.Byte0;
      byte* rhs = &b.Byte0;
      byte* lanes = &result.Byte0;
      for (int lane = 0; lane < 16; ++lane)
      {
          // The +1 bias makes the right shift round to the upper neighbour.
          int biasedSum = lhs[lane] + rhs[lane] + 1;
          lanes[lane] = (byte)(biasedSum >> 1);
      }
      return result;
  }
  // _mm_avg_epu16
  /// <summary> Average the eight packed unsigned 16-bit integers of "a" and "b", rounding halves up, and return the results. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector whose lanes are (a + b + 1) shifted right by one bit</returns>
  [DebuggerStepThrough]
  public static v128 avg_epu16(v128 a, v128 b)
  {
      v128 result = default(v128);
      ushort* lhs = &a.UShort0;
      ushort* rhs = &b.UShort0;
      ushort* lanes = &result.UShort0;
      for (int lane = 0; lane < 8; ++lane)
      {
          // The +1 bias makes the right shift round to the upper neighbour.
          int biasedSum = lhs[lane] + rhs[lane] + 1;
          lanes[lane] = (ushort)(biasedSum >> 1);
      }
      return result;
  }
  // _mm_madd_epi16
  /// <summary> Multiply the packed signed 16-bit integers of "a" and "b" into 32-bit products, then add each adjacent pair of products to form four 32-bit results.</summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector of four 32-bit sums; lane n combines 16-bit lanes 2n and 2n+1</returns>
  [DebuggerStepThrough]
  public static v128 madd_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      short* lhs = &a.SShort0;
      short* rhs = &b.SShort0;
      int* sums = &result.SInt0;
      for (int pair = 0; pair < 4; ++pair)
      {
          int even = 2 * pair;
          int odd = even + 1;
          sums[pair] = lhs[odd] * rhs[odd] + lhs[even] * rhs[even];
      }
      return result;
  }
  // _mm_max_epi16
  /// <summary> Compare the eight packed signed 16-bit integers of "a" and "b" and return the larger value of each lane. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector of per-lane maxima</returns>
  [DebuggerStepThrough]
  public static v128 max_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      short* lhs = &a.SShort0;
      short* rhs = &b.SShort0;
      short* lanes = &result.SShort0;
      for (int lane = 0; lane < 8; ++lane)
      {
          short x = lhs[lane];
          short y = rhs[lane];
          lanes[lane] = x > y ? x : y;
      }
      return result;
  }
  // _mm_max_epu8
  /// <summary> Compare the sixteen packed unsigned 8-bit integers of "a" and "b" and return the larger value of each lane. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector of per-lane maxima</returns>
  [DebuggerStepThrough]
  public static v128 max_epu8(v128 a, v128 b)
  {
      v128 result = default(v128);
      byte* lhs = &a.Byte0;
      byte* rhs = &b.Byte0;
      byte* lanes = &result.Byte0;
      for (int lane = 0; lane < 16; ++lane)
      {
          byte x = lhs[lane];
          byte y = rhs[lane];
          lanes[lane] = x > y ? x : y;
      }
      return result;
  }
  // _mm_min_epi16
  /// <summary> Compare the eight packed signed 16-bit integers of "a" and "b" and return the smaller value of each lane. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector of per-lane minima</returns>
  [DebuggerStepThrough]
  public static v128 min_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      short* lhs = &a.SShort0;
      short* rhs = &b.SShort0;
      short* lanes = &result.SShort0;
      for (int lane = 0; lane < 8; ++lane)
      {
          short x = lhs[lane];
          short y = rhs[lane];
          lanes[lane] = x < y ? x : y;
      }
      return result;
  }
  // _mm_min_epu8
  /// <summary> Compare the sixteen packed unsigned 8-bit integers of "a" and "b" and return the smaller value of each lane. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector of per-lane minima</returns>
  [DebuggerStepThrough]
  public static v128 min_epu8(v128 a, v128 b)
  {
      v128 result = default(v128);
      byte* lhs = &a.Byte0;
      byte* rhs = &b.Byte0;
      byte* lanes = &result.Byte0;
      for (int lane = 0; lane < 16; ++lane)
      {
          byte x = lhs[lane];
          byte y = rhs[lane];
          lanes[lane] = x < y ? x : y;
      }
      return result;
  }
  // _mm_mulhi_epi16
  /// <summary> Multiply the eight packed signed 16-bit integers of "a" and "b" into 32-bit products and return the high 16 bits of each product. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector holding bits 31..16 of each per-lane product</returns>
  [DebuggerStepThrough]
  public static v128 mulhi_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      short* lhs = &a.SShort0;
      short* rhs = &b.SShort0;
      short* lanes = &result.SShort0;
      for (int lane = 0; lane < 8; ++lane)
      {
          // short * short is evaluated as int, so the full product is available.
          lanes[lane] = (short)((lhs[lane] * rhs[lane]) >> 16);
      }
      return result;
  }
  // _mm_mulhi_epu16
  /// <summary> Multiply the eight packed unsigned 16-bit integers of "a" and "b" into 32-bit products and return the high 16 bits of each product. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector holding bits 31..16 of each per-lane unsigned product</returns>
  [DebuggerStepThrough]
  public static v128 mulhi_epu16(v128 a, v128 b)
  {
      v128 dst = default(v128);
      ushort* dptr = &dst.UShort0;
      ushort* aptr = &a.UShort0;
      ushort* bptr = &b.UShort0;
      for (int j = 0; j <= 7; j++)
      {
          // Multiply as uint: ushort * ushort would otherwise be evaluated as a signed int,
          // which overflows for large operands (e.g. 65535 * 65535) and throws when the
          // assembly is compiled with checked arithmetic. The largest possible product,
          // 65535 * 65535, still fits in 32 unsigned bits.
          uint tmp = (uint)aptr[j] * bptr[j];
          dptr[j] = (ushort)(tmp >> 16);
      }
      return dst;
  }
  // _mm_mullo_epi16
  /// <summary> Multiply the eight packed 16-bit integers of "a" and "b" into 32-bit products and return the low 16 bits of each product. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector holding bits 15..0 of each per-lane product</returns>
  [DebuggerStepThrough]
  public static v128 mullo_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      short* lhs = &a.SShort0;
      short* rhs = &b.SShort0;
      short* lanes = &result.SShort0;
      for (int lane = 0; lane < 8; ++lane)
      {
          // The int-wide product is narrowed, keeping only its low half.
          lanes[lane] = (short)(lhs[lane] * rhs[lane]);
      }
      return result;
  }
  // _mm_mul_epu32
  /// <summary> Multiply the low unsigned 32-bit integer of each packed 64-bit element of "a" and "b" and return the two unsigned 64-bit products. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector whose 64-bit lanes are UInt0*UInt0 and UInt2*UInt2</returns>
  [DebuggerStepThrough]
  public static v128 mul_epu32(v128 a, v128 b)
  {
      // Widen the operands before multiplying so the full 64-bit product is kept.
      ulong lowA = a.UInt0;
      ulong lowB = b.UInt0;
      ulong highA = a.UInt2;
      ulong highB = b.UInt2;

      v128 result = default(v128);
      result.ULong0 = lowA * lowB;
      result.ULong1 = highA * highB;
      return result;
  }
  // _mm_sad_epu8
  /// <summary> Compute the absolute differences of the packed unsigned 8-bit integers of "a" and "b", sum each group of eight consecutive differences, and place the two sums in the low 16 bits of the two 64-bit elements of the result. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector with the two sums in 16-bit lanes 0 and 4; all other lanes are zero</returns>
  [DebuggerStepThrough]
  public static v128 sad_epu8(v128 a, v128 b)
  {
      v128 result = default(v128);
      ushort* sums = &result.UShort0;
      byte* lhs = &a.Byte0;
      byte* rhs = &b.Byte0;
      for (int half = 0; half < 2; ++half)
      {
          int first = half * 8;
          int total = 0;
          for (int k = 0; k < 8; ++k)
          {
              int diff = lhs[first + k] - rhs[first + k];
              total += diff < 0 ? -diff : diff;
          }
          // 16-bit lane 4*half is the lowest lane of 64-bit element "half".
          sums[4 * half] = (ushort)total;
      }
      return result;
  }
  // _mm_sub_epi8
  /// <summary> Subtract the sixteen packed 8-bit integers of "b" from those of "a" lane by lane and return the differences. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector holding the per-lane differences, each narrowed back to 8 bits</returns>
  [DebuggerStepThrough]
  public static v128 sub_epi8(v128 a, v128 b)
  {
      v128 result = default(v128);
      sbyte* lhs = &a.SByte0;
      sbyte* rhs = &b.SByte0;
      sbyte* lanes = &result.SByte0;
      for (int lane = 0; lane < 16; ++lane)
      {
          // The operands are widened to int for the subtraction, then narrowed.
          int diff = lhs[lane] - rhs[lane];
          lanes[lane] = (sbyte)diff;
      }
      return result;
  }
  // _mm_sub_epi16
  /// <summary> Subtract the eight packed 16-bit integers of "b" from those of "a" lane by lane and return the differences. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector holding the per-lane differences, each narrowed back to 16 bits</returns>
  [DebuggerStepThrough]
  public static v128 sub_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      short* lhs = &a.SShort0;
      short* rhs = &b.SShort0;
      short* lanes = &result.SShort0;
      for (int lane = 0; lane < 8; ++lane)
      {
          // The operands are widened to int for the subtraction, then narrowed.
          int diff = lhs[lane] - rhs[lane];
          lanes[lane] = (short)diff;
      }
      return result;
  }
  // _mm_sub_epi32
  /// <summary> Subtract the four packed 32-bit integers of "b" from those of "a" lane by lane and return the differences. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector holding the per-lane differences</returns>
  [DebuggerStepThrough]
  public static v128 sub_epi32(v128 a, v128 b)
  {
      v128 result = default(v128);
      result.SInt0 = a.SInt0 - b.SInt0;
      result.SInt1 = a.SInt1 - b.SInt1;
      result.SInt2 = a.SInt2 - b.SInt2;
      result.SInt3 = a.SInt3 - b.SInt3;
      return result;
  }
  // _mm_sub_epi64
  /// <summary> Subtract the two packed 64-bit integers of "b" from those of "a" lane by lane and return the differences. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector holding the per-lane differences</returns>
  [DebuggerStepThrough]
  public static v128 sub_epi64(v128 a, v128 b)
  {
      v128 result = default(v128);
      result.SLong0 = a.SLong0 - b.SLong0;
      result.SLong1 = a.SLong1 - b.SLong1;
      return result;
  }
  // _mm_subs_epi8
  /// <summary> Subtract the sixteen packed 8-bit integers of "b" from those of "a" using saturation and return the results. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector whose lanes are the int-wide differences passed through Saturate_To_Int8</returns>
  [DebuggerStepThrough]
  public static v128 subs_epi8(v128 a, v128 b)
  {
      v128 result = default(v128);
      sbyte* lhs = &a.SByte0;
      sbyte* rhs = &b.SByte0;
      sbyte* lanes = &result.SByte0;
      for (int lane = 0; lane < 16; ++lane)
      {
          int wideDiff = lhs[lane] - rhs[lane];
          lanes[lane] = Saturate_To_Int8(wideDiff);
      }
      return result;
  }
  // _mm_subs_epi16
  /// <summary> Subtract the eight packed 16-bit integers of "b" from those of "a" using saturation and return the results. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector whose lanes are the int-wide differences passed through Saturate_To_Int16</returns>
  [DebuggerStepThrough]
  public static v128 subs_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      short* lhs = &a.SShort0;
      short* rhs = &b.SShort0;
      short* lanes = &result.SShort0;
      for (int lane = 0; lane < 8; ++lane)
      {
          int wideDiff = lhs[lane] - rhs[lane];
          lanes[lane] = Saturate_To_Int16(wideDiff);
      }
      return result;
  }
  // _mm_subs_epu8
  /// <summary> Subtract the sixteen packed unsigned 8-bit integers of "b" from those of "a" using saturation and return the results. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector whose lanes are the int-wide differences passed through Saturate_To_UnsignedInt8</returns>
  [DebuggerStepThrough]
  public static v128 subs_epu8(v128 a, v128 b)
  {
      v128 result = default(v128);
      byte* lhs = &a.Byte0;
      byte* rhs = &b.Byte0;
      byte* lanes = &result.Byte0;
      for (int lane = 0; lane < 16; ++lane)
      {
          // The int-wide difference may be negative; the helper receives it as is.
          int wideDiff = lhs[lane] - rhs[lane];
          lanes[lane] = Saturate_To_UnsignedInt8(wideDiff);
      }
      return result;
  }
  // _mm_subs_epu16
  /// <summary> Saturating subtraction of packed unsigned 16-bit integers: every lane of "b" is subtracted from the matching lane of "a", the difference is clamped through Saturate_To_UnsignedInt16, and the 8 results are stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 subs_epu16(v128 a, v128 b)
  {
      v128 result = default(v128);
      ushort* minuend = &a.UShort0;
      ushort* subtrahend = &b.UShort0;
      ushort* output = &result.UShort0;
      for (int lane = 0; lane < 8; ++lane)
      {
          // ushort operands are promoted to int, so the difference may be negative here; the saturation helper receives that signed value.
          int difference = minuend[lane] - subtrahend[lane];
          output[lane] = Saturate_To_UnsignedInt16(difference);
      }
      return result;
  }
  // _mm_slli_si128
  /// <summary> Byte-wise left shift of the whole 128-bit value: "a" is moved up by "imm8" bytes, zeros fill the vacated low bytes, and the result is stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="imm8">Offset in bytes (only the low 8 bits are used; 16 or more yields all zeros)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 slli_si128(v128 a, int imm8)
  {
      int byteShift = Math.Min(imm8 & 0xff, 16);
      v128 result = default(v128);
      byte* source = &a.Byte0;
      byte* output = &result.Byte0;
      for (int i = 0; i < 16; ++i)
      {
          if (i < byteShift)
          {
              // Bytes below the shift distance are the shifted-in zeros.
              output[i] = 0;
          }
          else
          {
              output[i] = source[i - byteShift];
          }
      }
      return result;
  }
  // _mm_bslli_si128
  /// <summary> Byte-wise left shift of "a" by "imm8" bytes with zero fill, stored in "dst". Forwards to slli_si128. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="imm8">Offset</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 bslli_si128(v128 a, int imm8)
  {
      v128 shifted = slli_si128(a, imm8);
      return shifted;
  }
  // _mm_bsrli_si128
  /// <summary> Byte-wise right shift of the whole 128-bit value: "a" is moved down by "imm8" bytes, zeros fill the vacated high bytes, and the result is stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="imm8">Offset in bytes (only the low 8 bits are used; 16 or more yields all zeros)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 bsrli_si128(v128 a, int imm8)
  {
      int byteShift = Math.Min(imm8 & 0xff, 16);
      v128 result = default(v128);
      byte* source = &a.Byte0;
      byte* output = &result.Byte0;
      for (int i = 0; i < 16; ++i)
      {
          int from = i + byteShift;
          if (from < 16)
          {
              output[i] = source[from];
          }
          else
          {
              // Source index would lie past the top byte: shift in a zero.
              output[i] = 0;
          }
      }
      return result;
  }
  // _mm_slli_epi16
  /// <summary> Left-shift each packed 16-bit integer of "a" by "imm8" bits, filling with zeros, and store the results in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="imm8">Offset in bits (only the low 8 bits are used; above 15 every lane becomes zero)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 slli_epi16(v128 a, int imm8)
  {
      int shift = imm8 & 0xff;
      v128 result = default(v128);
      if (shift > 15)
      {
          // Everything is shifted out; the zero-initialised result is already correct.
          return result;
      }
      ushort* source = &a.UShort0;
      ushort* output = &result.UShort0;
      for (int lane = 0; lane < 8; ++lane)
      {
          output[lane] = (ushort)(source[lane] << shift);
      }
      return result;
  }
  // _mm_sll_epi16
  /// <summary> Left-shift each packed 16-bit integer of "a" by the amount held in the low 64 bits of "count", filling with zeros, and store the results in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="count">Offset (read from count.ULong0; above 15 every lane becomes zero)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 sll_epi16(v128 a, v128 count)
  {
      ulong requested = count.ULong0;
      int shift = (int)Math.Min(requested, 16);
      v128 result = default(v128);
      if (shift > 15)
      {
          // Everything is shifted out; the zero-initialised result is already correct.
          return result;
      }
      ushort* source = &a.UShort0;
      ushort* output = &result.UShort0;
      for (int lane = 0; lane < 8; ++lane)
      {
          output[lane] = (ushort)(source[lane] << shift);
      }
      return result;
  }
  // _mm_slli_epi32
  /// <summary> Left-shift each packed 32-bit integer of "a" by "imm8" bits, filling with zeros, and store the results in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="imm8">Offset in bits (only the low 8 bits are used; above 31 every lane becomes zero)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 slli_epi32(v128 a, int imm8)
  {
      int shift = Math.Min(imm8 & 0xff, 32);
      v128 result = default(v128);
      if (shift > 31)
      {
          // Everything is shifted out; the zero-initialised result is already correct.
          return result;
      }
      uint* source = &a.UInt0;
      uint* output = &result.UInt0;
      for (int lane = 0; lane < 4; ++lane)
      {
          output[lane] = source[lane] << shift;
      }
      return result;
  }
  // _mm_sll_epi32
  /// <summary> Left-shift each packed 32-bit integer of "a" by the amount held in the low 64 bits of "count", filling with zeros, and store the results in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="count">Offset (read from count.ULong0; above 31 every lane becomes zero)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 sll_epi32(v128 a, v128 count)
  {
      ulong requested = count.ULong0;
      int shift = (int)Math.Min(requested, 32);
      v128 result = default(v128);
      if (shift > 31)
      {
          // Everything is shifted out; the zero-initialised result is already correct.
          return result;
      }
      uint* source = &a.UInt0;
      uint* output = &result.UInt0;
      for (int lane = 0; lane < 4; ++lane)
      {
          output[lane] = source[lane] << shift;
      }
      return result;
  }
  // _mm_slli_epi64
  /// <summary> Left-shift each packed 64-bit integer of "a" by "imm8" bits, filling with zeros, and store the results in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="imm8">Offset in bits (only the low 8 bits are used; above 63 every lane becomes zero)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 slli_epi64(v128 a, int imm8)
  {
      int shift = Math.Min(imm8 & 0xff, 64);
      v128 result = default(v128);
      if (shift > 63)
      {
          // Everything is shifted out; the zero-initialised result is already correct.
          return result;
      }
      ulong* source = &a.ULong0;
      ulong* output = &result.ULong0;
      for (int lane = 0; lane < 2; ++lane)
      {
          output[lane] = source[lane] << shift;
      }
      return result;
  }
  // _mm_sll_epi64
  /// <summary> Left-shift each packed 64-bit integer of "a" by the amount held in the low 64 bits of "count", filling with zeros, and store the results in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="count">Offset (read from count.ULong0; above 63 every lane becomes zero)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 sll_epi64(v128 a, v128 count)
  {
      ulong requested = count.ULong0;
      int shift = (int)Math.Min(requested, 64);
      v128 result = default(v128);
      if (shift > 63)
      {
          // Everything is shifted out; the zero-initialised result is already correct.
          return result;
      }
      ulong* source = &a.ULong0;
      ulong* output = &result.ULong0;
      for (int lane = 0; lane < 2; ++lane)
      {
          output[lane] = source[lane] << shift;
      }
      return result;
  }
  // _mm_srai_epi16
  /// <summary> Arithmetic right shift: each packed 16-bit integer of "a" is shifted right by "imm8" bits with the sign bit replicated, and the results are stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="imm8">Offset in bits (only the low 8 bits are used; clamped to 16)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 srai_epi16(v128 a, int imm8)
  {
      int shift = Math.Min(imm8 & 0xff, 16);
      v128 result = a;
      if (shift <= 0)
      {
          return result;
      }
      short* lanes = &result.SShort0;
      int remainder = shift - 1;
      for (int lane = 0; lane < 8; ++lane)
      {
          // Shift in two steps (1 bit, then the rest) so the full distance of 16 ends up as pure sign fill.
          short halved = (short)(lanes[lane] >> 1);
          lanes[lane] = (short)(halved >> remainder);
      }
      return result;
  }
  // _mm_sra_epi16
  /// <summary> Arithmetic right shift: each packed 16-bit integer of "a" is shifted right by the amount held in the low 64 bits of "count" with the sign bit replicated, and the results are stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="count">Offset (read from count.ULong0; clamped to 16)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 sra_epi16(v128 a, v128 count)
  {
      ulong requested = count.ULong0;
      int shift = (int)Math.Min(requested, 16);
      v128 result = a;
      if (shift <= 0)
      {
          return result;
      }
      short* lanes = &result.SShort0;
      int remainder = shift - 1;
      for (int lane = 0; lane < 8; ++lane)
      {
          // Shift in two steps (1 bit, then the rest) so the full distance of 16 ends up as pure sign fill.
          short halved = (short)(lanes[lane] >> 1);
          lanes[lane] = (short)(halved >> remainder);
      }
      return result;
  }
  // _mm_srai_epi32
  /// <summary> Arithmetic right shift: each packed 32-bit integer of "a" is shifted right by "imm8" bits with the sign bit replicated, and the results are stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="imm8">Offset in bits (only the low 8 bits are used; clamped to 32)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 srai_epi32(v128 a, int imm8)
  {
      int shift = Math.Min(imm8 & 0xff, 32);
      v128 result = a;
      if (shift <= 0)
      {
          return result;
      }
      int* lanes = &result.SInt0;
      int remainder = shift - 1;
      for (int lane = 0; lane < 4; ++lane)
      {
          // C# masks 32-bit shift counts to 5 bits, so a single shift by 32 would do nothing.
          // Shifting by 1 and then by the rest keeps every count in range and yields pure sign fill for 32.
          int halved = lanes[lane] >> 1;
          lanes[lane] = halved >> remainder;
      }
      return result;
  }
  // _mm_sra_epi32
  /// <summary> Arithmetic right shift: each packed 32-bit integer of "a" is shifted right by the amount held in the low 64 bits of "count" with the sign bit replicated, and the results are stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="count">Offset (read from count.ULong0; clamped to 32)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 sra_epi32(v128 a, v128 count)
  {
      ulong requested = count.ULong0;
      int shift = (int)Math.Min(requested, 32);
      v128 result = a;
      if (shift <= 0)
      {
          return result;
      }
      int* lanes = &result.SInt0;
      int remainder = shift - 1;
      for (int lane = 0; lane < 4; ++lane)
      {
          // C# masks 32-bit shift counts to 5 bits, so a single shift by 32 would do nothing.
          // Shifting by 1 and then by the rest keeps every count in range and yields pure sign fill for 32.
          int halved = lanes[lane] >> 1;
          lanes[lane] = halved >> remainder;
      }
      return result;
  }
  // _mm_srli_si128
  /// <summary> Byte-wise right shift of "a" by "imm8" bytes with zero fill, stored in "dst". Forwards to bsrli_si128. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="imm8">Offset</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 srli_si128(v128 a, int imm8)
  {
      v128 shifted = bsrli_si128(a, imm8);
      return shifted;
  }
  // _mm_srli_epi16
  /// <summary> Logical right shift: each packed 16-bit integer of "a" is shifted right by "imm8" bits with zeros shifted in, and the results are stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="imm8">Offset in bits (only the low 8 bits are used; clamped to 16)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 srli_epi16(v128 a, int imm8)
  {
      int shift = Math.Min(imm8 & 0xff, 16);
      v128 result = a;
      if (shift <= 0)
      {
          return result;
      }
      ushort* lanes = &result.UShort0;
      int remainder = shift - 1;
      for (int lane = 0; lane < 8; ++lane)
      {
          // Shift in two steps (1 bit, then the rest) so the full distance of 16 clears the lane.
          ushort halved = (ushort)(lanes[lane] >> 1);
          lanes[lane] = (ushort)(halved >> remainder);
      }
      return result;
  }
  // _mm_srl_epi16
  /// <summary> Logical right shift: each packed 16-bit integer of "a" is shifted right by the amount held in the low 64 bits of "count" with zeros shifted in, and the results are stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="count">Offset (read from count.ULong0; clamped to 16)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 srl_epi16(v128 a, v128 count)
  {
      ulong requested = count.ULong0;
      int shift = (int)Math.Min(requested, 16);
      v128 result = a;
      if (shift <= 0)
      {
          return result;
      }
      ushort* lanes = &result.UShort0;
      int remainder = shift - 1;
      for (int lane = 0; lane < 8; ++lane)
      {
          // Shift in two steps (1 bit, then the rest) so the full distance of 16 clears the lane.
          ushort halved = (ushort)(lanes[lane] >> 1);
          lanes[lane] = (ushort)(halved >> remainder);
      }
      return result;
  }
  // _mm_srli_epi32
  /// <summary> Logical right shift: each packed 32-bit integer of "a" is shifted right by "imm8" bits with zeros shifted in, and the results are stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="imm8">Offset in bits (only the low 8 bits are used; clamped to 32)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 srli_epi32(v128 a, int imm8)
  {
      int shift = Math.Min(imm8 & 0xff, 32);
      v128 result = a;
      if (shift <= 0)
      {
          return result;
      }
      uint* lanes = &result.UInt0;
      int remainder = shift - 1;
      for (int lane = 0; lane < 4; ++lane)
      {
          // C# masks 32-bit shift counts to 5 bits, so a single shift by 32 would do nothing.
          // Shifting by 1 and then by the rest keeps every count in range and clears the lane for 32.
          uint halved = lanes[lane] >> 1;
          lanes[lane] = halved >> remainder;
      }
      return result;
  }
  // _mm_srl_epi32
  /// <summary> Logical right shift: each packed 32-bit integer of "a" is shifted right by the amount held in the low 64 bits of "count" with zeros shifted in, and the results are stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="count">Offset (read from count.ULong0; clamped to 32)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 srl_epi32(v128 a, v128 count)
  {
      ulong requested = count.ULong0;
      int shift = (int)Math.Min(requested, 32);
      v128 result = a;
      if (shift <= 0)
      {
          return result;
      }
      uint* lanes = &result.UInt0;
      int remainder = shift - 1;
      for (int lane = 0; lane < 4; ++lane)
      {
          // C# masks 32-bit shift counts to 5 bits, so a single shift by 32 would do nothing.
          // Shifting by 1 and then by the rest keeps every count in range and clears the lane for 32.
          uint halved = lanes[lane] >> 1;
          lanes[lane] = halved >> remainder;
      }
      return result;
  }
  // _mm_srli_epi64
  /// <summary> Logical right shift: each packed 64-bit integer of "a" is shifted right by "imm8" bits with zeros shifted in, and the results are stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="imm8">Offset in bits (only the low 8 bits are used; clamped to 64)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 srli_epi64(v128 a, int imm8)
  {
      int shift = Math.Min(imm8 & 0xff, 64);
      v128 result = a;
      if (shift <= 0)
      {
          return result;
      }
      ulong* lanes = &result.ULong0;
      int remainder = shift - 1;
      for (int lane = 0; lane < 2; ++lane)
      {
          // C# masks 64-bit shift counts to 6 bits, so a single shift by 64 would do nothing.
          // Shifting by 1 and then by the rest keeps every count in range and clears the lane for 64.
          ulong halved = lanes[lane] >> 1;
          lanes[lane] = halved >> remainder;
      }
      return result;
  }
  // _mm_srl_epi64
  /// <summary> Logical right shift: each packed 64-bit integer of "a" is shifted right by the amount held in the low 64 bits of "count" with zeros shifted in, and the results are stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="count">Offset (read from count.ULong0; clamped to 64)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 srl_epi64(v128 a, v128 count)
  {
      ulong requested = count.ULong0;
      int shift = (int)Math.Min(requested, 64);
      v128 result = a;
      if (shift <= 0)
      {
          return result;
      }
      ulong* lanes = &result.ULong0;
      int remainder = shift - 1;
      for (int lane = 0; lane < 2; ++lane)
      {
          // C# masks 64-bit shift counts to 6 bits, so a single shift by 64 would do nothing.
          // Shifting by 1 and then by the rest keeps every count in range and clears the lane for 64.
          ulong halved = lanes[lane] >> 1;
          lanes[lane] = halved >> remainder;
      }
      return result;
  }
  // _mm_and_si128
  /// <summary> Bitwise AND of the full 128 bits of "a" and "b" (treated as integer data), computed as two 64-bit halves and stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 and_si128(v128 a, v128 b)
  {
      ulong low = a.ULong0 & b.ULong0;
      ulong high = a.ULong1 & b.ULong1;
      v128 result = default(v128);
      result.ULong0 = low;
      result.ULong1 = high;
      return result;
  }
  // _mm_andnot_si128
  /// <summary> Invert all 128 bits of "a" (treated as integer data), AND the inverted value with "b", and store the result in "dst". Note that it is "a", not "b", that is complemented. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 andnot_si128(v128 a, v128 b)
  {
      ulong invertedLow = ~a.ULong0;
      ulong invertedHigh = ~a.ULong1;
      v128 result = default(v128);
      result.ULong0 = invertedLow & b.ULong0;
      result.ULong1 = invertedHigh & b.ULong1;
      return result;
  }
  // _mm_or_si128
  /// <summary> Bitwise OR of the full 128 bits of "a" and "b" (treated as integer data), computed as two 64-bit halves and stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 or_si128(v128 a, v128 b)
  {
      ulong low = a.ULong0 | b.ULong0;
      ulong high = a.ULong1 | b.ULong1;
      v128 result = default(v128);
      result.ULong0 = low;
      result.ULong1 = high;
      return result;
  }
  // _mm_xor_si128
  /// <summary> Bitwise XOR of the full 128 bits of "a" and "b" (treated as integer data), computed as two 64-bit halves and stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 xor_si128(v128 a, v128 b)
  {
      ulong low = a.ULong0 ^ b.ULong0;
      ulong high = a.ULong1 ^ b.ULong1;
      v128 result = default(v128);
      result.ULong0 = low;
      result.ULong1 = high;
      return result;
  }
  // _mm_cmpeq_epi8
  /// <summary> Lane-wise equality test of packed 8-bit integers: a result lane is 0xff where "a" and "b" match and 0x00 otherwise. The 16 masks are stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 cmpeq_epi8(v128 a, v128 b)
  {
      v128 mask = default(v128);
      byte* left = &a.Byte0;
      byte* right = &b.Byte0;
      byte* output = &mask.Byte0;
      for (int lane = 0; lane < 16; ++lane)
      {
          // Lanes that differ keep the zero they were initialised with.
          if (left[lane] == right[lane])
          {
              output[lane] = 0xff;
          }
      }
      return mask;
  }
  // _mm_cmpeq_epi16
  /// <summary> Lane-wise equality test of packed 16-bit integers: a result lane is 0xffff where "a" and "b" match and 0x0000 otherwise. The 8 masks are stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 cmpeq_epi16(v128 a, v128 b)
  {
      v128 mask = default(v128);
      ushort* left = &a.UShort0;
      ushort* right = &b.UShort0;
      ushort* output = &mask.UShort0;
      for (int lane = 0; lane < 8; ++lane)
      {
          // Lanes that differ keep the zero they were initialised with.
          if (left[lane] == right[lane])
          {
              output[lane] = 0xffff;
          }
      }
      return mask;
  }
  // _mm_cmpeq_epi32
  /// <summary> Lane-wise equality test of packed 32-bit integers: a result lane is 0xffffffff where "a" and "b" match and 0x00000000 otherwise. The 4 masks are stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 cmpeq_epi32(v128 a, v128 b)
  {
      v128 mask = default(v128);
      uint* left = &a.UInt0;
      uint* right = &b.UInt0;
      uint* output = &mask.UInt0;
      for (int lane = 0; lane < 4; ++lane)
      {
          // Lanes that differ keep the zero they were initialised with.
          if (left[lane] == right[lane])
          {
              output[lane] = 0xffffffff;
          }
      }
      return mask;
  }
  // _mm_cmpgt_epi8
  /// <summary> Lane-wise signed greater-than test of packed 8-bit integers: a result lane is -1 (all bits set) where the lane of "a" exceeds the lane of "b" and 0 otherwise. The 16 masks are stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 cmpgt_epi8(v128 a, v128 b)
  {
      v128 mask = default(v128);
      sbyte* left = &a.SByte0;
      sbyte* right = &b.SByte0;
      sbyte* output = &mask.SByte0;
      for (int lane = 0; lane < 16; ++lane)
      {
          // Lanes that fail the test keep the zero they were initialised with.
          if (left[lane] > right[lane])
          {
              output[lane] = -1;
          }
      }
      return mask;
  }
  // _mm_cmpgt_epi16
  /// <summary> Lane-wise signed greater-than test of packed 16-bit integers: a result lane is -1 (all bits set) where the lane of "a" exceeds the lane of "b" and 0 otherwise. The 8 masks are stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 cmpgt_epi16(v128 a, v128 b)
  {
      v128 mask = default(v128);
      short* left = &a.SShort0;
      short* right = &b.SShort0;
      short* output = &mask.SShort0;
      for (int lane = 0; lane < 8; ++lane)
      {
          // Lanes that fail the test keep the zero they were initialised with.
          if (left[lane] > right[lane])
          {
              output[lane] = -1;
          }
      }
      return mask;
  }
  // _mm_cmpgt_epi32
  /// <summary> Lane-wise signed greater-than test of packed 32-bit integers: a result lane is -1 (all bits set) where the lane of "a" exceeds the lane of "b" and 0 otherwise. The 4 masks are stored in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 cmpgt_epi32(v128 a, v128 b)
  {
      v128 mask = default(v128);
      int* left = &a.SInt0;
      int* right = &b.SInt0;
      int* output = &mask.SInt0;
      for (int lane = 0; lane < 4; ++lane)
      {
          // Lanes that fail the test keep the zero they were initialised with.
          if (left[lane] > right[lane])
          {
              output[lane] = -1;
          }
      }
      return mask;
  }
  // _mm_cmplt_epi8
  /// <summary> Lane-wise less-than test of packed 8-bit integers in "a" and "b", with the results stored in "dst". Implemented as cmpgt_epi8 with the operands swapped. Note: This intrinsic emits the pcmpgtb instruction with the order of the operands switched. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
  public static v128 cmplt_epi8(v128 a, v128 b)
  {
      // a is less than b exactly when b is greater than a.
      v128 mask = cmpgt_epi8(b, a);
      return mask;
  }
  // _mm_cmplt_epi16
  /// <summary> Lane-wise less-than test of packed 16-bit integers in "a" and "b", with the results stored in "dst". Implemented as cmpgt_epi16 with the operands swapped. Note: This intrinsic emits the pcmpgtw instruction with the order of the operands switched. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
  public static v128 cmplt_epi16(v128 a, v128 b)
  {
      // a is less than b exactly when b is greater than a.
      v128 mask = cmpgt_epi16(b, a);
      return mask;
  }
  // _mm_cmplt_epi32
  /// <summary> Lane-wise less-than test of packed 32-bit integers in "a" and "b", with the results stored in "dst". Implemented as cmpgt_epi32 with the operands swapped. Note: This intrinsic emits the pcmpgtd instruction with the order of the operands switched. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
  public static v128 cmplt_epi32(v128 a, v128 b)
  {
      // a is less than b exactly when b is greater than a.
      v128 mask = cmpgt_epi32(b, a);
      return mask;
  }
  // _mm_cvtepi32_pd
  /// <summary> Convert the two lowest packed 32-bit integers of "a" (lanes 0 and 1) to double-precision (64-bit) floating-point elements and store them in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 cvtepi32_pd(v128 a)
  {
      double low = (double)a.SInt0;
      double high = (double)a.SInt1;
      v128 result = default(v128);
      result.Double0 = low;
      result.Double1 = high;
      return result;
  }
  // _mm_cvtsi32_sd
  /// <summary> Convert the 32-bit integer "b" to double precision and place it in the lower element of "dst"; the upper element of "dst" is taken unchanged from "a". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">32-bit integer</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 cvtsi32_sd(v128 a, int b)
  {
      // "a" is a by-value copy, so overwriting its low element does not affect the caller.
      a.Double0 = (double)b;
      return a;
  }
  // _mm_cvtsi64_sd
  /// <summary> Convert the 64-bit integer "b" to double precision and place it in the lower element of "dst"; the upper element of "dst" is taken unchanged from "a". </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">64-bit integer</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 cvtsi64_sd(v128 a, long b)
  {
      // "a" is a by-value copy, so overwriting its low element does not affect the caller.
      a.Double0 = (double)b;
      return a;
  }
  // _mm_cvtsi64x_sd
  /// <summary> Convert the 64-bit integer "b" to double precision and place it in the lower element of "dst"; the upper element of "dst" is taken unchanged from "a". Forwards to cvtsi64_sd. </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">64-bit integer</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
  public static v128 cvtsi64x_sd(v128 a, long b)
  {
      v128 converted = cvtsi64_sd(a, b);
      return converted;
  }
  // _mm_cvtepi32_ps
  /// <summary> Convert each of the four packed 32-bit integers of "a" to a single-precision (32-bit) floating-point element and store the results in "dst". </summary>
  /// <param name="a">Vector a</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 cvtepi32_ps(v128 a)
  {
      float lane0 = (float)a.SInt0;
      float lane1 = (float)a.SInt1;
      float lane2 = (float)a.SInt2;
      float lane3 = (float)a.SInt3;
      v128 result = default(v128);
      result.Float0 = lane0;
      result.Float1 = lane1;
      result.Float2 = lane2;
      result.Float3 = lane3;
      return result;
  }
  // _mm_cvtsi32_si128
  /// <summary> Place the 32-bit integer "a" in the lowest 32-bit element of "dst" and leave every other element zero. </summary>
  /// <param name="a">32-bit integer</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 cvtsi32_si128(int a)
  {
      // This doesn't need an intrinsic implementation, the Burst IR is fine.
      v128 result = default(v128); // starts out all zero
      result.SInt0 = a;
      return result;
  }
  // _mm_cvtsi64_si128
  /// <summary> Place the 64-bit integer "a" in the lower 64-bit element of "dst" and leave the upper element zero. </summary>
  /// <param name="a">64-bit integer</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 cvtsi64_si128(long a)
  {
      // This doesn't need an intrinsic implementation, the Burst IR is fine.
      v128 result = default(v128); // starts out all zero
      result.SLong0 = a;
      return result;
  }
  // _mm_cvtsi64x_si128
  /// <summary> Place the 64-bit integer "a" in the lower 64-bit element of "dst" and leave the upper element zero. Forwards to cvtsi64_si128. </summary>
  /// <param name="a">64-bit integer</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 cvtsi64x_si128(long a)
  {
      v128 widened = cvtsi64_si128(a);
      return widened;
  }
  // _mm_cvtsi128_si32
  /// <summary> Return the lowest 32-bit integer element of "a". </summary>
  /// <param name="a">Vector a</param>
  /// <returns>Integer</returns>
  [DebuggerStepThrough]
  public static int cvtsi128_si32(v128 a)
  {
      // This doesn't need an intrinsic implementation, the Burst IR is fine.
      int lowest = a.SInt0;
      return lowest;
  }
  // _mm_cvtsi128_si64
  /// <summary> Return the lower 64-bit integer element of "a". </summary>
  /// <param name="a">Vector a</param>
  /// <returns>Integer</returns>
  [DebuggerStepThrough]
  public static long cvtsi128_si64(v128 a)
  {
      // This doesn't need an intrinsic implementation, the Burst IR is fine.
      long lowest = a.SLong0;
      return lowest;
  }
  // _mm_cvtsi128_si64x
  /// <summary> Return the lower 64-bit integer element of "a". </summary>
  /// <param name="a">Vector a</param>
  /// <returns>Integer</returns>
  [DebuggerStepThrough]
  public static long cvtsi128_si64x(v128 a)
  {
      // This doesn't need an intrinsic implementation, the Burst IR is fine.
      long lowest = a.SLong0;
      return lowest;
  }
  // _mm_set_epi64x
  /// <summary> Build a vector from two 64-bit integers: "e0" becomes the lower element of "dst" and "e1" the upper element. </summary>
  /// <param name="e1">Value 1 (upper element)</param>
  /// <param name="e0">Value 0 (lower element)</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 set_epi64x(long e1, long e0)
  {
      // This doesn't need an intrinsic implementation, the Burst IR is fine.
      v128 result = default(v128);
      // Arguments arrive highest element first; store them by element index.
      result.SLong1 = e1;
      result.SLong0 = e0;
      return result;
  }
  // _mm_set_epi32
  /// <summary> Build a vector from four 32-bit integers: "e0" becomes element 0 of "dst" and "e3" element 3. </summary>
  /// <param name="e3">Value 3</param>
  /// <param name="e2">Value 2</param>
  /// <param name="e1">Value 1</param>
  /// <param name="e0">Value 0</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 set_epi32(int e3, int e2, int e1, int e0)
  {
      v128 result = default(v128);
      // Arguments arrive highest element first; write them by element index.
      int* lanes = &result.SInt0;
      lanes[3] = e3;
      lanes[2] = e2;
      lanes[1] = e1;
      lanes[0] = e0;
      return result;
  }
  // _mm_set_epi16
  /// <summary> Build a vector from eight 16-bit integers: "e0" becomes element 0 of "dst" and "e7" element 7. </summary>
  /// <param name="e7">Value 7</param>
  /// <param name="e6">Value 6</param>
  /// <param name="e5">Value 5</param>
  /// <param name="e4">Value 4</param>
  /// <param name="e3">Value 3</param>
  /// <param name="e2">Value 2</param>
  /// <param name="e1">Value 1</param>
  /// <param name="e0">Value 0</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 set_epi16(short e7, short e6, short e5, short e4, short e3, short e2, short e1, short e0)
  {
      v128 result = default(v128);
      // Arguments arrive highest element first; write them by element index.
      short* lanes = &result.SShort0;
      lanes[7] = e7;
      lanes[6] = e6;
      lanes[5] = e5;
      lanes[4] = e4;
      lanes[3] = e3;
      lanes[2] = e2;
      lanes[1] = e1;
      lanes[0] = e0;
      return result;
  }
  // _mm_set_epi8
  /// <summary> Build a vector from sixteen 8-bit integers: "e0_" becomes element 0 of "dst" and "e15_" element 15. </summary>
  /// <param name="e15_">Value 15</param>
  /// <param name="e14_">Value 14</param>
  /// <param name="e13_">Value 13</param>
  /// <param name="e12_">Value 12</param>
  /// <param name="e11_">Value 11</param>
  /// <param name="e10_">Value 10</param>
  /// <param name="e9_">Value 9</param>
  /// <param name="e8_">Value 8</param>
  /// <param name="e7_">Value 7</param>
  /// <param name="e6_">Value 6</param>
  /// <param name="e5_">Value 5</param>
  /// <param name="e4_">Value 4</param>
  /// <param name="e3_">Value 3</param>
  /// <param name="e2_">Value 2</param>
  /// <param name="e1_">Value 1</param>
  /// <param name="e0_">Value 0</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 set_epi8(sbyte e15_, sbyte e14_, sbyte e13_, sbyte e12_, sbyte e11_, sbyte e10_, sbyte e9_, sbyte e8_, sbyte e7_, sbyte e6_, sbyte e5_, sbyte e4_, sbyte e3_, sbyte e2_, sbyte e1_, sbyte e0_)
  {
      v128 result = default(v128);
      // Arguments arrive highest element first; write them by element index.
      sbyte* lanes = &result.SByte0;
      lanes[15] = e15_;
      lanes[14] = e14_;
      lanes[13] = e13_;
      lanes[12] = e12_;
      lanes[11] = e11_;
      lanes[10] = e10_;
      lanes[9] = e9_;
      lanes[8] = e8_;
      lanes[7] = e7_;
      lanes[6] = e6_;
      lanes[5] = e5_;
      lanes[4] = e4_;
      lanes[3] = e3_;
      lanes[2] = e2_;
      lanes[1] = e1_;
      lanes[0] = e0_;
      return result;
  }
  // _mm_set1_epi64x
  /// <summary> Replicate the 64-bit integer "a" into both 64-bit elements of "dst". This intrinsic may generate the "vpbroadcastq". </summary>
  /// <param name="a">64-bit integer</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 set1_epi64x(long a)
  {
      v128 result = default(v128);
      long* lanes = &result.SLong0;
      for (int lane = 0; lane < 2; ++lane)
      {
          lanes[lane] = a;
      }
      return result;
  }
  // _mm_set1_epi32
  /// <summary> Replicate the 32-bit integer "a" into all four 32-bit lanes of the result. This intrinsic may generate "vpbroadcastd". </summary>
  /// <param name="a">32-bit integer to broadcast</param>
  /// <returns>Vector with every 32-bit lane equal to "a"</returns>
  [DebuggerStepThrough]
  public static v128 set1_epi32(int a)
  {
      v128 result = default(v128);
      int* lanes = &result.SInt0;
      for (int lane = 0; lane < 4; lane++)
      {
          lanes[lane] = a;
      }
      return result;
  }
  // _mm_set1_epi16
  /// <summary> Replicate the 16-bit integer "a" into all eight 16-bit lanes of the result. This intrinsic may generate "vpbroadcastw". </summary>
  /// <param name="a">16-bit integer to broadcast</param>
  /// <returns>Vector with every 16-bit lane equal to "a"</returns>
  [DebuggerStepThrough]
  public static v128 set1_epi16(short a)
  {
      v128 result = default(v128);
      result.SShort0 = a;
      result.SShort1 = a;
      result.SShort2 = a;
      result.SShort3 = a;
      result.SShort4 = a;
      result.SShort5 = a;
      result.SShort6 = a;
      result.SShort7 = a;
      return result;
  }
  // _mm_set1_epi8
  /// <summary> Replicate the 8-bit integer "a" into all sixteen 8-bit lanes of the result. This intrinsic may generate "vpbroadcastb". </summary>
  /// <param name="a">8-bit integer to broadcast</param>
  /// <returns>Vector with every 8-bit lane equal to "a"</returns>
  [DebuggerStepThrough]
  public static v128 set1_epi8(sbyte a)
  {
      v128 result = default(v128);
      result.SByte0 = a;
      result.SByte1 = a;
      result.SByte2 = a;
      result.SByte3 = a;
      result.SByte4 = a;
      result.SByte5 = a;
      result.SByte6 = a;
      result.SByte7 = a;
      result.SByte8 = a;
      result.SByte9 = a;
      result.SByte10 = a;
      result.SByte11 = a;
      result.SByte12 = a;
      result.SByte13 = a;
      result.SByte14 = a;
      result.SByte15 = a;
      return result;
  }
  // _mm_setr_epi32
  /// <summary> Build a vector from four 32-bit integers in reversed lane order: the first argument (e3) lands in lane 0 and the last argument (e0) lands in lane 3. </summary>
  /// <param name="e3">Stored in 32-bit lane 0</param>
  /// <param name="e2">Stored in 32-bit lane 1</param>
  /// <param name="e1">Stored in 32-bit lane 2</param>
  /// <param name="e0">Stored in 32-bit lane 3</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 setr_epi32(int e3, int e2, int e1, int e0)
  {
      v128 result = default(v128);
      // Written from the top lane down; each lane is independent.
      result.SInt3 = e0;
      result.SInt2 = e1;
      result.SInt1 = e2;
      result.SInt0 = e3;
      return result;
  }
  // _mm_setr_epi16
  /// <summary> Build a vector from eight 16-bit integers in reversed lane order: the first argument (e7) lands in lane 0 and the last argument (e0) lands in lane 7. </summary>
  /// <param name="e7">Stored in 16-bit lane 0</param>
  /// <param name="e6">Stored in 16-bit lane 1</param>
  /// <param name="e5">Stored in 16-bit lane 2</param>
  /// <param name="e4">Stored in 16-bit lane 3</param>
  /// <param name="e3">Stored in 16-bit lane 4</param>
  /// <param name="e2">Stored in 16-bit lane 5</param>
  /// <param name="e1">Stored in 16-bit lane 6</param>
  /// <param name="e0">Stored in 16-bit lane 7</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 setr_epi16(short e7, short e6, short e5, short e4, short e3, short e2, short e1, short e0)
  {
      v128 result = default(v128);
      // Written from the top lane down; each lane is independent.
      result.SShort7 = e0;
      result.SShort6 = e1;
      result.SShort5 = e2;
      result.SShort4 = e3;
      result.SShort3 = e4;
      result.SShort2 = e5;
      result.SShort1 = e6;
      result.SShort0 = e7;
      return result;
  }
  // _mm_setr_epi8
  /// <summary> Build a vector from sixteen 8-bit integers in reversed lane order: the first argument (e15_) lands in byte lane 0 and the last argument (e0_) lands in byte lane 15. </summary>
  /// <param name="e15_">Stored in byte lane 0</param>
  /// <param name="e14_">Stored in byte lane 1</param>
  /// <param name="e13_">Stored in byte lane 2</param>
  /// <param name="e12_">Stored in byte lane 3</param>
  /// <param name="e11_">Stored in byte lane 4</param>
  /// <param name="e10_">Stored in byte lane 5</param>
  /// <param name="e9_">Stored in byte lane 6</param>
  /// <param name="e8_">Stored in byte lane 7</param>
  /// <param name="e7_">Stored in byte lane 8</param>
  /// <param name="e6_">Stored in byte lane 9</param>
  /// <param name="e5_">Stored in byte lane 10</param>
  /// <param name="e4_">Stored in byte lane 11</param>
  /// <param name="e3_">Stored in byte lane 12</param>
  /// <param name="e2_">Stored in byte lane 13</param>
  /// <param name="e1_">Stored in byte lane 14</param>
  /// <param name="e0_">Stored in byte lane 15</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 setr_epi8(sbyte e15_, sbyte e14_, sbyte e13_, sbyte e12_, sbyte e11_, sbyte e10_, sbyte e9_, sbyte e8_, sbyte e7_, sbyte e6_, sbyte e5_, sbyte e4_, sbyte e3_, sbyte e2_, sbyte e1_, sbyte e0_)
  {
      v128 result = default(v128);
      // Written from the top lane down; each lane is independent.
      result.SByte15 = e0_;
      result.SByte14 = e1_;
      result.SByte13 = e2_;
      result.SByte12 = e3_;
      result.SByte11 = e4_;
      result.SByte10 = e5_;
      result.SByte9 = e6_;
      result.SByte8 = e7_;
      result.SByte7 = e8_;
      result.SByte6 = e9_;
      result.SByte5 = e10_;
      result.SByte4 = e11_;
      result.SByte3 = e12_;
      result.SByte2 = e13_;
      result.SByte1 = e14_;
      result.SByte0 = e15_;
      return result;
  }
  // _mm_setzero_si128
  /// <summary> Produce the default-initialized v128 value, the managed stand-in for an all-zero __m128i. </summary>
  /// <returns>Default (zeroed) vector</returns>
  [DebuggerStepThrough]
  public static v128 setzero_si128()
  {
      v128 zero = default(v128);
      return zero;
  }
  // _mm_move_epi64
  /// <summary> Keep the lower 64-bit integer of "a" in the lower lane of the result and clear the upper 64-bit lane. </summary>
  /// <param name="a">Source vector; only its lower 64 bits survive</param>
  /// <returns>Vector with a's lower 64 bits and a zero upper lane</returns>
  [DebuggerStepThrough]
  public static v128 move_epi64(v128 a)
  {
      ulong lower = a.ULong0;
      v128 result = default(v128);
      result.ULong1 = 0;
      result.ULong0 = lower;
      return result;
  }
  // _mm_packs_epi16
  /// <summary> Narrow the sixteen 16-bit integers of "a" and "b" to 8-bit integers via Saturate_To_Int8. Results from "a" fill byte lanes 0-7 and results from "b" fill byte lanes 8-15. </summary>
  /// <param name="a">Source of result lanes 0-7</param>
  /// <param name="b">Source of result lanes 8-15</param>
  /// <returns>Vector of sixteen saturated 8-bit values</returns>
  [DebuggerStepThrough]
  public static v128 packs_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      short* lowSrc = &a.SShort0;
      short* highSrc = &b.SShort0;
      sbyte* packed = &result.SByte0;
      // Single pass over all 16 output lanes; the first half reads "a", the second half reads "b".
      for (int lane = 0; lane < 16; lane++)
      {
          short value = lane < 8 ? lowSrc[lane] : highSrc[lane - 8];
          packed[lane] = Saturate_To_Int8(value);
      }
      return result;
  }
  // _mm_packs_epi32
  /// <summary> Narrow the eight 32-bit integers of "a" and "b" to 16-bit integers via Saturate_To_Int16. Results from "a" fill 16-bit lanes 0-3 and results from "b" fill 16-bit lanes 4-7. </summary>
  /// <param name="a">Source of result lanes 0-3</param>
  /// <param name="b">Source of result lanes 4-7</param>
  /// <returns>Vector of eight saturated 16-bit values</returns>
  [DebuggerStepThrough]
  public static v128 packs_epi32(v128 a, v128 b)
  {
      v128 result = default(v128);
      int* lowSrc = &a.SInt0;
      int* highSrc = &b.SInt0;
      short* packed = &result.SShort0;
      // Single pass over all 8 output lanes; the first half reads "a", the second half reads "b".
      for (int lane = 0; lane < 8; lane++)
      {
          int value = lane < 4 ? lowSrc[lane] : highSrc[lane - 4];
          packed[lane] = Saturate_To_Int16(value);
      }
      return result;
  }
  // _mm_packus_epi16
  /// <summary> Narrow the sixteen signed 16-bit integers of "a" and "b" to unsigned 8-bit integers via Saturate_To_UnsignedInt8. Results from "a" fill byte lanes 0-7 and results from "b" fill byte lanes 8-15. </summary>
  /// <param name="a">Source of result lanes 0-7</param>
  /// <param name="b">Source of result lanes 8-15</param>
  /// <returns>Vector of sixteen saturated unsigned 8-bit values</returns>
  [DebuggerStepThrough]
  public static v128 packus_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      short* lowSrc = &a.SShort0;
      short* highSrc = &b.SShort0;
      byte* packed = &result.Byte0;
      // Single pass over all 16 output lanes; the first half reads "a", the second half reads "b".
      for (int lane = 0; lane < 16; lane++)
      {
          short value = lane < 8 ? lowSrc[lane] : highSrc[lane - 8];
          packed[lane] = Saturate_To_UnsignedInt8(value);
      }
      return result;
  }
  // _mm_extract_epi16
  /// <summary> Read one unsigned 16-bit lane of "a". Only the low three bits of "imm8" are used to pick the lane. </summary>
  /// <param name="a">Vector to read from</param>
  /// <param name="imm8">Lane selector; masked with 7</param>
  /// <returns>The selected 16-bit lane as a ushort</returns>
  [DebuggerStepThrough]
  public static ushort extract_epi16(v128 a, int imm8)
  {
      ushort* lanes = &a.UShort0;
      int index = imm8 & 7;
      return lanes[index];
  }
  // _mm_insert_epi16
  /// <summary> Return a copy of "a" in which one 16-bit lane is replaced by "i" truncated to 16 bits. Only the low three bits of "imm8" are used to pick the lane. </summary>
  /// <param name="a">Vector to copy</param>
  /// <param name="i">Value to insert; cast to short before storing</param>
  /// <param name="imm8">Lane selector; masked with 7</param>
  /// <returns>Copy of "a" with the selected lane overwritten</returns>
  [DebuggerStepThrough]
  public static v128 insert_epi16(v128 a, int i, int imm8)
  {
      v128 result = a;
      short* lanes = &result.SShort0;
      int index = imm8 & 7;
      lanes[index] = (short)i;
      return result;
  }
  // _mm_movemask_epi8
  /// <summary> Collect the most significant bit of each of the sixteen bytes of "a" into a 16-bit mask: bit j of the result is the top bit of byte lane j. </summary>
  /// <param name="a">Vector whose byte sign bits are gathered</param>
  /// <returns>Integer whose low 16 bits hold the mask; the remaining bits are zero</returns>
  [DebuggerStepThrough]
  public static int movemask_epi8(v128 a)
  {
      byte* bytes = &a.Byte0;
      int mask = 0;
      // Walk from the top byte down so each sign bit is shifted into its final position.
      for (int lane = 15; lane >= 0; lane--)
      {
          mask = (mask << 1) | (bytes[lane] >> 7);
      }
      return mask;
  }
  // _mm_shuffle_epi32
  /// <summary> Rearrange the four 32-bit lanes of "a". Result lane n is the lane of "a" chosen by bits [2n+1:2n] of "imm8". </summary>
  /// <param name="a">Source vector</param>
  /// <param name="imm8">Control: four 2-bit lane selectors, lowest pair first</param>
  /// <returns>Shuffled vector</returns>
  [DebuggerStepThrough]
  public static v128 shuffle_epi32(v128 a, int imm8)
  {
      v128 result = default(v128);
      uint* src = &a.UInt0;
      uint* res = &result.UInt0;
      for (int lane = 0; lane < 4; lane++)
      {
          int selector = (imm8 >> (lane * 2)) & 3;
          res[lane] = src[selector];
      }
      return result;
  }
  // _mm_shufflehi_epi16
  /// <summary> Rearrange the four 16-bit lanes in the high 64 bits of "a" according to "imm8"; the low 64 bits are copied from "a" unchanged. Result lane 4+n is lane 4+s of "a", where s is bits [2n+1:2n] of "imm8". </summary>
  /// <param name="a">Source vector</param>
  /// <param name="imm8">Control: four 2-bit lane selectors, lowest pair first</param>
  /// <returns>Vector with shuffled high half and untouched low half</returns>
  [DebuggerStepThrough]
  public static v128 shufflehi_epi16(v128 a, int imm8)
  {
      v128 result = a;
      short* src = &a.SShort0;
      short* res = &result.SShort0;
      for (int lane = 0; lane < 4; lane++)
      {
          int selector = (imm8 >> (lane * 2)) & 3;
          // Always read from the untouched input so earlier writes cannot affect later lanes.
          res[4 + lane] = src[4 + selector];
      }
      return result;
  }
  // _mm_shufflelo_epi16
  /// <summary> Rearrange the four 16-bit lanes in the low 64 bits of "a" according to "imm8"; the high 64 bits are copied from "a" unchanged. Result lane n is lane s of "a", where s is bits [2n+1:2n] of "imm8". </summary>
  /// <param name="a">Source vector</param>
  /// <param name="imm8">Control: four 2-bit lane selectors, lowest pair first</param>
  /// <returns>Vector with shuffled low half and untouched high half</returns>
  [DebuggerStepThrough]
  public static v128 shufflelo_epi16(v128 a, int imm8)
  {
      v128 result = a;
      short* src = &a.SShort0;
      short* res = &result.SShort0;
      for (int lane = 0; lane < 4; lane++)
      {
          int selector = (imm8 >> (lane * 2)) & 3;
          // Always read from the untouched input so earlier writes cannot affect later lanes.
          res[lane] = src[selector];
      }
      return result;
  }
  // _mm_unpackhi_epi8
  /// <summary> Interleave the upper eight bytes of "a" and "b": even result lanes come from "a" (lanes 8-15 in order), odd result lanes from "b". </summary>
  /// <param name="a">Supplies the even result lanes</param>
  /// <param name="b">Supplies the odd result lanes</param>
  /// <returns>Interleaved vector</returns>
  [DebuggerStepThrough]
  public static v128 unpackhi_epi8(v128 a, v128 b)
  {
      v128 result = default(v128);
      byte* fromA = &a.Byte0;
      byte* fromB = &b.Byte0;
      byte* res = &result.Byte0;
      // One output lane per iteration; parity picks the source, lane / 2 picks the element.
      for (int lane = 0; lane < 16; lane++)
      {
          byte* src = (lane & 1) == 0 ? fromA : fromB;
          res[lane] = src[8 + (lane >> 1)];
      }
      return result;
  }
  // _mm_unpackhi_epi16
  /// <summary> Interleave the upper four 16-bit lanes of "a" and "b": even result lanes come from "a" (lanes 4-7 in order), odd result lanes from "b". </summary>
  /// <param name="a">Supplies the even result lanes</param>
  /// <param name="b">Supplies the odd result lanes</param>
  /// <returns>Interleaved vector</returns>
  [DebuggerStepThrough]
  public static v128 unpackhi_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      ushort* fromA = &a.UShort0;
      ushort* fromB = &b.UShort0;
      ushort* res = &result.UShort0;
      // One output lane per iteration; parity picks the source, lane / 2 picks the element.
      for (int lane = 0; lane < 8; lane++)
      {
          ushort* src = (lane & 1) == 0 ? fromA : fromB;
          res[lane] = src[4 + (lane >> 1)];
      }
      return result;
  }
  // _mm_unpackhi_epi32
  /// <summary> Interleave the upper two 32-bit lanes of "a" and "b": the result is (a2, b2, a3, b3) from lane 0 to lane 3. </summary>
  /// <param name="a">Supplies result lanes 0 and 2</param>
  /// <param name="b">Supplies result lanes 1 and 3</param>
  /// <returns>Interleaved vector</returns>
  [DebuggerStepThrough]
  public static v128 unpackhi_epi32(v128 a, v128 b)
  {
      v128 result = default(v128);
      // Lanes sourced from "a" first, then the lanes sourced from "b".
      result.UInt0 = a.UInt2;
      result.UInt2 = a.UInt3;
      result.UInt1 = b.UInt2;
      result.UInt3 = b.UInt3;
      return result;
  }
  // _mm_unpackhi_epi64
  /// <summary> Combine the upper 64-bit lanes of "a" and "b": a's upper lane becomes the lower lane of the result and b's upper lane becomes the upper lane. </summary>
  /// <param name="a">Supplies the lower result lane (from its upper lane)</param>
  /// <param name="b">Supplies the upper result lane (from its upper lane)</param>
  /// <returns>Combined vector</returns>
  [DebuggerStepThrough]
  public static v128 unpackhi_epi64(v128 a, v128 b)
  {
      ulong upperOfA = a.ULong1;
      ulong upperOfB = b.ULong1;
      v128 result = default(v128);
      result.ULong0 = upperOfA;
      result.ULong1 = upperOfB;
      return result;
  }
  // _mm_unpacklo_epi8
  /// <summary> Interleave the lower eight bytes of "a" and "b": even result lanes come from "a" (lanes 0-7 in order), odd result lanes from "b". </summary>
  /// <param name="a">Supplies the even result lanes</param>
  /// <param name="b">Supplies the odd result lanes</param>
  /// <returns>Interleaved vector</returns>
  [DebuggerStepThrough]
  public static v128 unpacklo_epi8(v128 a, v128 b)
  {
      v128 result = default(v128);
      byte* fromA = &a.Byte0;
      byte* fromB = &b.Byte0;
      byte* res = &result.Byte0;
      int slot = 0;
      // Each source index produces two consecutive output lanes: a's byte, then b's byte.
      for (int index = 0; index < 8; index++)
      {
          res[slot++] = fromA[index];
          res[slot++] = fromB[index];
      }
      return result;
  }
  // _mm_unpacklo_epi16
  /// <summary> Interleave the lower four 16-bit lanes of "a" and "b": even result lanes come from "a" (lanes 0-3 in order), odd result lanes from "b". </summary>
  /// <param name="a">Supplies the even result lanes</param>
  /// <param name="b">Supplies the odd result lanes</param>
  /// <returns>Interleaved vector</returns>
  [DebuggerStepThrough]
  public static v128 unpacklo_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      ushort* fromA = &a.UShort0;
      ushort* fromB = &b.UShort0;
      ushort* res = &result.UShort0;
      int slot = 0;
      // Each source index produces two consecutive output lanes: a's value, then b's value.
      for (int index = 0; index < 4; index++)
      {
          res[slot++] = fromA[index];
          res[slot++] = fromB[index];
      }
      return result;
  }
  // _mm_unpacklo_epi32
  /// <summary> Interleave the lower two 32-bit lanes of "a" and "b": the result is (a0, b0, a1, b1) from lane 0 to lane 3. </summary>
  /// <param name="a">Supplies result lanes 0 and 2</param>
  /// <param name="b">Supplies result lanes 1 and 3</param>
  /// <returns>Interleaved vector</returns>
  [DebuggerStepThrough]
  public static v128 unpacklo_epi32(v128 a, v128 b)
  {
      v128 result = default(v128);
      // Lanes sourced from "a" first, then the lanes sourced from "b".
      result.UInt0 = a.UInt0;
      result.UInt2 = a.UInt1;
      result.UInt1 = b.UInt0;
      result.UInt3 = b.UInt1;
      return result;
  }
  // _mm_unpacklo_epi64
  /// <summary> Combine the lower 64-bit lanes of "a" and "b": a's lower lane stays in the lower lane of the result and b's lower lane becomes the upper lane. </summary>
  /// <param name="a">Supplies the lower result lane</param>
  /// <param name="b">Supplies the upper result lane (from its lower lane)</param>
  /// <returns>Combined vector</returns>
  [DebuggerStepThrough]
  public static v128 unpacklo_epi64(v128 a, v128 b)
  {
      ulong lowerOfA = a.ULong0;
      ulong lowerOfB = b.ULong0;
      v128 result = default(v128);
      result.ULong0 = lowerOfA;
      result.ULong1 = lowerOfB;
      return result;
  }
  // _mm_add_sd
  /// <summary> Scalar double add: the lower lane of the result is the sum of the lower lanes of "a" and "b"; the upper lane is taken unchanged from "a". </summary>
  /// <param name="a">Left operand; also supplies the upper lane</param>
  /// <param name="b">Right operand; only its lower lane is read</param>
  /// <returns>Vector with the sum in the lower lane and a's upper lane</returns>
  [DebuggerStepThrough]
  public static v128 add_sd(v128 a, v128 b)
  {
      double sum = a.Double0 + b.Double0;
      v128 result = default(v128);
      result.Double1 = a.Double1; // upper lane passes through from a
      result.Double0 = sum;
      return result;
  }
  // _mm_add_pd
  /// <summary> Packed double add: each 64-bit floating-point lane of the result is the sum of the matching lanes of "a" and "b". </summary>
  /// <param name="a">Left operand</param>
  /// <param name="b">Right operand</param>
  /// <returns>Vector of lane-wise sums</returns>
  [DebuggerStepThrough]
  public static v128 add_pd(v128 a, v128 b)
  {
      double lower = a.Double0 + b.Double0;
      double upper = a.Double1 + b.Double1;
      v128 result = default(v128);
      result.Double0 = lower;
      result.Double1 = upper;
      return result;
  }
  // _mm_div_sd
  /// <summary> Scalar double divide: the lower lane of the result is the lower lane of "a" divided by the lower lane of "b"; the upper lane is taken unchanged from "a". </summary>
  /// <param name="a">Dividend; also supplies the upper lane</param>
  /// <param name="b">Divisor; only its lower lane is read</param>
  /// <returns>Vector with the quotient in the lower lane and a's upper lane</returns>
  [DebuggerStepThrough]
  public static v128 div_sd(v128 a, v128 b)
  {
      double quotient = a.Double0 / b.Double0;
      v128 result = default(v128);
      result.Double1 = a.Double1; // upper lane passes through from a
      result.Double0 = quotient;
      return result;
  }
  // _mm_div_pd
  /// <summary> Packed double divide: each 64-bit floating-point lane of the result is the matching lane of "a" divided by the matching lane of "b". </summary>
  /// <param name="a">Dividend</param>
  /// <param name="b">Divisor</param>
  /// <returns>Vector of lane-wise quotients</returns>
  [DebuggerStepThrough]
  public static v128 div_pd(v128 a, v128 b)
  {
      double lower = a.Double0 / b.Double0;
      double upper = a.Double1 / b.Double1;
      v128 result = default(v128);
      result.Double0 = lower;
      result.Double1 = upper;
      return result;
  }
  // _mm_max_sd
  /// <summary> Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the maximum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". As with the MAXSD instruction, the lower element of "b" is returned whenever "a" is not strictly greater (this includes the case where either input is NaN and the case where both are zero). </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 max_sd(v128 a, v128 b)
  {
      v128 dst = default(v128);
      // Not Math.Max: that propagates a NaN held in "a", whereas the instruction
      // returns the second operand when the comparison is unordered.
      dst.Double0 = a.Double0 > b.Double0 ? a.Double0 : b.Double0;
      dst.Double1 = a.Double1;
      return dst;
  }
  // _mm_max_pd
  /// <summary> Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed maximum values in "dst". As with the MAXPD instruction, for each lane the element of "b" is returned whenever the element of "a" is not strictly greater (this includes the case where either input is NaN and the case where both are zero). </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 max_pd(v128 a, v128 b)
  {
      v128 dst = default(v128);
      // Not Math.Max: that propagates a NaN held in "a", whereas the instruction
      // returns the second operand when the comparison is unordered.
      dst.Double0 = a.Double0 > b.Double0 ? a.Double0 : b.Double0;
      dst.Double1 = a.Double1 > b.Double1 ? a.Double1 : b.Double1;
      return dst;
  }
  // _mm_min_sd
  /// <summary> Compare the lower double-precision (64-bit) floating-point elements in "a" and "b", store the minimum value in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". As with the MINSD instruction, the lower element of "b" is returned whenever "a" is not strictly less (this includes the case where either input is NaN and the case where both are zero). </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 min_sd(v128 a, v128 b)
  {
      v128 dst = default(v128);
      // Not Math.Min: that propagates a NaN held in "a", whereas the instruction
      // returns the second operand when the comparison is unordered.
      dst.Double0 = a.Double0 < b.Double0 ? a.Double0 : b.Double0;
      dst.Double1 = a.Double1;
      return dst;
  }
  // _mm_min_pd
  /// <summary> Compare packed double-precision (64-bit) floating-point elements in "a" and "b", and store packed minimum values in "dst". As with the MINPD instruction, for each lane the element of "b" is returned whenever the element of "a" is not strictly less (this includes the case where either input is NaN and the case where both are zero). </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 min_pd(v128 a, v128 b)
  {
      v128 dst = default(v128);
      // Not Math.Min: that propagates a NaN held in "a", whereas the instruction
      // returns the second operand when the comparison is unordered.
      dst.Double0 = a.Double0 < b.Double0 ? a.Double0 : b.Double0;
      dst.Double1 = a.Double1 < b.Double1 ? a.Double1 : b.Double1;
      return dst;
  }
  // _mm_mul_sd
  /// <summary> Scalar double multiply: the lower lane of the result is the product of the lower lanes of "a" and "b"; the upper lane is taken unchanged from "a". </summary>
  /// <param name="a">Left operand; also supplies the upper lane</param>
  /// <param name="b">Right operand; only its lower lane is read</param>
  /// <returns>Vector with the product in the lower lane and a's upper lane</returns>
  [DebuggerStepThrough]
  public static v128 mul_sd(v128 a, v128 b)
  {
      double product = a.Double0 * b.Double0;
      v128 result = default(v128);
      result.Double1 = a.Double1; // upper lane passes through from a
      result.Double0 = product;
      return result;
  }
  // _mm_mul_pd
  /// <summary> Packed double multiply: each 64-bit floating-point lane of the result is the product of the matching lanes of "a" and "b". </summary>
  /// <param name="a">Left operand</param>
  /// <param name="b">Right operand</param>
  /// <returns>Vector of lane-wise products</returns>
  [DebuggerStepThrough]
  public static v128 mul_pd(v128 a, v128 b)
  {
      double lower = a.Double0 * b.Double0;
      double upper = a.Double1 * b.Double1;
      v128 result = default(v128);
      result.Double0 = lower;
      result.Double1 = upper;
      return result;
  }
  // _mm_sqrt_sd
  /// <summary> Scalar double square root: the lower lane of the result is the square root of the lower lane of "b"; the upper lane is taken unchanged from "a". </summary>
  /// <param name="a">Supplies the upper lane only</param>
  /// <param name="b">Supplies the value whose square root is taken (lower lane)</param>
  /// <returns>Vector with sqrt(b's lower lane) in the lower lane and a's upper lane</returns>
  [DebuggerStepThrough]
  public static v128 sqrt_sd(v128 a, v128 b)
  {
      double root = Math.Sqrt(b.Double0);
      v128 result = default(v128);
      result.Double1 = a.Double1; // upper lane passes through from a
      result.Double0 = root;
      return result;
  }
  // _mm_sqrt_pd
  /// <summary> Packed double square root: each 64-bit floating-point lane of the result is the square root of the matching lane of "a". </summary>
  /// <param name="a">Input vector</param>
  /// <returns>Vector of lane-wise square roots</returns>
  [DebuggerStepThrough]
  public static v128 sqrt_pd(v128 a)
  {
      double lower = Math.Sqrt(a.Double0);
      double upper = Math.Sqrt(a.Double1);
      v128 result = default(v128);
      result.Double0 = lower;
      result.Double1 = upper;
      return result;
  }
  // _mm_sub_sd
  /// <summary> Scalar double subtract: the lower lane of the result is the lower lane of "a" minus the lower lane of "b"; the upper lane is taken unchanged from "a". </summary>
  /// <param name="a">Minuend; also supplies the upper lane</param>
  /// <param name="b">Subtrahend; only its lower lane is read</param>
  /// <returns>Vector with the difference in the lower lane and a's upper lane</returns>
  [DebuggerStepThrough]
  public static v128 sub_sd(v128 a, v128 b)
  {
      double difference = a.Double0 - b.Double0;
      v128 result = default(v128);
      result.Double1 = a.Double1; // upper lane passes through from a
      result.Double0 = difference;
      return result;
  }
  // _mm_sub_pd
  /// <summary> Packed double subtract: each 64-bit floating-point lane of the result is the matching lane of "a" minus the matching lane of "b". </summary>
  /// <param name="a">Minuend</param>
  /// <param name="b">Subtrahend</param>
  /// <returns>Vector of lane-wise differences</returns>
  [DebuggerStepThrough]
  public static v128 sub_pd(v128 a, v128 b)
  {
      double lower = a.Double0 - b.Double0;
      double upper = a.Double1 - b.Double1;
      v128 result = default(v128);
      result.Double0 = lower;
      result.Double1 = upper;
      return result;
  }
  // _mm_and_pd
  /// <summary> Bitwise AND of "a" and "b", performed on the raw bits of the two 64-bit lanes. </summary>
  /// <param name="a">First operand</param>
  /// <param name="b">Second operand</param>
  /// <returns>Vector holding a AND b</returns>
  [DebuggerStepThrough]
  public static v128 and_pd(v128 a, v128 b)
  {
      ulong lowerBits = a.ULong0 & b.ULong0;
      ulong upperBits = a.ULong1 & b.ULong1;
      v128 result = default(v128);
      result.ULong0 = lowerBits;
      result.ULong1 = upperBits;
      return result;
  }
  // _mm_andnot_pd
  /// <summary> Bitwise (NOT a) AND b, performed on the raw bits of the two 64-bit lanes. Note that it is "a", not "b", that gets inverted. </summary>
  /// <param name="a">Operand that is inverted before the AND</param>
  /// <param name="b">Operand that is ANDed with the inverted "a"</param>
  /// <returns>Vector holding (NOT a) AND b</returns>
  [DebuggerStepThrough]
  public static v128 andnot_pd(v128 a, v128 b)
  {
      ulong invertedLower = ~a.ULong0;
      ulong invertedUpper = ~a.ULong1;
      v128 result = default(v128);
      result.ULong0 = invertedLower & b.ULong0;
      result.ULong1 = invertedUpper & b.ULong1;
      return result;
  }
  // _mm_or_pd
  /// <summary> Bitwise OR of "a" and "b", performed on the raw bits of the two 64-bit lanes. </summary>
  /// <param name="a">First operand</param>
  /// <param name="b">Second operand</param>
  /// <returns>Vector holding a OR b</returns>
  [DebuggerStepThrough]
  public static v128 or_pd(v128 a, v128 b)
  {
      ulong lowerBits = a.ULong0 | b.ULong0;
      ulong upperBits = a.ULong1 | b.ULong1;
      v128 result = default(v128);
      result.ULong0 = lowerBits;
      result.ULong1 = upperBits;
      return result;
  }
  // _mm_xor_pd
  /// <summary> Bitwise XOR of "a" and "b", performed on the raw bits of the two 64-bit lanes. </summary>
  /// <param name="a">First operand</param>
  /// <param name="b">Second operand</param>
  /// <returns>Vector holding a XOR b</returns>
  [DebuggerStepThrough]
  public static v128 xor_pd(v128 a, v128 b)
  {
      ulong lowerBits = a.ULong0 ^ b.ULong0;
      ulong upperBits = a.ULong1 ^ b.ULong1;
      v128 result = default(v128);
      result.ULong0 = lowerBits;
      result.ULong1 = upperBits;
      return result;
  }
  // _mm_cmpeq_sd
  /// <summary> Scalar double equality test on the lower lanes: the lower 64 bits of the result are all ones when a's lower element equals b's, otherwise zero. The upper 64 bits are copied from "a". </summary>
  /// <param name="a">Left operand; also supplies the upper 64 bits</param>
  /// <param name="b">Right operand; only its lower lane is read</param>
  /// <returns>Vector with the comparison mask in the lower lane</returns>
  [DebuggerStepThrough]
  public static v128 cmpeq_sd(v128 a, v128 b)
  {
      bool isEqual = a.Double0 == b.Double0;
      v128 result = default(v128);
      result.ULong1 = a.ULong1;
      result.ULong0 = isEqual ? ulong.MaxValue : 0UL;
      return result;
  }
  // _mm_cmplt_sd
  /// <summary> Scalar double less-than test on the lower lanes: the lower 64 bits of the result are all ones when a's lower element is less than b's, otherwise zero. The upper 64 bits are copied from "a". </summary>
  /// <param name="a">Left operand; also supplies the upper 64 bits</param>
  /// <param name="b">Right operand; only its lower lane is read</param>
  /// <returns>Vector with the comparison mask in the lower lane</returns>
  [DebuggerStepThrough]
  public static v128 cmplt_sd(v128 a, v128 b)
  {
      bool isLess = a.Double0 < b.Double0;
      v128 result = default(v128);
      result.ULong1 = a.ULong1;
      result.ULong0 = isLess ? ulong.MaxValue : 0UL;
      return result;
  }
  // _mm_cmple_sd
  /// <summary> Scalar double less-than-or-equal test on the lower lanes: the lower 64 bits of the result are all ones when a's lower element is less than or equal to b's, otherwise zero. The upper 64 bits are copied from "a". </summary>
  /// <param name="a">Left operand; also supplies the upper 64 bits</param>
  /// <param name="b">Right operand; only its lower lane is read</param>
  /// <returns>Vector with the comparison mask in the lower lane</returns>
  [DebuggerStepThrough]
  public static v128 cmple_sd(v128 a, v128 b)
  {
      bool isLessOrEqual = a.Double0 <= b.Double0;
      v128 result = default(v128);
      result.ULong1 = a.ULong1;
      result.ULong0 = isLessOrEqual ? ulong.MaxValue : 0UL;
      return result;
  }
  // _mm_cmpgt_sd
  /// <summary> Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for greater-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". The lower element is all ones when a &gt; b and zero otherwise (including when either input is NaN). </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
  public static v128 cmpgt_sd(v128 a, v128 b)
  {
      // Computed directly rather than via a swapped-operand helper call:
      // swapping the arguments would take the upper element from "b" instead of "a".
      v128 dst = default(v128);
      dst.ULong0 = a.Double0 > b.Double0 ? ~0ul : 0;
      dst.ULong1 = a.ULong1;
      return dst;
  }
  // _mm_cmpge_sd
  /// <summary> Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for greater-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". The lower element is all ones when a &gt;= b and zero otherwise (including when either input is NaN). </summary>
  /// <param name="a">Vector a</param>
  /// <param name="b">Vector b</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
  public static v128 cmpge_sd(v128 a, v128 b)
  {
      // Computed directly rather than via a swapped-operand helper call:
      // swapping the arguments would take the upper element from "b" instead of "a".
      v128 dst = default(v128);
      dst.ULong0 = a.Double0 >= b.Double0 ? ~0ul : 0;
      dst.ULong1 = a.ULong1;
      return dst;
  }
  // _mm_cmpord_sd
  /// <summary> Scalar double "ordered" test on the lower lanes: the lower 64 bits of the result are all ones when IsNaN reports false for the lower 64 bits of both "a" and "b", otherwise zero. The upper 64 bits are copied from "a". </summary>
  /// <param name="a">First operand; also supplies the upper 64 bits</param>
  /// <param name="b">Second operand; only its lower lane is read</param>
  /// <returns>Vector with the comparison mask in the lower lane</returns>
  [DebuggerStepThrough]
  public static v128 cmpord_sd(v128 a, v128 b)
  {
      // Short-circuit: b is only inspected when a is not NaN.
      bool eitherIsNaN = IsNaN(a.ULong0) || IsNaN(b.ULong0);
      v128 result = default(v128);
      result.ULong1 = a.ULong1;
      result.ULong0 = eitherIsNaN ? 0UL : ulong.MaxValue;
      return result;
  }
  // _mm_cmpunord_sd
  /// <summary> Scalar double "unordered" test on the lower lanes: the lower 64 bits of the result are all ones when IsNaN reports true for the lower 64 bits of "a" or of "b", otherwise zero. The upper 64 bits are copied from "a". </summary>
  /// <param name="a">First operand; also supplies the upper 64 bits</param>
  /// <param name="b">Second operand; only its lower lane is read</param>
  /// <returns>Vector with the comparison mask in the lower lane</returns>
  [DebuggerStepThrough]
  public static v128 cmpunord_sd(v128 a, v128 b)
  {
      // Short-circuit: b is only inspected when a is not NaN.
      bool eitherIsNaN = IsNaN(a.ULong0) || IsNaN(b.ULong0);
      v128 result = default(v128);
      result.ULong1 = a.ULong1;
      result.ULong0 = eitherIsNaN ? ulong.MaxValue : 0UL;
      return result;
  }
  // _mm_cmpneq_sd
  /// <summary> Scalar double not-equal test on the lower lanes: the lower 64 bits of the result are all ones when a's lower element differs from b's, otherwise zero. The upper 64 bits are copied from "a". </summary>
  /// <param name="a">Left operand; also supplies the upper 64 bits</param>
  /// <param name="b">Right operand; only its lower lane is read</param>
  /// <returns>Vector with the comparison mask in the lower lane</returns>
  [DebuggerStepThrough]
  public static v128 cmpneq_sd(v128 a, v128 b)
  {
      bool differs = a.Double0 != b.Double0;
      v128 result = default(v128);
      result.ULong1 = a.ULong1;
      result.ULong0 = differs ? ulong.MaxValue : 0UL;
      return result;
  }
  2189. // _mm_cmpnlt_sd
  2190. /// <summary> Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". </summary>
  2191. /// <param name="a">Vector a</param>
  2192. /// <param name="b">Vector b</param>
  2193. /// <returns>Vector</returns>
  2194. [DebuggerStepThrough]
  2195. public static v128 cmpnlt_sd(v128 a, v128 b)
  2196. {
  2197. v128 dst = default(v128);
  2198. dst.ULong0 = !(a.Double0 < b.Double0) ? ~0ul : 0;
  2199. dst.ULong1 = a.ULong1;
  2200. return dst;
  2201. }
  2202. // _mm_cmpnle_sd
  2203. /// <summary> Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". </summary>
  2204. /// <param name="a">Vector a</param>
  2205. /// <param name="b">Vector b</param>
  2206. /// <returns>Vector</returns>
  2207. [DebuggerStepThrough]
  2208. public static v128 cmpnle_sd(v128 a, v128 b)
  2209. {
  2210. v128 dst = default(v128);
  2211. dst.ULong0 = !(a.Double0 <= b.Double0) ? ~0ul : 0;
  2212. dst.ULong1 = a.ULong1;
  2213. return dst;
  2214. }
  2215. // _mm_cmpngt_sd
  2216. /// <summary> Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". </summary>
  2217. /// <param name="a">Vector a</param>
  2218. /// <param name="b">Vector b</param>
  2219. /// <returns>Vector</returns>
  2220. [DebuggerStepThrough]
  2221. [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
  2222. public static v128 cmpngt_sd(v128 a, v128 b)
  2223. {
  2224. return cmpnlt_sd(b, a);
  2225. }
  2226. // _mm_cmpnge_sd
  2227. /// <summary> Compare the lower double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than-or-equal, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". </summary>
  2228. /// <param name="a">Vector a</param>
  2229. /// <param name="b">Vector b</param>
  2230. /// <returns>Vector</returns>
  2231. [DebuggerStepThrough]
  2232. [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
  2233. public static v128 cmpnge_sd(v128 a, v128 b)
  2234. {
  2235. return cmpnle_sd(b, a);
  2236. }
  2237. // _mm_cmpeq_pd
  2238. /// <summary> Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for equality, and store the results in "dst". </summary>
  2239. /// <param name="a">Vector a</param>
  2240. /// <param name="b">Vector b</param>
  2241. /// <returns>Vector</returns>
  2242. [DebuggerStepThrough]
  2243. public static v128 cmpeq_pd(v128 a, v128 b)
  2244. {
  2245. var dst = default(v128);
  2246. dst.ULong0 = (a.Double0 == b.Double0) ? ~0ul : 0;
  2247. dst.ULong1 = (a.Double1 == b.Double1) ? ~0ul : 0;
  2248. return dst;
  2249. }
  2250. // _mm_cmplt_pd
  2251. /// <summary> Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than, and store the results in "dst". </summary>
  2252. /// <param name="a">Vector a</param>
  2253. /// <param name="b">Vector b</param>
  2254. /// <returns>Vector</returns>
  2255. [DebuggerStepThrough]
  2256. public static v128 cmplt_pd(v128 a, v128 b)
  2257. {
  2258. var dst = default(v128);
  2259. dst.ULong0 = (a.Double0 < b.Double0) ? ~0ul : 0;
  2260. dst.ULong1 = (a.Double1 < b.Double1) ? ~0ul : 0;
  2261. return dst;
  2262. }
  2263. // _mm_cmple_pd
  2264. /// <summary> Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for less-than-or-equal, and store the results in "dst". </summary>
  2265. /// <param name="a">Vector a</param>
  2266. /// <param name="b">Vector b</param>
  2267. /// <returns>Vector</returns>
  2268. [DebuggerStepThrough]
  2269. public static v128 cmple_pd(v128 a, v128 b)
  2270. {
  2271. v128 dst = default(v128);
  2272. dst.ULong0 = (a.Double0 <= b.Double0) ? ~0ul : 0;
  2273. dst.ULong1 = (a.Double1 <= b.Double1) ? ~0ul : 0;
  2274. return dst;
  2275. }
  2276. // _mm_cmpgt_pd
  2277. /// <summary> Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for greater-than, and store the results in "dst". </summary>
  2278. /// <param name="a">Vector a</param>
  2279. /// <param name="b">Vector b</param>
  2280. /// <returns>Vector</returns>
  2281. [DebuggerStepThrough]
  2282. public static v128 cmpgt_pd(v128 a, v128 b)
  2283. {
  2284. v128 dst = default(v128);
  2285. dst.ULong0 = (a.Double0 > b.Double0) ? ~0ul : 0;
  2286. dst.ULong1 = (a.Double1 > b.Double1) ? ~0ul : 0;
  2287. return dst;
  2288. }
  2289. // _mm_cmpge_pd
  2290. /// <summary> Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for greater-than-or-equal, and store the results in "dst". </summary>
  2291. /// <param name="a">Vector a</param>
  2292. /// <param name="b">Vector b</param>
  2293. /// <returns>Vector</returns>
  2294. [DebuggerStepThrough]
  2295. public static v128 cmpge_pd(v128 a, v128 b)
  2296. {
  2297. v128 dst = default(v128);
  2298. dst.ULong0 = (a.Double0 >= b.Double0) ? ~0ul : 0;
  2299. dst.ULong1 = (a.Double1 >= b.Double1) ? ~0ul : 0;
  2300. return dst;
  2301. }
  2302. // _mm_cmpord_pd
  2303. /// <summary> Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if neither is NaN, and store the results in "dst". </summary>
  2304. /// <param name="a">Vector a</param>
  2305. /// <param name="b">Vector b</param>
  2306. /// <returns>Vector</returns>
  2307. [DebuggerStepThrough]
  2308. public static v128 cmpord_pd(v128 a, v128 b)
  2309. {
  2310. v128 dst = default(v128);
  2311. dst.ULong0 = (IsNaN(a.ULong0) || IsNaN(b.ULong0)) ? 0 : ~0ul;
  2312. dst.ULong1 = (IsNaN(a.ULong1) || IsNaN(b.ULong1)) ? 0 : ~0ul;
  2313. return dst;
  2314. }
  2315. // _mm_cmpunord_pd
  2316. /// <summary> Compare packed double-precision (64-bit) floating-point elements in "a" and "b" to see if either is NaN, and store the results in "dst". </summary>
  2317. /// <param name="a">Vector a</param>
  2318. /// <param name="b">Vector b</param>
  2319. /// <returns>Vector</returns>
  2320. [DebuggerStepThrough]
  2321. public static v128 cmpunord_pd(v128 a, v128 b)
  2322. {
  2323. v128 dst = default(v128);
  2324. dst.ULong0 = (IsNaN(a.ULong0) || IsNaN(b.ULong0)) ? ~0ul : 0;
  2325. dst.ULong1 = (IsNaN(a.ULong1) || IsNaN(b.ULong1)) ? ~0ul : 0;
  2326. return dst;
  2327. }
  2328. // _mm_cmpneq_pd
  2329. /// <summary> Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-equal, and store the results in "dst". </summary>
  2330. /// <param name="a">Vector a</param>
  2331. /// <param name="b">Vector b</param>
  2332. /// <returns>Vector</returns>
  2333. [DebuggerStepThrough]
  2334. public static v128 cmpneq_pd(v128 a, v128 b)
  2335. {
  2336. v128 dst = default(v128);
  2337. dst.ULong0 = (a.Double0 != b.Double0) ? ~0ul : 0;
  2338. dst.ULong1 = (a.Double1 != b.Double1) ? ~0ul : 0;
  2339. return dst;
  2340. }
  2341. // _mm_cmpnlt_pd
  2342. /// <summary> Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than, and store the results in "dst". </summary>
  2343. /// <param name="a">Vector a</param>
  2344. /// <param name="b">Vector b</param>
  2345. /// <returns>Vector</returns>
  2346. [DebuggerStepThrough]
  2347. public static v128 cmpnlt_pd(v128 a, v128 b)
  2348. {
  2349. v128 dst = default(v128);
  2350. dst.ULong0 = !(a.Double0 < b.Double0) ? ~0ul : 0;
  2351. dst.ULong1 = !(a.Double1 < b.Double1) ? ~0ul : 0;
  2352. return dst;
  2353. }
  2354. // _mm_cmpnle_pd
  2355. /// <summary> Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-less-than-or-equal, and store the results in "dst". </summary>
  2356. /// <param name="a">Vector a</param>
  2357. /// <param name="b">Vector b</param>
  2358. /// <returns>Vector</returns>
  2359. [DebuggerStepThrough]
  2360. public static v128 cmpnle_pd(v128 a, v128 b)
  2361. {
  2362. v128 dst = default(v128);
  2363. dst.ULong0 = !(a.Double0 <= b.Double0) ? ~0ul : 0;
  2364. dst.ULong1 = !(a.Double1 <= b.Double1) ? ~0ul : 0;
  2365. return dst;
  2366. }
  2367. // _mm_cmpngt_pd
  2368. /// <summary> Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than, and store the results in "dst". </summary>
  2369. /// <param name="a">Vector a</param>
  2370. /// <param name="b">Vector b</param>
  2371. /// <returns>Vector</returns>
  2372. [DebuggerStepThrough]
  2373. public static v128 cmpngt_pd(v128 a, v128 b)
  2374. {
  2375. v128 dst = default(v128);
  2376. dst.ULong0 = !(a.Double0 > b.Double0) ? ~0ul : 0;
  2377. dst.ULong1 = !(a.Double1 > b.Double1) ? ~0ul : 0;
  2378. return dst;
  2379. }
  2380. // _mm_cmpnge_pd
  2381. /// <summary> Compare packed double-precision (64-bit) floating-point elements in "a" and "b" for not-greater-than-or-equal, and store the results in "dst". </summary>
  2382. /// <param name="a">Vector a</param>
  2383. /// <param name="b">Vector b</param>
  2384. /// <returns>Vector</returns>
  2385. [DebuggerStepThrough]
  2386. public static v128 cmpnge_pd(v128 a, v128 b)
  2387. {
  2388. v128 dst = default(v128);
  2389. dst.ULong0 = !(a.Double0 >= b.Double0) ? ~0ul : 0;
  2390. dst.ULong1 = !(a.Double1 >= b.Double1) ? ~0ul : 0;
  2391. return dst;
  2392. }
  2393. // _mm_comieq_sd
  2394. /// <summary> Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for equality, and return the boolean result (0 or 1). </summary>
  2395. /// <param name="a">Vector a</param>
  2396. /// <param name="b">Vector b</param>
  2397. /// <returns>Boolean result</returns>
  2398. [DebuggerStepThrough]
  2399. public static int comieq_sd(v128 a, v128 b)
  2400. {
  2401. return a.Double0 == b.Double0 ? 1 : 0;
  2402. }
  2403. // _mm_comilt_sd
  2404. /// <summary> Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than, and return the boolean result (0 or 1). </summary>
  2405. /// <param name="a">Vector a</param>
  2406. /// <param name="b">Vector b</param>
  2407. /// <returns>Boolean result</returns>
  2408. [DebuggerStepThrough]
  2409. public static int comilt_sd(v128 a, v128 b)
  2410. {
  2411. return a.Double0 < b.Double0 ? 1 : 0;
  2412. }
  2413. // _mm_comile_sd
  2414. /// <summary> Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). </summary>
  2415. /// <param name="a">Vector a</param>
  2416. /// <param name="b">Vector b</param>
  2417. /// <returns>Boolean result</returns>
  2418. [DebuggerStepThrough]
  2419. public static int comile_sd(v128 a, v128 b)
  2420. {
  2421. return a.Double0 <= b.Double0 ? 1 : 0;
  2422. }
  2423. // _mm_comigt_sd
  2424. /// <summary> Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than, and return the boolean result (0 or 1). </summary>
  2425. /// <param name="a">Vector a</param>
  2426. /// <param name="b">Vector b</param>
  2427. /// <returns>Boolean result</returns>
  2428. [DebuggerStepThrough]
  2429. public static int comigt_sd(v128 a, v128 b)
  2430. {
  2431. return a.Double0 > b.Double0 ? 1 : 0;
  2432. }
  2433. // _mm_comige_sd
  2434. /// <summary> Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). </summary>
  2435. /// <param name="a">Vector a</param>
  2436. /// <param name="b">Vector b</param>
  2437. /// <returns>Boolean result</returns>
  2438. [DebuggerStepThrough]
  2439. public static int comige_sd(v128 a, v128 b)
  2440. {
  2441. return a.Double0 >= b.Double0 ? 1 : 0;
  2442. }
  2443. // _mm_comineq_sd
  2444. /// <summary> Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for not-equal, and return the boolean result (0 or 1). </summary>
  2445. /// <param name="a">Vector a</param>
  2446. /// <param name="b">Vector b</param>
  2447. /// <returns>Boolean result</returns>
  2448. [DebuggerStepThrough]
  2449. public static int comineq_sd(v128 a, v128 b)
  2450. {
  2451. return a.Double0 != b.Double0 ? 1 : 0;
  2452. }
  2453. // _mm_ucomieq_sd
  2454. /// <summary> Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for equality, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. </summary>
  2455. /// <param name="a">Vector a</param>
  2456. /// <param name="b">Vector b</param>
  2457. /// <returns>Boolean result</returns>
  2458. [DebuggerStepThrough]
  2459. public static int ucomieq_sd(v128 a, v128 b)
  2460. {
  2461. return a.Double0 == b.Double0 ? 1 : 0;
  2462. }
  2463. // _mm_ucomilt_sd
  2464. /// <summary> Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. </summary>
  2465. /// <param name="a">Vector a</param>
  2466. /// <param name="b">Vector b</param>
  2467. /// <returns>Boolean result</returns>
  2468. [DebuggerStepThrough]
  2469. public static int ucomilt_sd(v128 a, v128 b)
  2470. {
  2471. return a.Double0 < b.Double0 ? 1 : 0;
  2472. }
  2473. // _mm_ucomile_sd
  2474. /// <summary> Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for less-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. </summary>
  2475. /// <param name="a">Vector a</param>
  2476. /// <param name="b">Vector b</param>
  2477. /// <returns>Boolean result</returns>
  2478. [DebuggerStepThrough]
  2479. public static int ucomile_sd(v128 a, v128 b)
  2480. {
  2481. return a.Double0 <= b.Double0 ? 1 : 0;
  2482. }
  2483. // _mm_ucomigt_sd
  2484. /// <summary> Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. </summary>
  2485. /// <param name="a">Vector a</param>
  2486. /// <param name="b">Vector b</param>
  2487. /// <returns>Boolean result</returns>
  2488. [DebuggerStepThrough]
  2489. public static int ucomigt_sd(v128 a, v128 b)
  2490. {
  2491. return a.Double0 > b.Double0 ? 1 : 0;
  2492. }
  2493. // _mm_ucomige_sd
  2494. /// <summary> Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for greater-than-or-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. </summary>
  2495. /// <param name="a">Vector a</param>
  2496. /// <param name="b">Vector b</param>
  2497. /// <returns>Boolean result</returns>
  2498. [DebuggerStepThrough]
  2499. public static int ucomige_sd(v128 a, v128 b)
  2500. {
  2501. return a.Double0 >= b.Double0 ? 1 : 0;
  2502. }
  2503. // _mm_ucomineq_sd
  2504. /// <summary> Compare the lower double-precision (64-bit) floating-point element in "a" and "b" for not-equal, and return the boolean result (0 or 1). This instruction will not signal an exception for QNaNs. </summary>
  2505. /// <param name="a">Vector a</param>
  2506. /// <param name="b">Vector b</param>
  2507. /// <returns>Boolean result</returns>
  2508. [DebuggerStepThrough]
  2509. public static int ucomineq_sd(v128 a, v128 b)
  2510. {
  2511. return a.Double0 != b.Double0 ? 1 : 0;
  2512. }
  2513. // _mm_cvtpd_ps
  2514. /// <summary> Convert packed double-precision (64-bit) floating-point elements in "a" to packed single-precision (32-bit) floating-point elements, and store the results in "dst". </summary>
  2515. /// <param name="a">Vector a</param>
  2516. /// <returns>Vector</returns>
  2517. [DebuggerStepThrough]
  2518. public static v128 cvtpd_ps(v128 a)
  2519. {
  2520. v128 dst = default(v128);
  2521. dst.Float0 = (float)a.Double0;
  2522. dst.Float1 = (float)a.Double1;
  2523. dst.Float2 = 0;
  2524. dst.Float3 = 0;
  2525. return dst;
  2526. }
  2527. // _mm_cvtps_pd
  2528. /// <summary> Convert packed single-precision (32-bit) floating-point elements in "a" to packed double-precision (64-bit) floating-point elements, and store the results in "dst". </summary>
  2529. /// <param name="a">Vector a</param>
  2530. /// <returns>Vector</returns>
  2531. [DebuggerStepThrough]
  2532. public static v128 cvtps_pd(v128 a)
  2533. {
  2534. // The normal Burst IR does fine here.
  2535. v128 dst = default(v128);
  2536. dst.Double0 = a.Float0;
  2537. dst.Double1 = a.Float1;
  2538. return dst;
  2539. }
  2540. // _mm_cvtpd_epi32
  2541. /// <summary> Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". </summary>
  2542. /// <param name="a">Vector a</param>
  2543. /// <returns>Vector</returns>
  2544. [DebuggerStepThrough]
  2545. public static v128 cvtpd_epi32(v128 a)
  2546. {
  2547. v128 dst = default(v128);
  2548. dst.SInt0 = (int)Math.Round(a.Double0);
  2549. dst.SInt1 = (int)Math.Round(a.Double1);
  2550. return dst;
  2551. }
  2552. // _mm_cvtsd_si32
  2553. /// <summary> Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer, and store the result in "dst". </summary>
  2554. /// <param name="a">Vector a</param>
  2555. /// <returns>32-bit integer</returns>
  2556. [DebuggerStepThrough]
  2557. public static int cvtsd_si32(v128 a)
  2558. {
  2559. return (int)Math.Round(a.Double0);
  2560. }
  2561. // _mm_cvtsd_si64
  2562. /// <summary> Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". </summary>
  2563. /// <param name="a">Vector a</param>
  2564. /// <returns>64-bit integer</returns>
  2565. [DebuggerStepThrough]
  2566. public static long cvtsd_si64(v128 a)
  2567. {
  2568. return (long)Math.Round(a.Double0);
  2569. }
  2570. // _mm_cvtsd_si64x
  2571. /// <summary> Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer, and store the result in "dst". </summary>
  2572. /// <param name="a">Vector a</param>
  2573. /// <returns>64-bit integer</returns>
  2574. [DebuggerStepThrough]
  2575. [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
  2576. public static long cvtsd_si64x(v128 a)
  2577. {
  2578. return cvtsd_si64(a);
  2579. }
  2580. // _mm_cvtsd_ss
  2581. /// <summary> Convert the lower double-precision (64-bit) floating-point element in "b" to a single-precision (32-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". </summary>
  2582. /// <param name="a">Vector a</param>
  2583. /// <param name="b">Vector b</param>
  2584. /// <returns>Vector</returns>
  2585. [DebuggerStepThrough]
  2586. public static v128 cvtsd_ss(v128 a, v128 b)
  2587. {
  2588. v128 dst = a;
  2589. dst.Float0 = (float)b.Double0;
  2590. return dst;
  2591. }
  2592. // _mm_cvtsd_f64
  2593. /// <summary> Copy the lower double-precision (64-bit) floating-point element of "a" to "dst". </summary>
  2594. /// <param name="a">Vector a</param>
  2595. /// <returns>64-bit floating-point element</returns>
  2596. [DebuggerStepThrough]
  2597. public static double cvtsd_f64(v128 a)
  2598. {
  2599. // Burst IR is OK
  2600. return a.Double0;
  2601. }
  2602. // _mm_cvtss_sd
  2603. /// <summary> Convert the lower single-precision (32-bit) floating-point element in "b" to a double-precision (64-bit) floating-point element, store the result in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". </summary>
  2604. /// <param name="a">Vector a</param>
  2605. /// <param name="b">Vector b</param>
  2606. /// <returns>Vector</returns>
  2607. [DebuggerStepThrough]
  2608. public static v128 cvtss_sd(v128 a, v128 b)
  2609. {
  2610. // Burst IR is OK
  2611. v128 dst = default(v128);
  2612. dst.Double0 = b.Float0;
  2613. dst.Double1 = a.Float0;
  2614. return dst;
  2615. }
  2616. // _mm_cvttpd_epi32
  2617. /// <summary> Convert packed double-precision (64-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". </summary>
  2618. /// <param name="a">Vector a</param>
  2619. /// <returns>Vector</returns>
  2620. [DebuggerStepThrough]
  2621. public static v128 cvttpd_epi32(v128 a)
  2622. {
  2623. v128 dst = default(v128);
  2624. dst.SInt0 = (int)a.Double0;
  2625. dst.SInt1 = (int)a.Double1;
  2626. return dst;
  2627. }
  2628. // _mm_cvttsd_si32
  2629. /// <summary> Convert the lower double-precision (64-bit) floating-point element in "a" to a 32-bit integer with truncation, and store the result in "dst". </summary>
  2630. /// <param name="a">Vector a</param>
  2631. /// <returns>32-bit integer</returns>
  2632. [DebuggerStepThrough]
  2633. public static int cvttsd_si32(v128 a)
  2634. {
  2635. return (int)a.Double0;
  2636. }
  2637. // _mm_cvttsd_si64
  2638. /// <summary> Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". </summary>
  2639. /// <param name="a">Vector a</param>
  2640. /// <returns>64-bit integer</returns>
  2641. [DebuggerStepThrough]
  2642. public static long cvttsd_si64(v128 a)
  2643. {
  2644. return (long)a.Double0;
  2645. }
  2646. // _mm_cvttsd_si64x
  2647. /// <summary> Convert the lower double-precision (64-bit) floating-point element in "a" to a 64-bit integer with truncation, and store the result in "dst". </summary>
  2648. /// <param name="a">Vector a</param>
  2649. /// <returns>64-bit integer</returns>
  2650. [DebuggerStepThrough]
  2651. [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
  2652. public static long cvttsd_si64x(v128 a)
  2653. {
  2654. return cvttsd_si64(a);
  2655. }
  2656. // _mm_cvtps_epi32
  2657. /// <summary> Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers, and store the results in "dst". </summary>
  2658. /// <param name="a">Vector a</param>
  2659. /// <returns>Vector</returns>
  2660. [DebuggerStepThrough]
  2661. public static v128 cvtps_epi32(v128 a)
  2662. {
  2663. v128 dst = default(v128);
  2664. dst.SInt0 = (int)Math.Round(a.Float0);
  2665. dst.SInt1 = (int)Math.Round(a.Float1);
  2666. dst.SInt2 = (int)Math.Round(a.Float2);
  2667. dst.SInt3 = (int)Math.Round(a.Float3);
  2668. return dst;
  2669. }
  2670. // _mm_cvttps_epi32
  2671. /// <summary> Convert packed single-precision (32-bit) floating-point elements in "a" to packed 32-bit integers with truncation, and store the results in "dst". </summary>
  2672. /// <param name="a">Vector a</param>
  2673. /// <returns>Vector</returns>
  2674. [DebuggerStepThrough]
  2675. public static v128 cvttps_epi32(v128 a)
  2676. {
  2677. v128 dst = default(v128);
  2678. dst.SInt0 = (int)a.Float0;
  2679. dst.SInt1 = (int)a.Float1;
  2680. dst.SInt2 = (int)a.Float2;
  2681. dst.SInt3 = (int)a.Float3;
  2682. return dst;
  2683. }
  2684. // _mm_set_sd
  2685. /// <summary> Copy double-precision (64-bit) floating-point element "a" to the lower element of "dst", and zero the upper element. </summary>
  2686. /// <param name="a">Double-precision floating-point element</param>
  2687. /// <returns>Vector</returns>
  2688. [DebuggerStepThrough]
  2689. public static v128 set_sd(double a)
  2690. {
  2691. // Burst IR is fine.
  2692. v128 dst = default(v128);
  2693. dst.Double0 = a;
  2694. dst.Double1 = 0.0;
  2695. return dst;
  2696. }
  2697. // _mm_set1_pd
  2698. /// <summary> Broadcast double-precision (64-bit) floating-point value "a" to all elements of "dst". </summary>
  2699. /// <param name="a">Double-precision floating-point element</param>
  2700. /// <returns>Vector</returns>
  2701. [DebuggerStepThrough]
  2702. public static v128 set1_pd(double a)
  2703. {
  2704. // Burst IR is fine.
  2705. v128 dst = default(v128);
  2706. dst.Double0 = dst.Double1 = a;
  2707. return dst;
  2708. }
  2709. // _mm_set_pd1
  2710. /// <summary> Broadcast double-precision (64-bit) floating-point value "a" to all elements of "dst". </summary>
  2711. /// <param name="a">Double-precision floating-point element</param>
  2712. /// <returns>Vector</returns>
  2713. [DebuggerStepThrough]
  2714. [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
  2715. public static v128 set_pd1(double a)
  2716. {
  2717. // Burst IR is fine.
  2718. return set1_pd(a);
  2719. }
  2720. // _mm_set_pd
  2721. /// <summary> Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values. </summary>
  2722. /// <param name="e1">Double-precision floating-point element 1</param>
  2723. /// <param name="e0">Double-precision floating-point element 0</param>
  2724. /// <returns>Vector</returns>
  2725. [DebuggerStepThrough]
  2726. public static v128 set_pd(double e1, double e0)
  2727. {
  2728. // Burst IR is fine.
  2729. v128 dst = default(v128);
  2730. dst.Double0 = e0;
  2731. dst.Double1 = e1;
  2732. return dst;
  2733. }
  2734. // _mm_setr_pd
  2735. /// <summary> Set packed double-precision (64-bit) floating-point elements in "dst" with the supplied values in reverse order. </summary>
  2736. /// <param name="e1">Double-precision floating-point element 1</param>
  2737. /// <param name="e0">Double-precision floating-point element 0</param>
  2738. /// <returns>Vector</returns>
  2739. [DebuggerStepThrough]
  2740. public static v128 setr_pd(double e1, double e0)
  2741. {
  2742. // Burst IR is fine.
  2743. v128 dst = default(v128);
  2744. dst.Double0 = e1;
  2745. dst.Double1 = e0;
  2746. return dst;
  2747. }
  2748. // _mm_unpackhi_pd
  2749. /// <summary> Unpack and interleave double-precision (64-bit) floating-point elements from the high half of "a" and "b", and store the results in "dst". </summary>
  2750. /// <param name="a">Vector a</param>
  2751. /// <param name="b">Vector b</param>
  2752. /// <returns>Vector</returns>
  2753. [DebuggerStepThrough]
  2754. public static v128 unpackhi_pd(v128 a, v128 b)
  2755. {
  2756. v128 dst = default(v128);
  2757. dst.Double0 = a.Double1;
  2758. dst.Double1 = b.Double1;
  2759. return dst;
  2760. }
  2761. // _mm_unpacklo_pd
  2762. /// <summary> Unpack and interleave double-precision (64-bit) floating-point elements from the low half of "a" and "b", and store the results in "dst". </summary>
  2763. /// <param name="a">Vector a</param>
  2764. /// <param name="b">Vector b</param>
  2765. /// <returns>Vector</returns>
  2766. [DebuggerStepThrough]
  2767. public static v128 unpacklo_pd(v128 a, v128 b)
  2768. {
  2769. v128 dst = default(v128);
  2770. dst.Double0 = a.Double0;
  2771. dst.Double1 = b.Double0;
  2772. return dst;
  2773. }
  2774. // _mm_movemask_pd
  2775. /// <summary> Set each bit of mask "dst" based on the most significant bit of the corresponding packed double-precision (64-bit) floating-point element in "a". </summary>
  2776. /// <param name="a">Vector a</param>
  2777. /// <returns>Integer</returns>
  2778. [DebuggerStepThrough]
  2779. public static int movemask_pd(v128 a)
  2780. {
  2781. int dst = 0;
  2782. if ((a.ULong0 & 0x8000000000000000ul) != 0)
  2783. dst |= 1;
  2784. if ((a.ULong1 & 0x8000000000000000ul) != 0)
  2785. dst |= 2;
  2786. return dst;
  2787. }
  2788. // _mm_shuffle_pd
  2789. /// <summary> Shuffle double-precision (64-bit) floating-point elements using the control in "imm8", and store the results in "dst". </summary>
  2790. /// <param name="a">Vector a</param>
  2791. /// <param name="b">Vector b</param>
  2792. /// <param name="imm8">Control</param>
  2793. /// <returns>Vector</returns>
  2794. [DebuggerStepThrough]
  2795. public static v128 shuffle_pd(v128 a, v128 b, int imm8)
  2796. {
  2797. v128 dst = default(v128);
  2798. double* aptr = &a.Double0;
  2799. double* bptr = &b.Double0;
  2800. dst.Double0 = aptr[(imm8 & 1)];
  2801. dst.Double1 = bptr[((imm8 >> 1) & 1)];
  2802. return dst;
  2803. }
  2804. // _mm_move_sd
  2805. /// <summary> Move the lower double-precision (64-bit) floating-point element from "b" to the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". </summary>
  2806. /// <param name="a">Vector a</param>
  2807. /// <param name="b">Vector b</param>
  2808. /// <returns>Vector</returns>
  2809. [DebuggerStepThrough]
  2810. public static v128 move_sd(v128 a, v128 b)
  2811. {
  2812. // Burst IR is fine.
  2813. v128 dst = default(v128);
  2814. dst.Double0 = b.Double0;
  2815. dst.Double1 = a.Double1;
  2816. return dst;
  2817. }
  2818. /// <summary>
  2819. /// Load unaligned 32-bit integer from memory into the first element of dst.
  2820. /// </summary>
  2821. /// <param name="mem_addr">Memory address</param>
  2822. /// <returns>Vector</returns>
  2823. public static v128 loadu_si32(void* mem_addr)
  2824. {
  2825. return new v128(*(int*)mem_addr, 0, 0, 0);
  2826. }
  2827. /// <summary>
  2828. /// Store 32-bit integer from the first element of a into memory.
  2829. /// mem_addr does not need to be aligned on any particular
  2830. /// boundary.
  2831. /// </summary>
  2832. /// <param name="mem_addr">Memory address</param>
  2833. /// <param name="a">Vector a</param>
  2834. public static void storeu_si32(void* mem_addr, v128 a)
  2835. {
  2836. *(int*)mem_addr = a.SInt0;
  2837. }
  2838. /// <summary>
  2839. /// Load 128-bits of integer data from memory into dst.
  2840. /// </summary>
  2841. /// <remarks>
  2842. /// Burst always generates unaligned loads.
  2843. /// </remarks>
  2844. /// <param name="ptr">Pointer</param>
  2845. /// <returns>Vector</returns>
  2846. [DebuggerStepThrough]
  2847. [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
  2848. public static v128 load_si128(void* ptr)
  2849. {
  2850. return GenericCSharpLoad(ptr);
  2851. }
  2852. /// <summary>
  2853. /// Load 128-bits of integer data from memory into dst.
  2854. /// </summary>
  2855. /// <remarks>
  2856. /// Burst always generates unaligned loads.
  2857. /// </remarks>
  2858. /// <param name="ptr">Pointer</param>
  2859. /// <returns>Vector</returns>
  2860. [DebuggerStepThrough]
  2861. [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
  2862. public static v128 loadu_si128(void* ptr)
  2863. {
  2864. return GenericCSharpLoad(ptr);
  2865. }
  2866. /// <summary>
  2867. /// Store 128-bits of integer data from a into memory.
  2868. /// </summary>
  2869. /// <remarks>
  2870. /// Burst always generates unaligned stores.
  2871. /// </remarks>
  2872. /// <param name="ptr">Pointer</param>
  2873. /// <param name="val">Value</param>
  2874. [DebuggerStepThrough]
  2875. [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
  2876. public static void store_si128(void* ptr, v128 val)
  2877. {
  2878. GenericCSharpStore(ptr, val);
  2879. }
  2880. /// <summary>
  2881. /// Store 128-bits of integer data from a into memory.
  2882. /// </summary>
  2883. /// <remarks>
  2884. /// Burst always generates unaligned stores.
  2885. /// </remarks>
  2886. /// <param name="ptr">Pointer</param>
  2887. /// <param name="val">Value</param>
  2888. [DebuggerStepThrough]
  2889. [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
  2890. public static void storeu_si128(void* ptr, v128 val)
  2891. {
  2892. GenericCSharpStore(ptr, val);
  2893. }
  2894. /// <summary>
  2895. /// Invalidate and flush the cache line that contains p from all levels of the cache hierarchy.
  2896. /// </summary>
  2897. /// <remarks>
  2898. /// **** clflush m8
  2899. /// </remarks>
  2900. /// <param name="ptr">Pointer to the cache line to be flushed.</param>
  2901. [DebuggerStepThrough]
  2902. public static void clflush(void* ptr)
  2903. {
  2904. }
  2905. }
  2906. }
  2907. }