No Description
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

Ssse3.cs 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. using System;
  2. using System.Diagnostics;
  3. namespace Unity.Burst.Intrinsics
  4. {
  5. public unsafe static partial class X86
  6. {
  7. /// <summary>
  8. /// SSSE3 intrinsics
  9. /// </summary>
  10. public static class Ssse3
  11. {
  /// <summary>
  /// Evaluates to true at compile time if SSSE3 intrinsics are supported.
  /// The managed getter below always yields false.
  /// </summary>
  public static bool IsSsse3Supported
  {
      get
      {
          return false;
      }
  }
  // _mm_abs_epi8
  /// <summary> Takes the absolute value of each of the 16 packed signed 8-bit integers in "a" and returns them as packed unsigned 8-bit integers. </summary>
  /// <param name="a">Vector of packed signed 8-bit integers</param>
  /// <returns>Vector of packed unsigned 8-bit absolute values</returns>
  [DebuggerStepThrough]
  public static v128 abs_epi8(v128 a)
  {
      v128 result = default(v128);
      sbyte* source = &a.SByte0;
      byte* target = &result.Byte0;

      for (int lane = 0; lane < 16; ++lane)
      {
          // Widen to int before negating so that -128 becomes 128 rather than overflowing.
          int value = source[lane];
          target[lane] = (byte)(value < 0 ? -value : value);
      }

      return result;
  }
  // _mm_abs_epi16
  /// <summary> Takes the absolute value of each of the 8 packed signed 16-bit integers in "a" and returns them as packed unsigned 16-bit integers. </summary>
  /// <param name="a">Vector of packed signed 16-bit integers</param>
  /// <returns>Vector of packed unsigned 16-bit absolute values</returns>
  [DebuggerStepThrough]
  public static v128 abs_epi16(v128 a)
  {
      v128 result = default(v128);
      short* source = &a.SShort0;
      ushort* target = &result.UShort0;

      int lane = 0;
      while (lane < 8)
      {
          // The 32-bit overload of Math.Abs is used so that -32768 maps to 32768.
          int widened = source[lane];
          int magnitude = Math.Abs(widened);
          target[lane] = (ushort)magnitude;
          lane++;
      }

      return result;
  }
  // _mm_abs_epi32
  /// <summary> Takes the absolute value of each of the 4 packed signed 32-bit integers in "a" and returns them as packed unsigned 32-bit integers. </summary>
  /// <param name="a">Vector of packed signed 32-bit integers</param>
  /// <returns>Vector of packed unsigned 32-bit absolute values</returns>
  [DebuggerStepThrough]
  public static v128 abs_epi32(v128 a)
  {
      v128 result = default(v128);
      int* source = &a.SInt0;
      uint* target = &result.UInt0;

      for (int lane = 0; lane < 4; ++lane)
      {
          // Widen to 64 bits so that int.MinValue has a representable magnitude.
          long widened = source[lane];
          long magnitude = widened < 0 ? -widened : widened;
          target[lane] = (uint)magnitude;
      }

      return result;
  }
  // _mm_shuffle_epi8
  /// <summary> Builds each output byte by using the matching byte of "b" as a control: when bit 7 of the control is set the output byte is zero, otherwise it is the byte of "a" selected by the low four control bits. </summary>
  /// <param name="a">Vector supplying the source bytes</param>
  /// <param name="b">Vector supplying one shuffle control byte per output byte</param>
  /// <returns>Vector of shuffled bytes</returns>
  [DebuggerStepThrough]
  public static v128 shuffle_epi8(v128 a, v128 b)
  {
      v128 result = default(v128);
      byte* target = &result.Byte0;
      byte* source = &a.Byte0;
      byte* controls = &b.Byte0;

      for (int lane = 0; lane < 16; ++lane)
      {
          byte control = controls[lane];
          bool zeroLane = (control & 0x80) != 0;
          target[lane] = zeroLane ? (byte)0x00 : source[control & 15];
      }

      return result;
  }
  // _mm_alignr_epi8
  /// <summary> Concatenate 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "count" bytes, and store the low 16 bytes in "dst". </summary>
  /// <remarks>
  /// "a" forms the upper 16 bytes of the temporary and "b" the lower 16 bytes, so a count of 0 returns "b",
  /// a count of 16 returns "a", and a count of 32 or more returns all zeroes.
  /// </remarks>
  /// <param name="a">Vector a (upper half of the concatenation)</param>
  /// <param name="b">Vector b (lower half of the concatenation)</param>
  /// <param name="count">Byte count to shift right by; only the low 8 bits are used</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 alignr_epi8(v128 a, v128 b, int count)
  {
      var dst = default(v128);
      byte* dptr = &dst.Byte0;
      byte* aptr = &a.Byte0;
      byte* bptr = &b.Byte0;

      // The instruction takes an 8-bit immediate; keeping only those bits also guarantees
      // that a negative or oversized count can never index outside the two operands.
      int shift = count & 0xFF;

      for (int i = 0; i < 16; ++i)
      {
          // Position of this output byte inside the 32-byte temporary (b = bytes 0..15, a = bytes 16..31).
          int src = i + shift;
          if (src < 16)
          {
              dptr[i] = bptr[src];
          }
          else if (src < 32)
          {
              dptr[i] = aptr[src - 16];
          }
          // Anything shifted in from beyond the temporary stays zero (dst starts zeroed).
      }

      return dst;
  }
  // _mm_hadd_epi16
  /// <summary> Adds each adjacent pair of 16-bit integers in "a" and in "b". The four sums from "a" fill the low half of the result and the four sums from "b" fill the high half; sums wrap to 16 bits. </summary>
  /// <param name="a">Vector whose pair sums fill result elements 0..3</param>
  /// <param name="b">Vector whose pair sums fill result elements 4..7</param>
  /// <returns>Vector of packed signed 16-bit sums</returns>
  [DebuggerStepThrough]
  public static v128 hadd_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      short* target = &result.SShort0;
      short* left = &a.SShort0;
      short* right = &b.SShort0;

      for (int pair = 0; pair < 4; pair++)
      {
          int low = 2 * pair;
          int high = low + 1;
          target[pair] = (short)(left[high] + left[low]);
          target[pair + 4] = (short)(right[high] + right[low]);
      }

      return result;
  }
  // _mm_hadds_epi16
  /// <summary> Adds each adjacent pair of 16-bit integers in "a" and in "b", passing every sum through Saturate_To_Int16. The four results from "a" fill the low half of the result and the four from "b" fill the high half. </summary>
  /// <param name="a">Vector whose pair sums fill result elements 0..3</param>
  /// <param name="b">Vector whose pair sums fill result elements 4..7</param>
  /// <returns>Vector of packed signed 16-bit saturated sums</returns>
  [DebuggerStepThrough]
  public static v128 hadds_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      short* target = &result.SShort0;
      short* left = &a.SShort0;
      short* right = &b.SShort0;

      for (int pair = 0; pair < 4; pair++)
      {
          int low = 2 * pair;
          int high = low + 1;

          // The sums are formed as 32-bit ints so the helper sees the unclipped value.
          int leftSum = left[high] + left[low];
          int rightSum = right[high] + right[low];

          target[pair] = Saturate_To_Int16(leftSum);
          target[pair + 4] = Saturate_To_Int16(rightSum);
      }

      return result;
  }
  // _mm_hadd_epi32
  /// <summary> Adds each adjacent pair of 32-bit integers in "a" and in "b". The two sums from "a" fill the low half of the result and the two sums from "b" fill the high half. </summary>
  /// <param name="a">Vector whose pair sums fill result elements 0..1</param>
  /// <param name="b">Vector whose pair sums fill result elements 2..3</param>
  /// <returns>Vector of packed signed 32-bit sums</returns>
  [DebuggerStepThrough]
  public static v128 hadd_epi32(v128 a, v128 b)
  {
      v128 result = default(v128);
      int* target = &result.SInt0;
      int* left = &a.SInt0;
      int* right = &b.SInt0;

      for (int pair = 0; pair < 2; pair++)
      {
          int low = 2 * pair;
          target[pair] = left[low + 1] + left[low];
          target[pair + 2] = right[low + 1] + right[low];
      }

      return result;
  }
  // _mm_hsub_epi16
  /// <summary> For each adjacent pair of 16-bit integers in "a" and in "b", subtracts the odd-indexed element from the even-indexed one. The four differences from "a" fill the low half of the result and the four from "b" fill the high half; differences wrap to 16 bits. </summary>
  /// <param name="a">Vector whose pair differences fill result elements 0..3</param>
  /// <param name="b">Vector whose pair differences fill result elements 4..7</param>
  /// <returns>Vector of packed signed 16-bit differences</returns>
  [DebuggerStepThrough]
  public static v128 hsub_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      short* target = &result.SShort0;
      short* left = &a.SShort0;
      short* right = &b.SShort0;

      for (int pair = 0; pair < 4; pair++)
      {
          int even = 2 * pair;
          int odd = even + 1;
          target[pair] = (short)(left[even] - left[odd]);
          target[pair + 4] = (short)(right[even] - right[odd]);
      }

      return result;
  }
  // _mm_hsubs_epi16
  /// <summary> For each adjacent pair of 16-bit integers in "a" and in "b", subtracts the odd-indexed element from the even-indexed one and passes the difference through Saturate_To_Int16. The four results from "a" fill the low half of the result and the four from "b" fill the high half. </summary>
  /// <param name="a">Vector whose pair differences fill result elements 0..3</param>
  /// <param name="b">Vector whose pair differences fill result elements 4..7</param>
  /// <returns>Vector of packed signed 16-bit saturated differences</returns>
  [DebuggerStepThrough]
  public static v128 hsubs_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      short* target = &result.SShort0;
      short* left = &a.SShort0;
      short* right = &b.SShort0;

      for (int pair = 0; pair < 4; pair++)
      {
          int even = 2 * pair;
          int odd = even + 1;

          // The differences are formed as 32-bit ints so the helper sees the unclipped value.
          int leftDifference = left[even] - left[odd];
          int rightDifference = right[even] - right[odd];

          target[pair] = Saturate_To_Int16(leftDifference);
          target[pair + 4] = Saturate_To_Int16(rightDifference);
      }

      return result;
  }
  // _mm_hsub_epi32
  /// <summary> For each adjacent pair of 32-bit integers in "a" and in "b", subtracts the odd-indexed element from the even-indexed one. The two differences from "a" fill the low half of the result and the two from "b" fill the high half. </summary>
  /// <param name="a">Vector whose pair differences fill result elements 0..1</param>
  /// <param name="b">Vector whose pair differences fill result elements 2..3</param>
  /// <returns>Vector of packed signed 32-bit differences</returns>
  [DebuggerStepThrough]
  public static v128 hsub_epi32(v128 a, v128 b)
  {
      v128 result = default(v128);
      int* target = &result.SInt0;
      int* left = &a.SInt0;
      int* right = &b.SInt0;

      for (int pair = 0; pair < 2; pair++)
      {
          int even = 2 * pair;
          target[pair] = left[even] - left[even + 1];
          target[pair + 2] = right[even] - right[even + 1];
      }

      return result;
  }
  // _mm_maddubs_epi16
  /// <summary> Multiplies each unsigned 8-bit integer of "a" by the signed 8-bit integer of "b" at the same position, then adds each adjacent pair of products and stores the sum, passed through Saturate_To_Int16, as one signed 16-bit element of the result. </summary>
  /// <param name="a">Vector read as 16 unsigned 8-bit integers</param>
  /// <param name="b">Vector read as 16 signed 8-bit integers</param>
  /// <returns>Vector of 8 packed signed 16-bit saturated sums</returns>
  [DebuggerStepThrough]
  public static v128 maddubs_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      short* target = &result.SShort0;
      byte* unsignedBytes = &a.Byte0;
      sbyte* signedBytes = &b.SByte0;

      for (int pair = 0; pair < 8; ++pair)
      {
          int even = 2 * pair;
          int odd = even + 1;

          // Both products are computed in 32-bit arithmetic, so neither they nor their sum can overflow here.
          int oddProduct = unsignedBytes[odd] * signedBytes[odd];
          int evenProduct = unsignedBytes[even] * signedBytes[even];

          target[pair] = Saturate_To_Int16(oddProduct + evenProduct);
      }

      return result;
  }
  // _mm_mulhrs_epi16
  /// <summary> Multiplies the packed signed 16-bit integers of "a" and "b" into 32-bit intermediates, shifts each intermediate right by 14 bits, adds 1 to round, shifts right by one more bit, and stores the low 16 bits in the result. </summary>
  /// <param name="a">Vector of packed signed 16-bit integers</param>
  /// <param name="b">Vector of packed signed 16-bit integers</param>
  /// <returns>Vector of packed signed 16-bit rounded high products</returns>
  [DebuggerStepThrough]
  public static v128 mulhrs_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      short* target = &result.SShort0;
      short* left = &a.SShort0;
      short* right = &b.SShort0;

      for (int lane = 0; lane < 8; ++lane)
      {
          int product = left[lane] * right[lane];
          // Drop 14 bits, add the rounding bit, then drop the final bit.
          int rounded = ((product >> 14) + 1) >> 1;
          target[lane] = (short)rounded;
      }

      return result;
  }
  // _mm_sign_epi8
  /// <summary> Applies the sign of each signed 8-bit integer in "b" to the matching 8-bit integer in "a": the element is negated when "b" is negative, zeroed when "b" is zero, and copied unchanged when "b" is positive. </summary>
  /// <param name="a">Vector of packed signed 8-bit values</param>
  /// <param name="b">Vector of packed signed 8-bit sign selectors</param>
  /// <returns>Vector of packed signed 8-bit results</returns>
  [DebuggerStepThrough]
  public static v128 sign_epi8(v128 a, v128 b)
  {
      v128 result = default(v128);
      sbyte* target = &result.SByte0;
      sbyte* values = &a.SByte0;
      sbyte* selectors = &b.SByte0;

      for (int lane = 0; lane < 16; ++lane)
      {
          sbyte selector = selectors[lane];

          if (selector == 0)
          {
              target[lane] = 0;
              continue;
          }

          sbyte value = values[lane];
          target[lane] = selector < 0 ? (sbyte)-value : value;
      }

      return result;
  }
  // _mm_sign_epi16
  /// <summary> Applies the sign of each signed 16-bit integer in "b" to the matching 16-bit integer in "a": the element is negated when "b" is negative, zeroed when "b" is zero, and copied unchanged when "b" is positive. </summary>
  /// <param name="a">Vector of packed signed 16-bit values</param>
  /// <param name="b">Vector of packed signed 16-bit sign selectors</param>
  /// <returns>Vector of packed signed 16-bit results</returns>
  [DebuggerStepThrough]
  public static v128 sign_epi16(v128 a, v128 b)
  {
      v128 result = default(v128);
      short* target = &result.SShort0;
      short* values = &a.SShort0;
      short* selectors = &b.SShort0;

      int lane = 0;
      while (lane < 8)
      {
          short selector = selectors[lane];
          short value = values[lane];

          short chosen;
          if (selector > 0)
          {
              chosen = value;
          }
          else if (selector < 0)
          {
              chosen = (short)-value;
          }
          else
          {
              chosen = 0;
          }

          target[lane] = chosen;
          lane++;
      }

      return result;
  }
  // _mm_sign_epi32
  /// <summary> Applies the sign of each signed 32-bit integer in "b" to the matching 32-bit integer in "a": the element is negated when "b" is negative, zeroed when "b" is zero, and copied unchanged when "b" is positive. </summary>
  /// <param name="a">Vector of packed signed 32-bit values</param>
  /// <param name="b">Vector of packed signed 32-bit sign selectors</param>
  /// <returns>Vector of packed signed 32-bit results</returns>
  [DebuggerStepThrough]
  public static v128 sign_epi32(v128 a, v128 b)
  {
      v128 result = default(v128);
      int* target = &result.SInt0;
      int* values = &a.SInt0;
      int* selectors = &b.SInt0;

      for (int lane = 0; lane < 4; ++lane)
      {
          int selector = selectors[lane];
          int value = values[lane];
          target[lane] = selector < 0 ? -value : (selector == 0 ? 0 : value);
      }

      return result;
  }
  347. }
  348. }
  349. }