123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371 |
- using System;
- using System.Diagnostics;
-
- namespace Unity.Burst.Intrinsics
- {
- public unsafe static partial class X86
- {
- /// <summary>
- /// SSSE3 intrinsics
- /// </summary>
- public static class Ssse3
- {
/// <summary>
/// Evaluates to true at compile time if SSSE3 intrinsics are supported.
/// </summary>
/// <remarks>
/// The managed getter shown here unconditionally returns false.
/// </remarks>
public static bool IsSsse3Supported
{
    get
    {
        return false;
    }
}
-
// _mm_abs_epi8
/// <summary>
/// Takes the absolute value of each of the 16 signed 8-bit lanes of "a" and
/// returns them as unsigned 8-bit lanes.
/// </summary>
/// <param name="a">Source vector, read as 16 signed bytes</param>
/// <returns>Vector whose 16 byte lanes hold the unsigned magnitudes</returns>
[DebuggerStepThrough]
public static v128 abs_epi8(v128 a)
{
    v128 result = default(v128);
    sbyte* source = &a.SByte0;
    byte* dest = &result.Byte0;

    for (int lane = 0; lane < 16; ++lane)
    {
        // Widen to int before taking the magnitude so that -128 yields 128,
        // which still fits in the unsigned destination lane.
        int widened = source[lane];
        dest[lane] = (byte)Math.Abs(widened);
    }

    return result;
}
-
// _mm_abs_epi16
/// <summary>
/// Takes the absolute value of each of the 8 signed 16-bit lanes of "a" and
/// returns them as unsigned 16-bit lanes.
/// </summary>
/// <param name="a">Source vector, read as 8 signed 16-bit integers</param>
/// <returns>Vector whose 8 ushort lanes hold the unsigned magnitudes</returns>
[DebuggerStepThrough]
public static v128 abs_epi16(v128 a)
{
    v128 result = default(v128);
    short* source = &a.SShort0;
    ushort* dest = &result.UShort0;

    for (int lane = 0; lane < 8; ++lane)
    {
        // Widen to int first: the magnitude of -32768 does not fit in a short
        // but does fit in the unsigned 16-bit destination.
        int widened = source[lane];
        dest[lane] = (ushort)Math.Abs(widened);
    }

    return result;
}
-
// _mm_abs_epi32
/// <summary>
/// Takes the absolute value of each of the 4 signed 32-bit lanes of "a" and
/// returns them as unsigned 32-bit lanes.
/// </summary>
/// <param name="a">Source vector, read as 4 signed 32-bit integers</param>
/// <returns>Vector whose 4 uint lanes hold the unsigned magnitudes</returns>
[DebuggerStepThrough]
public static v128 abs_epi32(v128 a)
{
    v128 result = default(v128);
    int* source = &a.SInt0;
    uint* dest = &result.UInt0;

    for (int lane = 0; lane < 4; ++lane)
    {
        // Work in 64 bits so the magnitude of int.MinValue is representable
        // before it is narrowed to the unsigned 32-bit lane.
        long widened = source[lane];
        dest[lane] = (uint)Math.Abs(widened);
    }

    return result;
}
-
// _mm_shuffle_epi8
/// <summary>
/// Builds a vector by picking bytes of "a" according to the per-byte control values in "b".
/// For each of the 16 lanes: if the control byte has its top bit (0x80) set the output byte is zero,
/// otherwise the low four bits of the control byte select which byte of "a" is copied.
/// </summary>
/// <param name="a">Vector supplying the source bytes</param>
/// <param name="b">Vector supplying one shuffle control byte per output lane</param>
/// <returns>The shuffled vector</returns>
[DebuggerStepThrough]
public static v128 shuffle_epi8(v128 a, v128 b)
{
    v128 result = default(v128);
    byte* dest = &result.Byte0;
    byte* source = &a.Byte0;
    byte* control = &b.Byte0;

    for (int lane = 0; lane < 16; ++lane)
    {
        byte selector = control[lane];
        bool zeroLane = (selector & 0x80) != 0;

        // Only the low nibble is used as an index, so the read always stays inside "a".
        dest[lane] = zeroLane ? (byte)0x00 : source[selector & 15];
    }

    return result;
}
-
-
// _mm_alignr_epi8
/// <summary>
/// Concatenate 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by
/// "count" bytes, and store the low 16 bytes in "dst".
/// </summary>
/// <remarks>
/// In the 32-byte temporary, "b" occupies the low 16 bytes and "a" the high 16 bytes. Output byte i is
/// therefore temporary byte (i + count): taken from "b" while (i + count) is below 16, from "a" while it is
/// below 32, and zero beyond that. A count of 32 or more yields an all-zero vector. A negative count has no
/// hardware meaning; it is treated like an oversized shift and also yields an all-zero vector, so this
/// method never reads or writes outside the three vectors.
/// </remarks>
/// <param name="a">Vector a (high 16 bytes of the temporary)</param>
/// <param name="b">Vector b (low 16 bytes of the temporary)</param>
/// <param name="count">Byte count to shift right by</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 alignr_epi8(v128 a, v128 b, int count)
{
    var dst = default(v128);

    // Everything is shifted out (or the count is invalid): result stays all zero.
    if (count < 0 || count >= 32)
    {
        return dst;
    }

    byte* dptr = &dst.Byte0;
    byte* aptr = &a.Byte0;
    byte* bptr = &b.Byte0;

    for (int i = 0; i < 16; ++i)
    {
        // Index of the byte inside the conceptual 32-byte concatenation [b (low), a (high)].
        int src = i + count;

        if (src < 16)
        {
            dptr[i] = bptr[src];
        }
        else if (src < 32)
        {
            dptr[i] = aptr[src - 16];
        }
        // else: shifted in from beyond the temporary, leave the lane at zero.
    }

    return dst;
}
-
// _mm_hadd_epi16
/// <summary>
/// Adds each adjacent pair of 16-bit lanes. The four pair sums from "a" fill the low four
/// 16-bit lanes of the result and the four pair sums from "b" fill the high four lanes.
/// Each sum is truncated to 16 bits.
/// </summary>
/// <param name="a">Vector whose pair sums go to result lanes 0..3</param>
/// <param name="b">Vector whose pair sums go to result lanes 4..7</param>
/// <returns>Vector of eight signed 16-bit pair sums</returns>
[DebuggerStepThrough]
public static v128 hadd_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* dest = &result.SShort0;
    short* fromA = &a.SShort0;
    short* fromB = &b.SShort0;

    for (int pair = 0; pair < 4; ++pair)
    {
        int even = 2 * pair;
        int odd = even + 1;

        // The addition happens in int; the cast keeps only the low 16 bits.
        dest[pair] = (short)(fromA[odd] + fromA[even]);
        dest[pair + 4] = (short)(fromB[odd] + fromB[even]);
    }

    return result;
}
-
// _mm_hadds_epi16
/// <summary>
/// Adds each adjacent pair of 16-bit lanes using saturation. The four pair sums from "a" fill the
/// low four 16-bit lanes of the result and the four pair sums from "b" fill the high four lanes.
/// </summary>
/// <param name="a">Vector whose pair sums go to result lanes 0..3</param>
/// <param name="b">Vector whose pair sums go to result lanes 4..7</param>
/// <returns>Vector of eight signed 16-bit pair sums</returns>
[DebuggerStepThrough]
public static v128 hadds_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* dest = &result.SShort0;
    short* fromA = &a.SShort0;
    short* fromB = &b.SShort0;

    for (int pair = 0; pair < 4; ++pair)
    {
        int even = 2 * pair;
        int odd = even + 1;

        // Sums are formed as 32-bit ints and then narrowed by Saturate_To_Int16
        // (defined elsewhere; presumably clamps to the signed 16-bit range).
        int sumA = fromA[odd] + fromA[even];
        int sumB = fromB[odd] + fromB[even];

        dest[pair] = Saturate_To_Int16(sumA);
        dest[pair + 4] = Saturate_To_Int16(sumB);
    }

    return result;
}
-
// _mm_hadd_epi32
/// <summary>
/// Adds each adjacent pair of 32-bit lanes. The two pair sums from "a" fill result lanes 0 and 1,
/// and the two pair sums from "b" fill result lanes 2 and 3.
/// </summary>
/// <param name="a">Vector whose pair sums go to result lanes 0..1</param>
/// <param name="b">Vector whose pair sums go to result lanes 2..3</param>
/// <returns>Vector of four signed 32-bit pair sums</returns>
[DebuggerStepThrough]
public static v128 hadd_epi32(v128 a, v128 b)
{
    // Pair sums, computed up front in lane order.
    int lowPairOfA = a.SInt1 + a.SInt0;
    int highPairOfA = a.SInt3 + a.SInt2;
    int lowPairOfB = b.SInt1 + b.SInt0;
    int highPairOfB = b.SInt3 + b.SInt2;

    v128 result = default(v128);
    result.SInt0 = lowPairOfA;
    result.SInt1 = highPairOfA;
    result.SInt2 = lowPairOfB;
    result.SInt3 = highPairOfB;
    return result;
}
-
// _mm_hsub_epi16
/// <summary>
/// Subtracts within each adjacent pair of 16-bit lanes (even lane minus odd lane). The four
/// differences from "a" fill the low four 16-bit lanes of the result and the four differences
/// from "b" fill the high four lanes. Each difference is truncated to 16 bits.
/// </summary>
/// <param name="a">Vector whose pair differences go to result lanes 0..3</param>
/// <param name="b">Vector whose pair differences go to result lanes 4..7</param>
/// <returns>Vector of eight signed 16-bit pair differences</returns>
[DebuggerStepThrough]
public static v128 hsub_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* dest = &result.SShort0;
    short* fromA = &a.SShort0;
    short* fromB = &b.SShort0;

    for (int pair = 0; pair < 4; ++pair)
    {
        int even = 2 * pair;
        int odd = even + 1;

        // The subtraction happens in int; the cast keeps only the low 16 bits.
        dest[pair] = (short)(fromA[even] - fromA[odd]);
        dest[pair + 4] = (short)(fromB[even] - fromB[odd]);
    }

    return result;
}
-
// _mm_hsubs_epi16
/// <summary>
/// Subtracts within each adjacent pair of 16-bit lanes (even lane minus odd lane) using saturation.
/// The four differences from "a" fill the low four 16-bit lanes of the result and the four
/// differences from "b" fill the high four lanes.
/// </summary>
/// <param name="a">Vector whose pair differences go to result lanes 0..3</param>
/// <param name="b">Vector whose pair differences go to result lanes 4..7</param>
/// <returns>Vector of eight signed 16-bit pair differences</returns>
[DebuggerStepThrough]
public static v128 hsubs_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* dest = &result.SShort0;
    short* fromA = &a.SShort0;
    short* fromB = &b.SShort0;

    for (int pair = 0; pair < 4; ++pair)
    {
        int even = 2 * pair;
        int odd = even + 1;

        // Differences are formed as 32-bit ints and then narrowed by Saturate_To_Int16
        // (defined elsewhere; presumably clamps to the signed 16-bit range).
        int diffA = fromA[even] - fromA[odd];
        int diffB = fromB[even] - fromB[odd];

        dest[pair] = Saturate_To_Int16(diffA);
        dest[pair + 4] = Saturate_To_Int16(diffB);
    }

    return result;
}
-
// _mm_hsub_epi32
/// <summary>
/// Subtracts within each adjacent pair of 32-bit lanes (even lane minus odd lane). The two
/// differences from "a" fill result lanes 0 and 1, and the two differences from "b" fill
/// result lanes 2 and 3.
/// </summary>
/// <param name="a">Vector whose pair differences go to result lanes 0..1</param>
/// <param name="b">Vector whose pair differences go to result lanes 2..3</param>
/// <returns>Vector of four signed 32-bit pair differences</returns>
[DebuggerStepThrough]
public static v128 hsub_epi32(v128 a, v128 b)
{
    // Pair differences, computed up front in lane order.
    int lowPairOfA = a.SInt0 - a.SInt1;
    int highPairOfA = a.SInt2 - a.SInt3;
    int lowPairOfB = b.SInt0 - b.SInt1;
    int highPairOfB = b.SInt2 - b.SInt3;

    v128 result = default(v128);
    result.SInt0 = lowPairOfA;
    result.SInt1 = highPairOfA;
    result.SInt2 = lowPairOfB;
    result.SInt3 = highPairOfB;
    return result;
}
-
// _mm_maddubs_epi16
/// <summary>
/// Multiplies each unsigned 8-bit integer of "a" by the corresponding signed 8-bit integer of "b",
/// adds the two products of every adjacent byte pair, and stores the saturated sum in the matching
/// 16-bit lane of the result.
/// </summary>
/// <param name="a">Vector read as 16 unsigned bytes</param>
/// <param name="b">Vector read as 16 signed bytes</param>
/// <returns>Vector of eight signed 16-bit results</returns>
[DebuggerStepThrough]
public static v128 maddubs_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* dest = &result.SShort0;
    byte* unsignedBytes = &a.Byte0;
    sbyte* signedBytes = &b.SByte0;

    for (int lane = 0; lane < 8; ++lane)
    {
        int even = 2 * lane;
        int odd = even + 1;

        // Both products and their sum are computed as 32-bit ints.
        int oddProduct = unsignedBytes[odd] * signedBytes[odd];
        int evenProduct = unsignedBytes[even] * signedBytes[even];

        // Saturate_To_Int16 is defined elsewhere; presumably clamps to the signed 16-bit range.
        dest[lane] = Saturate_To_Int16(oddProduct + evenProduct);
    }

    return result;
}
-
-
// _mm_mulhrs_epi16
/// <summary>
/// Multiplies the corresponding signed 16-bit lanes of "a" and "b" into 32-bit products, then for
/// each product keeps the 18 most significant bits, rounds by adding 1, and stores bits [16:1]
/// in the matching 16-bit lane of the result.
/// </summary>
/// <param name="a">Vector read as 8 signed 16-bit integers</param>
/// <param name="b">Vector read as 8 signed 16-bit integers</param>
/// <returns>Vector of eight rounded, scaled 16-bit products</returns>
[DebuggerStepThrough]
public static v128 mulhrs_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* dest = &result.SShort0;
    short* left = &a.SShort0;
    short* right = &b.SShort0;

    for (int lane = 0; lane < 8; ++lane)
    {
        int product = left[lane] * right[lane];

        // Drop the low 14 bits, add the rounding bit, then drop one more bit.
        int rounded = ((product >> 14) + 1) >> 1;

        dest[lane] = (short)rounded;
    }

    return result;
}
-
// _mm_sign_epi8
/// <summary>
/// Applies the sign of each signed 8-bit integer in "b" to the corresponding 8-bit integer in "a":
/// the element of "a" is negated when the element of "b" is negative, zeroed when it is zero,
/// and copied unchanged when it is positive.
/// </summary>
/// <param name="a">Vector supplying the values</param>
/// <param name="b">Vector supplying the signs</param>
/// <returns>Vector of 16 signed 8-bit results</returns>
[DebuggerStepThrough]
public static v128 sign_epi8(v128 a, v128 b)
{
    v128 result = default(v128);
    sbyte* dest = &result.SByte0;
    sbyte* values = &a.SByte0;
    sbyte* signs = &b.SByte0;

    for (int lane = 0; lane < 16; ++lane)
    {
        sbyte sign = signs[lane];
        sbyte value = values[lane];

        if (sign == 0)
        {
            dest[lane] = 0;
        }
        else if (sign > 0)
        {
            dest[lane] = value;
        }
        else
        {
            // Negation is done in int and narrowed back to 8 bits.
            dest[lane] = (sbyte)-value;
        }
    }

    return result;
}
-
// _mm_sign_epi16
/// <summary>
/// Applies the sign of each signed 16-bit integer in "b" to the corresponding 16-bit integer in "a":
/// the element of "a" is negated when the element of "b" is negative, zeroed when it is zero,
/// and copied unchanged when it is positive.
/// </summary>
/// <param name="a">Vector supplying the values</param>
/// <param name="b">Vector supplying the signs</param>
/// <returns>Vector of 8 signed 16-bit results</returns>
[DebuggerStepThrough]
public static v128 sign_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* dest = &result.SShort0;
    short* values = &a.SShort0;
    short* signs = &b.SShort0;

    for (int lane = 0; lane < 8; ++lane)
    {
        short sign = signs[lane];
        short value = values[lane];

        if (sign == 0)
        {
            dest[lane] = 0;
        }
        else if (sign > 0)
        {
            dest[lane] = value;
        }
        else
        {
            // Negation is done in int and narrowed back to 16 bits.
            dest[lane] = (short)-value;
        }
    }

    return result;
}
-
// _mm_sign_epi32
/// <summary>
/// Applies the sign of each signed 32-bit integer in "b" to the corresponding 32-bit integer in "a":
/// the element of "a" is negated when the element of "b" is negative, zeroed when it is zero,
/// and copied unchanged when it is positive.
/// </summary>
/// <param name="a">Vector supplying the values</param>
/// <param name="b">Vector supplying the signs</param>
/// <returns>Vector of 4 signed 32-bit results</returns>
[DebuggerStepThrough]
public static v128 sign_epi32(v128 a, v128 b)
{
    v128 result = default(v128);
    int* dest = &result.SInt0;
    int* values = &a.SInt0;
    int* signs = &b.SInt0;

    for (int lane = 0; lane < 4; ++lane)
    {
        int sign = signs[lane];
        int value = values[lane];

        if (sign == 0)
        {
            dest[lane] = 0;
        }
        else if (sign > 0)
        {
            dest[lane] = value;
        }
        else
        {
            dest[lane] = -value;
        }
    }

    return result;
}
- }
- }
- }
|