Sin descripción
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

F16C.cs 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. using System.Diagnostics;
  2. namespace Unity.Burst.Intrinsics
  3. {
  4. public unsafe static partial class X86
  5. {
  6. /// <summary>
  7. /// F16C intrinsics
  8. /// </summary>
  9. public static class F16C
  10. {
  /// <summary>
  /// Reports whether the F16C intrinsics are available; evaluates to a constant at compile time.
  /// </summary>
  /// <remarks>
  /// Burst couples F16C availability to AVX2 availability so that fewer distinct feature sets
  /// have to be supported, which is why this simply forwards to the AVX2 check.
  /// </remarks>
  public static bool IsF16CSupported
  {
      get
      {
          return Avx2.IsAvx2Supported;
      }
  }
  /// <summary>
  /// Converts a half (hiding in a ushort) to a float (hiding in a uint).
  /// </summary>
  /// <remarks>
  /// Signed zero keeps only its sign bit, denormal halves are renormalized into normal floats,
  /// normal halves are re-biased from 15 to 127, and Inf/NaN map to the all-ones float exponent
  /// with the 10-bit mantissa payload moved to the top of the 23-bit float mantissa.
  /// </remarks>
  /// <param name="h">The half to convert</param>
  /// <returns>The float result</returns>
  [DebuggerStepThrough]
  private static uint HalfToFloat(ushort h)
  {
      var signed = (h & 0x8000u) != 0;
      // Kept as a signed int on purpose: the denormal path below starts counting from -1.
      int exponent = (h >> 10) & 0x1f;
      var mantissa = h & 0x3ffu;
      var result = signed ? 0x80000000u : 0u;
      if (!(exponent == 0 && mantissa == 0))
      {
          // Denormal (converts to normalized)
          if (exponent == 0)
          {
              // Adjust mantissa so it's normalized (and keep track of exponent adjustment)
              exponent = -1;
              do
              {
                  exponent++;
                  mantissa <<= 1;
              } while ((mantissa & 0x400) == 0);
              result |= (uint)((127 - 15 - exponent) << 23);
              // Have to re-mask the mantissa here because we've been shifting bits up.
              result |= (mantissa & 0x3ff) << 13;
          }
          else
          {
              var isInfOrNan = exponent == 0x1f;
              // Pick the biased float exponent first and shift afterwards. '<<' binds tighter than
              // '?:', so shifting inside the conditional would leave the Inf/NaN value of 255
              // unshifted in the low mantissa bits instead of in the exponent field.
              var floatExponent = isInfOrNan ? 255 : (127 - 15 + exponent);
              result |= (uint)(floatExponent << 23);
              result |= mantissa << 13;
          }
      }
      return result;
  }
  /// <summary>
  /// Widens the four half-precision (16-bit) floating-point elements in the low lanes of a to single-precision (32-bit) floating-point elements and returns them packed in a 128-bit vector.
  /// </summary>
  /// <remarks>
  /// **** vcvtph2ps xmm, xmm
  /// </remarks>
  /// <param name="a">Source vector; only UShort0 through UShort3 are read</param>
  /// <returns>Vector holding the four converted elements</returns>
  [DebuggerStepThrough]
  public static v128 cvtph_ps(v128 a)
  {
      // Convert lane by lane, in ascending lane order.
      uint lane0 = HalfToFloat(a.UShort0);
      uint lane1 = HalfToFloat(a.UShort1);
      uint lane2 = HalfToFloat(a.UShort2);
      uint lane3 = HalfToFloat(a.UShort3);
      return new v128(lane0, lane1, lane2, lane3);
  }
  /// <summary>
  /// Widens all eight half-precision (16-bit) floating-point elements in a to single-precision (32-bit) floating-point elements and returns them packed in a 256-bit vector.
  /// </summary>
  /// <remarks>
  /// **** vcvtph2ps ymm, xmm
  /// </remarks>
  /// <param name="a">Source vector; UShort0 through UShort7 are read</param>
  /// <returns>Vector holding the eight converted elements</returns>
  [DebuggerStepThrough]
  public static v256 mm256_cvtph_ps(v128 a)
  {
      // Convert lane by lane, in ascending lane order.
      uint lane0 = HalfToFloat(a.UShort0);
      uint lane1 = HalfToFloat(a.UShort1);
      uint lane2 = HalfToFloat(a.UShort2);
      uint lane3 = HalfToFloat(a.UShort3);
      uint lane4 = HalfToFloat(a.UShort4);
      uint lane5 = HalfToFloat(a.UShort5);
      uint lane6 = HalfToFloat(a.UShort6);
      uint lane7 = HalfToFloat(a.UShort7);
      return new v256(lane0, lane1, lane2, lane3, lane4, lane5, lane6, lane7);
  }
  // Using ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
  //
  // Base half-precision bit pattern for every possible float sign/exponent combination.
  // FloatToHalf indexes this table with the top 9 bits of the float (f >> 23), so it has 512 entries:
  //   - entries 0..255 serve inputs with the sign bit clear, entries 256..511 repeat the same
  //     pattern with the half sign bit (0x8000) set;
  //   - within each half, biased exponents up to 102 map to (signed) zero, 103..112 map to a single
  //     set mantissa bit (half denormal range), 113..142 map to the half exponent field stepping by
  //     0x0400, and 143..255 map to the half infinity pattern (0x7C00).
  // The shifted float mantissa (see ShiftTable) is added on top of the entry selected here.
  private static readonly ushort[] BaseTable =
  {
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100,
  0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00,
  0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00,
  0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
  0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
  0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
  0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
  0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
  0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
  0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
  0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
  0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
  0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
  0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
  0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
  0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
  0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100,
  0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00,
  0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFC00,
  0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
  0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
  0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
  0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
  0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
  0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
  0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
  };
  // Right-shift amount applied to the 23-bit float mantissa before it is added to the matching
  // BaseTable entry. Indexed the same way as BaseTable: by the top 9 bits of the float (f >> 23),
  // 512 entries, with the second 256 entries mirroring the first for inputs with the sign bit set.
  //   - 13 keeps the top 10 mantissa bits (the width of a half mantissa); it is used for the
  //     exponents that BaseTable maps to normal halves and for the last entry of each half
  //     (exponent 0xFF, i.e. float Inf/NaN);
  //   - 14..23 keep progressively fewer bits for the exponents that BaseTable maps to half denormals;
  //   - 24 discards the whole mantissa; it is used where BaseTable yields zero or infinity.
  private static readonly sbyte[] ShiftTable =
  {
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
  24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13,
  };
  /// <summary>
  /// Converts a float (hiding in a uint) to a half (hiding in a ushort).
  /// </summary>
  /// <remarks>
  /// The conversion is table driven: the top 9 bits of the float (sign and biased exponent) select a
  /// base half pattern from BaseTable and a mantissa shift from ShiftTable, which yields a truncated
  /// result. That result is then adjusted according to the requested rounding mode. A rounding value
  /// that matches none of the four modes handled below leaves the truncated table result untouched.
  /// </remarks>
  /// <param name="f">The float to convert</param>
  /// <param name="rounding">Rounding mode, compared against the NINT/TRUNC/CEIL/FLOOR _NOEXC members of RoundingMode</param>
  /// <returns>The half result</returns>
  [DebuggerStepThrough]
  private static ushort FloatToHalf(uint f, int rounding)
  {
      // Top 9 bits of the float: bit 8 is the sign, bits 0..7 are the biased exponent.
      var exponentAndSign = f >> 23;
      var shift = ShiftTable[exponentAndSign];
      // Truncated result: base pattern plus whatever mantissa bits survive the shift.
      var result = (uint)(BaseTable[exponentAndSign] + (ushort)((f & 0x7FFFFFu) >> shift));
      // Check if the result is not Inf or NaN (half exponent field not all ones).
      var isFinite = (result & 0x7C00) != 0x7C00;
      var isNegative = (result & 0x8000) != 0;
      if (rounding == (int)RoundingMode.FROUND_NINT_NOEXC)
      {
          // Shift by one bit less so the first discarded mantissa bit ends up in bit 0.
          var fWithRoundingBitPreserved = (f & 0x7FFFFFu) >> (shift - 1);
          // Biased exponent 102 is one below the first exponent with a non-zero BaseTable entry (103);
          // such inputs are bumped up to the next half pattern.
          // NOTE(review): this also bumps an input whose mantissa is exactly zero (an exact tie) -
          // confirm that matches the hardware's round-to-nearest behaviour.
          if ((exponentAndSign & 0xFF) == 102)
          {
              result++;
          }
          // Round the magnitude up when the first discarded bit is set; Inf/NaN results are left alone.
          if (isFinite && ((fWithRoundingBitPreserved & 0x1u) != 0))
          {
              result++;
          }
      }
      else if (rounding == (int)RoundingMode.FROUND_TRUNC_NOEXC)
      {
          if (!isFinite)
          {
              // ~shift & 1 is 1 for the even shift (24) and 0 for the odd shift (13). Among the table
              // entries that produce an all-ones half exponent, shift 24 belongs to float exponents
              // 143..254 (finite inputs too large for a half) and shift 13 to exponent 0xFF (Inf/NaN).
              // So only overflowed finite inputs are stepped down by one, to the largest finite half.
              result -= (uint)(~shift & 0x1);
          }
      }
      else if (rounding == (int)RoundingMode.FROUND_CEIL_NOEXC)
      {
          // Rounding towards +infinity only increases the magnitude of non-negative finite results.
          if (isFinite && !isNegative)
          {
              // Non-zero exponents up to 102 truncate to zero in the tables; bump them to the next half.
              if ((exponentAndSign <= 102) && (exponentAndSign != 0))
              {
                  result++;
              }
              // Otherwise bump only if any of the mantissa bits dropped by the shift were set.
              else if ((f & 0x7FFFFFu & ((1u << shift) - 1u)) != 0)
              {
                  result++;
              }
          }
          // A negative input that is not itself -Inf/NaN (exponentAndSign 0x1FF) but produced the
          // -Inf pattern is stepped back by one, to the largest-magnitude finite negative half.
          var resultIsNegativeInf = (result == 0xFC00);
          var inputIsNotNegativeInfOrNan = (exponentAndSign != 0x1FF);
          if (resultIsNegativeInf && inputIsNotNegativeInfOrNan)
          {
              result--;
          }
      }
      else if (rounding == (int)RoundingMode.FROUND_FLOOR_NOEXC)
      {
          // Mirror image of the CEIL case: rounding towards -infinity only increases the magnitude of
          // negative finite results. 256 is the sign bit with exponent 0; 358 is 256 + 102.
          if (isFinite && isNegative)
          {
              if ((exponentAndSign <= 358) && (exponentAndSign != 256))
              {
                  result++;
              }
              // Otherwise bump only if any of the mantissa bits dropped by the shift were set.
              else if ((f & 0x7FFFFFu & ((1u << shift) - 1u)) != 0)
              {
                  result++;
              }
          }
          // A positive input that is not itself +Inf/NaN (exponentAndSign 0xFF) but produced the
          // +Inf pattern is stepped back by one, to the largest finite positive half.
          var resultIsPositiveInf = (result == 0x7C00);
          var inputIsNotPositiveInfOrNan = (exponentAndSign != 0xFF);
          if (resultIsPositiveInf && inputIsNotPositiveInfOrNan)
          {
              result--;
          }
      }
      return (ushort)result;
  }
  /// <summary>
  /// Replaces FROUND_RINT_NOEXC with the explicit rounding mode currently selected by the MXCSR rounding-control bits.
  /// Any other value is returned unchanged.
  /// </summary>
  /// <param name="rounding">Rounding mode requested by the caller</param>
  /// <returns>The rounding mode to hand to FloatToHalf</returns>
  [DebuggerStepThrough]
  private static int ResolveRoundingMode(int rounding)
  {
      if (rounding != (int)RoundingMode.FROUND_RINT_NOEXC)
      {
          return rounding;
      }
      switch (MXCSR & MXCSRBits.RoundingControlMask)
      {
          case MXCSRBits.RoundToNearest:
              return (int)RoundingMode.FROUND_NINT_NOEXC;
          case MXCSRBits.RoundDown:
              return (int)RoundingMode.FROUND_FLOOR_NOEXC;
          case MXCSRBits.RoundUp:
              return (int)RoundingMode.FROUND_CEIL_NOEXC;
          case MXCSRBits.RoundTowardZero:
              return (int)RoundingMode.FROUND_TRUNC_NOEXC;
          default:
              // No matching rounding-control value: keep the caller's value as it is.
              return rounding;
      }
  }
  /// <summary>
  /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.
  ///
  /// Rounding is done according to the rounding parameter, which can be one of:
  /// FROUND_NINT_NOEXC (round to nearest),
  /// FROUND_FLOOR_NOEXC (round down),
  /// FROUND_CEIL_NOEXC (round up),
  /// FROUND_TRUNC_NOEXC (round toward zero),
  /// FROUND_RINT_NOEXC (use the rounding mode currently selected in MXCSR).
  /// </summary>
  /// <remarks>
  /// **** vcvtps2ph xmm, xmm, imm
  ///
  /// The four results occupy the low four 16-bit lanes; the upper four 16-bit lanes are set to zero.
  /// </remarks>
  /// <param name="a">Vector a</param>
  /// <param name="rounding">Rounding mode</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 cvtps_ph(v128 a, int rounding)
  {
      rounding = ResolveRoundingMode(rounding);
      return new v128(FloatToHalf(a.UInt0, rounding), FloatToHalf(a.UInt1, rounding), FloatToHalf(a.UInt2, rounding), FloatToHalf(a.UInt3, rounding), 0, 0, 0, 0);
  }
  /// <summary>
  /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.
  ///
  /// Rounding is done according to the rounding parameter, which can be one of:
  /// FROUND_NINT_NOEXC (round to nearest),
  /// FROUND_FLOOR_NOEXC (round down),
  /// FROUND_CEIL_NOEXC (round up),
  /// FROUND_TRUNC_NOEXC (round toward zero),
  /// FROUND_RINT_NOEXC (use the rounding mode currently selected in MXCSR).
  /// </summary>
  /// <remarks>
  /// **** vcvtps2ph xmm, ymm, imm
  /// </remarks>
  /// <param name="a">Vector a</param>
  /// <param name="rounding">Rounding mode</param>
  /// <returns>Vector</returns>
  [DebuggerStepThrough]
  public static v128 mm256_cvtps_ph(v256 a, int rounding)
  {
      rounding = ResolveRoundingMode(rounding);
      return new v128(FloatToHalf(a.UInt0, rounding), FloatToHalf(a.UInt1, rounding), FloatToHalf(a.UInt2, rounding), FloatToHalf(a.UInt3, rounding), FloatToHalf(a.UInt4, rounding), FloatToHalf(a.UInt5, rounding), FloatToHalf(a.UInt6, rounding), FloatToHalf(a.UInt7, rounding));
  }
  279. }
  280. }
  281. }