暫無描述
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. using System;
  2. using Unity.Burst;
  3. #if !UNITY_DOTSPLAYER
  4. #if !BURST_INTERNAL
  5. using AOT;
  6. using UnityEngine;
  7. #endif
  8. using System.Runtime.InteropServices;
  9. #endif
  10. namespace Unity.Burst.Intrinsics
  11. {
  12. #if !BURST_INTERNAL && !UNITY_DOTSPLAYER
  13. [BurstCompile]
  14. #endif
  15. public unsafe static partial class X86
  16. {
  17. /// <summary>
  18. /// The 32-bit MXCSR register contains control and status information for SSE and AVX SIMD floating-point operations.
  19. /// </summary>
  20. [Flags]
  21. public enum MXCSRBits
  22. {
  23. /// <summary>
  24. /// Bit 15 (FTZ) of the MXCSR register enables the flush-to-zero mode, which controls the masked response to a SIMD floating-point underflow condition.
  25. /// </summary>
  26. /// <remarks>
  27. /// When the underflow exception is masked and the flush-to-zero mode is enabled, the processor performs the following operations when it detects a floating-point underflow condition.
  28. /// - Returns a zero result with the sign of the true result
  29. /// - Sets the precision and underflow exception flags.
  30. ///
  31. /// If the underflow exception is not masked, the flush-to-zero bit is ignored.
  32. ///
  33. /// The flush-to-zero mode is not compatible with IEEE Standard 754. The IEEE-mandated masked response to under-flow is to deliver the denormalized result.
  34. /// The flush-to-zero mode is provided primarily for performance reasons. At the cost of a slight precision loss, faster execution can be achieved for applications where underflows
  35. /// are common and rounding the underflow result to zero can be tolerated. The flush-to-zero bit is cleared upon a power-up or reset of the processor, disabling the flush-to-zero mode.
  36. /// </remarks>
  37. FlushToZero = 1 << 15,
  38. /// <summary>
  39. /// Mask for rounding control bits.
  40. /// </summary>
  41. ///
  42. /// The rounding modes have no effect on comparison operations, operations that produce exact results, or operations that produce NaN results.
  43. RoundingControlMask = (1 << 14) | (1 << 13),
  44. /// <summary>
  45. /// Rounded result is the closest to the infinitely precise result. If two values are equally close, the result is the even value (that is, the one with the least-significant bit of zero). Default.
  46. /// </summary>
  47. RoundToNearest = 0,
  48. /// <summary>
  49. /// Rounded result is closest to but no greater than the infinitely precise result.
  50. /// </summary>
  51. RoundDown = (1 << 13),
  52. /// <summary>
  53. /// Rounded result is closest to but no less than the infinitely precise result.
  54. /// </summary>
  55. RoundUp = (1 << 14),
  56. /// <summary>
  57. /// Rounded result is closest to but no greater in absolute value than the infinitely precise result.
  58. /// </summary>
  59. RoundTowardZero = (1 << 13) | (1 << 14),
  60. /// <summary>Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</summary>
  61. PrecisionMask = 1 << 12,
  62. /// <summary>Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</summary>
  63. UnderflowMask = 1 << 11,
  64. /// <summary>Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</summary>
  65. OverflowMask = 1 << 10,
  66. /// <summary>Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</summary>
  67. DivideByZeroMask = 1 << 9,
  68. /// <summary>Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</summary>
  69. DenormalOperationMask = 1 << 8,
  70. /// <summary>Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</summary>
  71. InvalidOperationMask = 1 << 7,
  72. /// <summary>
  73. /// Combine all bits for exception masking into one mask for convenience.
  74. /// </summary>
  75. ExceptionMask = PrecisionMask | UnderflowMask | OverflowMask | DivideByZeroMask | DenormalOperationMask | InvalidOperationMask,
  76. /// <summary>
  77. /// Bit 6 (DAZ) of the MXCSR register enables the denormals-are-zeros mode, which controls the processor’s response to a SIMD floating-point denormal operand condition.
  78. /// </summary>
  79. ///
  80. /// When the denormals-are-zeros flag is set, the processor converts all denormal source operands to a zero with the sign of the original operand before performing any computations on them.
  81. /// The processor does not set the denormal-operand exception flag (DE), regardless of the setting of the denormal-operand exception mask bit (DM); and it does not generate a denormal-operand
  82. /// exception if the exception is unmasked.The denormals-are-zeros mode is not compatible with IEEE Standard 754.
  83. ///
  84. /// The denormals-are-zeros mode is provided to improve processor performance for applications such as streaming media processing, where rounding a denormal operand to zero does not
  85. /// appreciably affect the quality of the processed data. The denormals-are-zeros flag is cleared upon a power-up or reset of the processor, disabling the denormals-are-zeros mode.
  86. ///
  87. /// The denormals-are-zeros mode was introduced in the Pentium 4 and Intel Xeon processor with the SSE2 extensions; however, it is fully compatible with the SSE SIMD floating-point instructions
  88. /// (that is, the denormals-are-zeros flag affects the operation of the SSE SIMD floating-point instructions). In earlier IA-32 processors and in some models of the Pentium 4 processor, this flag
  89. /// (bit 6) is reserved. Attempting to set bit 6 of the MXCSR register on processors that do not support the DAZ flag will cause a general-protection exception (#GP).
  90. DenormalsAreZeroes = 1 << 6,
  91. /// <summary>Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</summary>
  92. PrecisionFlag = 1 << 5,
  93. /// <summary>Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</summary>
  94. UnderflowFlag = 1 << 4,
  95. /// <summary>Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</summary>
  96. OverflowFlag = 1 << 3,
  97. /// <summary>Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</summary>
  98. DivideByZeroFlag = 1 << 2,
  99. /// <summary>Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</summary>
  100. DenormalFlag = 1 << 1,
  101. /// <summary>Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</summary>
  102. InvalidOperationFlag = 1 << 0,
  103. /// <summary>
  104. /// Combines all bits for flags into one mask for convenience.
  105. /// </summary>
  106. FlagMask = PrecisionFlag | UnderflowFlag | OverflowFlag | DivideByZeroFlag | DenormalFlag | InvalidOperationFlag,
  107. }
  108. /// <summary>
  109. /// Rounding mode flags
  110. /// </summary>
  111. [Flags]
  112. public enum RoundingMode
  113. {
  114. /// <summary>
  115. /// Round to the nearest integer
  116. /// </summary>
  117. FROUND_TO_NEAREST_INT = 0x00,
  118. /// <summary>
  119. /// Round to negative infinity
  120. /// </summary>
  121. FROUND_TO_NEG_INF = 0x01,
  122. /// <summary>
  123. /// Round to positive infinity
  124. /// </summary>
  125. FROUND_TO_POS_INF = 0x02,
  126. /// <summary>
  127. /// Round to zero
  128. /// </summary>
  129. FROUND_TO_ZERO = 0x03,
  130. /// <summary>
  131. /// Round to current direction
  132. /// </summary>
  133. FROUND_CUR_DIRECTION = 0x04,
  134. /// <summary>
  135. /// Do not suppress exceptions
  136. /// </summary>
  137. FROUND_RAISE_EXC = 0x00,
  138. /// <summary>
  139. /// Suppress exceptions
  140. /// </summary>
  141. FROUND_NO_EXC = 0x08,
  142. /// <summary>
  143. /// Round to the nearest integer without suppressing exceptions
  144. /// </summary>
  145. FROUND_NINT = FROUND_TO_NEAREST_INT | FROUND_RAISE_EXC,
  146. /// <summary>
  147. /// Round using Floor function without suppressing exceptions
  148. /// </summary>
  149. FROUND_FLOOR = FROUND_TO_NEG_INF | FROUND_RAISE_EXC,
  150. /// <summary>
  151. /// Round using Ceiling function without suppressing exceptions
  152. /// </summary>
  153. FROUND_CEIL = FROUND_TO_POS_INF | FROUND_RAISE_EXC,
  154. /// <summary>
  155. /// Round by truncating without suppressing exceptions
  156. /// </summary>
  157. FROUND_TRUNC = FROUND_TO_ZERO | FROUND_RAISE_EXC,
  158. /// <summary>
  159. /// Round using MXCSR.RC without suppressing exceptions
  160. /// </summary>
  161. FROUND_RINT = FROUND_CUR_DIRECTION | FROUND_RAISE_EXC,
  162. /// <summary>
  163. /// Round using MXCSR.RC and suppressing exceptions
  164. /// </summary>
  165. FROUND_NEARBYINT = FROUND_CUR_DIRECTION | FROUND_NO_EXC,
  166. /// <summary>
  167. /// Round to nearest integer and suppressing exceptions
  168. /// </summary>
  169. FROUND_NINT_NOEXC = FROUND_TO_NEAREST_INT | FROUND_NO_EXC,
  170. /// <summary>
  171. /// Round using Floor function and suppressing exceptions
  172. /// </summary>
  173. FROUND_FLOOR_NOEXC = FROUND_TO_NEG_INF | FROUND_NO_EXC,
  174. /// <summary>
  175. /// Round using Ceiling function and suppressing exceptions
  176. /// </summary>
  177. FROUND_CEIL_NOEXC = FROUND_TO_POS_INF | FROUND_NO_EXC,
  178. /// <summary>
  179. /// Round by truncating and suppressing exceptions
  180. /// </summary>
  181. FROUND_TRUNC_NOEXC = FROUND_TO_ZERO | FROUND_NO_EXC,
  182. /// <summary>
  183. /// Round using MXCSR.RC and suppressing exceptions
  184. /// </summary>
  185. FROUND_RINT_NOEXC = FROUND_CUR_DIRECTION | FROUND_NO_EXC,
  186. }
  187. internal struct RoundingScope : IDisposable
  188. {
  189. private MXCSRBits OldBits;
  190. public RoundingScope(MXCSRBits roundingMode)
  191. {
  192. OldBits = MXCSR;
  193. MXCSR = (OldBits & ~MXCSRBits.RoundingControlMask) | roundingMode;
  194. }
  195. public void Dispose()
  196. {
  197. MXCSR = OldBits;
  198. }
  199. }
  200. #if UNITY_DOTSPLAYER
  201. internal static int getcsr_raw()
  202. {
  203. throw new NotImplementedException("getcsr_raw not supported from managed in this configuration");
  204. }
  205. internal static void setcsr_raw(int bits)
  206. {
  207. throw new NotImplementedException("setcsr_raw not supported from managed in this configuration");
  208. }
  209. #endif
  210. #if !BURST_INTERNAL && !UNITY_DOTSPLAYER
  211. private static void BurstIntrinsicSetCSRFromManaged(int _) { }
  212. private static int BurstIntrinsicGetCSRFromManaged() { return 0; }
  213. internal static int getcsr_raw() => DoGetCSRTrampoline();
  214. internal static void setcsr_raw(int bits) => DoSetCSRTrampoline(bits);
  215. [BurstCompile(CompileSynchronously = true)]
  216. private static void DoSetCSRTrampoline(int bits)
  217. {
  218. if (Sse.IsSseSupported)
  219. BurstIntrinsicSetCSRFromManaged(bits);
  220. }
  221. [BurstCompile(CompileSynchronously = true)]
  222. private static int DoGetCSRTrampoline()
  223. {
  224. if (Sse.IsSseSupported)
  225. return BurstIntrinsicGetCSRFromManaged();
  226. return 0;
  227. }
  228. #elif BURST_INTERNAL
  229. // Internally inside burst for unit tests we can't recurse from tests into burst again,
  230. // so we pinvoke to a dummy wrapper DLL that exposes CSR manipulation
  231. [DllImport("burst-dllimport-native", EntryPoint = "x86_getcsr")]
  232. internal static extern int getcsr_raw();
  233. [DllImport("burst-dllimport-native", EntryPoint = "x86_setcsr")]
  234. internal static extern void setcsr_raw(int bits);
  235. #endif
  236. /// <summary>
  237. /// Allows access to the CSR register
  238. /// </summary>
  239. public static MXCSRBits MXCSR
  240. {
  241. [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
  242. get
  243. {
  244. return (MXCSRBits)getcsr_raw();
  245. }
  246. [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
  247. set
  248. {
  249. setcsr_raw((int)value);
  250. }
  251. }
  252. }
  253. }