123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281 |
- using System;
- using Unity.Burst;
-
- #if !UNITY_DOTSPLAYER
- #if !BURST_INTERNAL
- using AOT;
- using UnityEngine;
- #endif
- using System.Runtime.InteropServices;
- #endif
-
- namespace Unity.Burst.Intrinsics
- {
- #if !BURST_INTERNAL && !UNITY_DOTSPLAYER
- [BurstCompile]
- #endif
- public unsafe static partial class X86
- {
/// <summary>
/// The 32-bit MXCSR register contains control and status information for SSE and AVX SIMD floating-point operations.
/// </summary>
/// <remarks>
/// Layout as encoded by the members below: bit 15 is flush-to-zero, bits 13-14 are rounding control,
/// bits 7-12 are the exception masks, bit 6 is denormals-are-zeros and bits 0-5 are the sticky exception flags.
/// </remarks>
[Flags]
public enum MXCSRBits
{
    /// <summary>
    /// Bit 15 (FTZ) of the MXCSR register enables the flush-to-zero mode, which controls the masked response to a SIMD floating-point underflow condition.
    /// </summary>
    /// <remarks>
    /// When the underflow exception is masked and the flush-to-zero mode is enabled, the processor performs the following operations when it detects a floating-point underflow condition.
    /// - Returns a zero result with the sign of the true result
    /// - Sets the precision and underflow exception flags.
    ///
    /// If the underflow exception is not masked, the flush-to-zero bit is ignored.
    ///
    /// The flush-to-zero mode is not compatible with IEEE Standard 754. The IEEE-mandated masked response to underflow is to deliver the denormalized result.
    /// The flush-to-zero mode is provided primarily for performance reasons. At the cost of a slight precision loss, faster execution can be achieved for applications where underflows
    /// are common and rounding the underflow result to zero can be tolerated. The flush-to-zero bit is cleared upon a power-up or reset of the processor, disabling the flush-to-zero mode.
    /// </remarks>
    FlushToZero = 1 << 15,

    /// <summary>
    /// Mask for rounding control bits (bits 13 and 14).
    /// </summary>
    /// <remarks>
    /// The rounding modes have no effect on comparison operations, operations that produce exact results, or operations that produce NaN results.
    /// </remarks>
    RoundingControlMask = (1 << 14) | (1 << 13),

    /// <summary>
    /// Rounded result is the closest to the infinitely precise result. If two values are equally close, the result is the even value (that is, the one with the least-significant bit of zero). Default.
    /// </summary>
    RoundToNearest = 0,

    /// <summary>
    /// Rounded result is closest to but no greater than the infinitely precise result.
    /// </summary>
    RoundDown = (1 << 13),

    /// <summary>
    /// Rounded result is closest to but no less than the infinitely precise result.
    /// </summary>
    RoundUp = (1 << 14),

    /// <summary>
    /// Rounded result is closest to but no greater in absolute value than the infinitely precise result.
    /// </summary>
    RoundTowardZero = (1 << 13) | (1 << 14),

    /// <summary>Precision exception mask (bit 12).</summary>
    /// <remarks>Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</remarks>
    PrecisionMask = 1 << 12,
    /// <summary>Underflow exception mask (bit 11).</summary>
    /// <remarks>Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</remarks>
    UnderflowMask = 1 << 11,
    /// <summary>Overflow exception mask (bit 10).</summary>
    /// <remarks>Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</remarks>
    OverflowMask = 1 << 10,
    /// <summary>Divide-by-zero exception mask (bit 9).</summary>
    /// <remarks>Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</remarks>
    DivideByZeroMask = 1 << 9,
    /// <summary>Denormal-operation exception mask (bit 8).</summary>
    /// <remarks>Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</remarks>
    DenormalOperationMask = 1 << 8,
    /// <summary>Invalid-operation exception mask (bit 7).</summary>
    /// <remarks>Bits 7 through 12 provide individual mask bits for the SIMD floating-point exceptions. An exception type is masked if the corresponding mask bit is set, and it is unmasked if the bit is clear. These mask bits are set upon a power-up or reset. This causes all SIMD floating-point exceptions to be initially masked.</remarks>
    InvalidOperationMask = 1 << 7,

    /// <summary>
    /// Combine all bits for exception masking into one mask for convenience.
    /// </summary>
    ExceptionMask = PrecisionMask | UnderflowMask | OverflowMask | DivideByZeroMask | DenormalOperationMask | InvalidOperationMask,

    /// <summary>
    /// Bit 6 (DAZ) of the MXCSR register enables the denormals-are-zeros mode, which controls the processor's response to a SIMD floating-point denormal operand condition.
    /// </summary>
    /// <remarks>
    /// When the denormals-are-zeros flag is set, the processor converts all denormal source operands to a zero with the sign of the original operand before performing any computations on them.
    /// The processor does not set the denormal-operand exception flag (DE), regardless of the setting of the denormal-operand exception mask bit (DM); and it does not generate a denormal-operand
    /// exception if the exception is unmasked. The denormals-are-zeros mode is not compatible with IEEE Standard 754.
    ///
    /// The denormals-are-zeros mode is provided to improve processor performance for applications such as streaming media processing, where rounding a denormal operand to zero does not
    /// appreciably affect the quality of the processed data. The denormals-are-zeros flag is cleared upon a power-up or reset of the processor, disabling the denormals-are-zeros mode.
    ///
    /// The denormals-are-zeros mode was introduced in the Pentium 4 and Intel Xeon processor with the SSE2 extensions; however, it is fully compatible with the SSE SIMD floating-point instructions
    /// (that is, the denormals-are-zeros flag affects the operation of the SSE SIMD floating-point instructions). In earlier IA-32 processors and in some models of the Pentium 4 processor, this flag
    /// (bit 6) is reserved. Attempting to set bit 6 of the MXCSR register on processors that do not support the DAZ flag will cause a general-protection exception (#GP).
    /// </remarks>
    DenormalsAreZeroes = 1 << 6,

    /// <summary>Precision exception flag (bit 5).</summary>
    /// <remarks>Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</remarks>
    PrecisionFlag = 1 << 5,
    /// <summary>Underflow exception flag (bit 4).</summary>
    /// <remarks>Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</remarks>
    UnderflowFlag = 1 << 4,
    /// <summary>Overflow exception flag (bit 3).</summary>
    /// <remarks>Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</remarks>
    OverflowFlag = 1 << 3,
    /// <summary>Divide-by-zero exception flag (bit 2).</summary>
    /// <remarks>Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</remarks>
    DivideByZeroFlag = 1 << 2,
    /// <summary>Denormal-operand exception flag (bit 1).</summary>
    /// <remarks>Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</remarks>
    DenormalFlag = 1 << 1,
    /// <summary>Invalid-operation exception flag (bit 0).</summary>
    /// <remarks>Bits 0 through 5 of the MXCSR register indicate whether a SIMD floating-point exception has been detected. They are "sticky" flags. That is, after a flag is set, it remains set until explicitly cleared. To clear these flags, use the LDMXCSR or the FXRSTOR instruction to write zeroes to them.</remarks>
    InvalidOperationFlag = 1 << 0,

    /// <summary>
    /// Combines all bits for flags into one mask for convenience.
    /// </summary>
    FlagMask = PrecisionFlag | UnderflowFlag | OverflowFlag | DivideByZeroFlag | DenormalFlag | InvalidOperationFlag,
}
-
/// <summary>
/// Rounding mode flags
/// </summary>
/// <remarks>
/// As encoded below: values 0x00-0x03 select an explicit rounding direction, 0x04 selects the
/// current direction (MXCSR.RC), and 0x08 suppresses exceptions. The remaining members are
/// named combinations of a direction with either <see cref="FROUND_RAISE_EXC"/> or <see cref="FROUND_NO_EXC"/>.
/// </remarks>
[Flags]
public enum RoundingMode
{
    /// <summary>
    /// Round to the nearest integer
    /// </summary>
    FROUND_TO_NEAREST_INT = 0x00,
    /// <summary>
    /// Round to negative infinity
    /// </summary>
    FROUND_TO_NEG_INF = 0x01,
    /// <summary>
    /// Round to positive infinity
    /// </summary>
    FROUND_TO_POS_INF = 0x02,
    /// <summary>
    /// Round to zero
    /// </summary>
    FROUND_TO_ZERO = 0x03,
    /// <summary>
    /// Round to current direction
    /// </summary>
    FROUND_CUR_DIRECTION = 0x04,

    /// <summary>
    /// Do not suppress exceptions
    /// </summary>
    FROUND_RAISE_EXC = 0x00,
    /// <summary>
    /// Suppress exceptions
    /// </summary>
    FROUND_NO_EXC = 0x08,

    /// <summary>
    /// Round to the nearest integer without suppressing exceptions
    /// </summary>
    FROUND_NINT = FROUND_TO_NEAREST_INT | FROUND_RAISE_EXC,
    /// <summary>
    /// Round using Floor function without suppressing exceptions
    /// </summary>
    FROUND_FLOOR = FROUND_TO_NEG_INF | FROUND_RAISE_EXC,
    /// <summary>
    /// Round using Ceiling function without suppressing exceptions
    /// </summary>
    FROUND_CEIL = FROUND_TO_POS_INF | FROUND_RAISE_EXC,
    /// <summary>
    /// Round by truncating without suppressing exceptions
    /// </summary>
    FROUND_TRUNC = FROUND_TO_ZERO | FROUND_RAISE_EXC,
    /// <summary>
    /// Round using MXCSR.RC without suppressing exceptions
    /// </summary>
    FROUND_RINT = FROUND_CUR_DIRECTION | FROUND_RAISE_EXC,
    /// <summary>
    /// Round using MXCSR.RC and suppressing exceptions
    /// </summary>
    FROUND_NEARBYINT = FROUND_CUR_DIRECTION | FROUND_NO_EXC,

    /// <summary>
    /// Round to nearest integer and suppressing exceptions
    /// </summary>
    FROUND_NINT_NOEXC = FROUND_TO_NEAREST_INT | FROUND_NO_EXC,
    /// <summary>
    /// Round using Floor function and suppressing exceptions
    /// </summary>
    FROUND_FLOOR_NOEXC = FROUND_TO_NEG_INF | FROUND_NO_EXC,
    /// <summary>
    /// Round using Ceiling function and suppressing exceptions
    /// </summary>
    FROUND_CEIL_NOEXC = FROUND_TO_POS_INF | FROUND_NO_EXC,
    /// <summary>
    /// Round by truncating and suppressing exceptions
    /// </summary>
    FROUND_TRUNC_NOEXC = FROUND_TO_ZERO | FROUND_NO_EXC,
    /// <summary>
    /// Round using MXCSR.RC and suppressing exceptions (same value as <see cref="FROUND_NEARBYINT"/>)
    /// </summary>
    FROUND_RINT_NOEXC = FROUND_CUR_DIRECTION | FROUND_NO_EXC,
}
-
/// <summary>
/// Disposable scope that replaces the rounding-control bits of the MXCSR register on construction
/// and writes back the previously read register value on <see cref="Dispose"/>.
/// </summary>
internal struct RoundingScope : IDisposable
{
    // Complete MXCSR value read when the scope was created; only assigned in the constructor.
    private readonly MXCSRBits OldBits;

    /// <summary>
    /// Reads the current MXCSR value, clears its rounding-control bits and ORs in <paramref name="roundingMode"/>.
    /// </summary>
    /// <param name="roundingMode">
    /// Bits to OR into the register. The value is not masked, so any bits outside
    /// <see cref="MXCSRBits.RoundingControlMask"/> that the caller passes are set as well.
    /// </param>
    public RoundingScope(MXCSRBits roundingMode)
    {
        OldBits = MXCSR;
        MXCSR = (OldBits & ~MXCSRBits.RoundingControlMask) | roundingMode;
    }

    /// <summary>
    /// Writes the MXCSR value captured by the constructor back to the register.
    /// </summary>
    public void Dispose()
    {
        MXCSR = OldBits;
    }
}
-
- #if UNITY_DOTSPLAYER
/// <summary>
/// Stand-in for reading the raw CSR value; this variant has no implementation and always throws.
/// </summary>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
internal static int getcsr_raw()
{
    throw new NotImplementedException("getcsr_raw not supported from managed in this configuration");
}
-
/// <summary>
/// Stand-in for writing the raw CSR value; this variant has no implementation and always throws.
/// </summary>
/// <param name="bits">Raw register value; unused because the method throws unconditionally.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
internal static void setcsr_raw(int bits)
{
    throw new NotImplementedException("setcsr_raw not supported from managed in this configuration");
}
- #endif
-
- #if !BURST_INTERNAL && !UNITY_DOTSPLAYER
// Managed body is intentionally empty: calling this from plain managed code does nothing.
// NOTE(review): presumably Burst substitutes the real CSR write when this is compiled by Burst - confirm.
private static void BurstIntrinsicSetCSRFromManaged(int _) { }
// Managed body always yields 0.
// NOTE(review): presumably Burst substitutes the real CSR read when this is compiled by Burst - confirm.
private static int BurstIntrinsicGetCSRFromManaged() { return 0; }
-
/// <summary>
/// Reads the raw CSR value by forwarding to <c>DoGetCSRTrampoline</c>.
/// </summary>
/// <returns>Whatever the trampoline returns (it yields 0 when SSE is not reported as supported).</returns>
internal static int getcsr_raw() => DoGetCSRTrampoline();
-
/// <summary>
/// Writes the raw CSR value by forwarding <paramref name="bits"/> to <c>DoSetCSRTrampoline</c>.
/// </summary>
/// <param name="bits">Raw register value to write.</param>
internal static void setcsr_raw(int bits) => DoSetCSRTrampoline(bits);
-
/// <summary>
/// Burst-compiled entry point that passes <paramref name="bits"/> to <c>BurstIntrinsicSetCSRFromManaged</c>.
/// </summary>
/// <param name="bits">Raw register value to write.</param>
[BurstCompile(CompileSynchronously = true)]
private static void DoSetCSRTrampoline(int bits)
{
    // When SSE is not reported as supported the write is skipped without any error.
    if (Sse.IsSseSupported)
    {
        BurstIntrinsicSetCSRFromManaged(bits);
    }
}
-
/// <summary>
/// Burst-compiled entry point that returns the result of <c>BurstIntrinsicGetCSRFromManaged</c>.
/// </summary>
/// <returns>The value produced by the intrinsic stub, or 0 when SSE is not reported as supported.</returns>
[BurstCompile(CompileSynchronously = true)]
private static int DoGetCSRTrampoline()
{
    if (Sse.IsSseSupported)
    {
        return BurstIntrinsicGetCSRFromManaged();
    }

    // Fallback when SSE is unavailable: report an all-zero register value.
    return 0;
}
-
- #elif BURST_INTERNAL
// Internally inside burst for unit tests we can't recurse from tests into burst again,
// so we pinvoke to a dummy wrapper DLL that exposes CSR manipulation

/// <summary>
/// Reads the raw CSR value through the native <c>x86_getcsr</c> export of <c>burst-dllimport-native</c>.
/// </summary>
/// <returns>The integer returned by the native function.</returns>
[DllImport("burst-dllimport-native", EntryPoint = "x86_getcsr")]
internal static extern int getcsr_raw();
-
/// <summary>
/// Writes the raw CSR value through the native <c>x86_setcsr</c> export of <c>burst-dllimport-native</c>.
/// </summary>
/// <param name="bits">Raw register value handed to the native function.</param>
[DllImport("burst-dllimport-native", EntryPoint = "x86_setcsr")]
internal static extern void setcsr_raw(int bits);
- #endif
/// <summary>
/// Allows access to the CSR register
/// </summary>
/// <remarks>
/// The getter casts the integer returned by <c>getcsr_raw</c> to <see cref="MXCSRBits"/>;
/// the setter casts the assigned value to <c>int</c> and passes it to <c>setcsr_raw</c>.
/// No validation or masking is applied in either direction.
/// </remarks>
public static MXCSRBits MXCSR
{
    [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
    get
    {
        return (MXCSRBits)getcsr_raw();
    }
    [BurstTargetCpu(BurstTargetCpu.X64_SSE2)]
    set
    {
        setcsr_raw((int)value);
    }
}
- }
- }
|