Нема описа
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.


  1. #ifndef UNITY_COMMON_INCLUDED
  2. #define UNITY_COMMON_INCLUDED
  3. #if SHADER_API_MOBILE || SHADER_API_GLES3 || SHADER_API_SWITCH || defined(UNITY_UNIFIED_SHADER_PRECISION_MODEL)
  4. #pragma warning (disable : 3205) // conversion of larger type to smaller
  5. #endif
  6. // Convention:
  7. // Unity is Y up and left handed in world space
  8. // Caution: When going from world space to view space, unity is right handed in view space and the determinant of the matrix is negative
  9. // For cubemap capture (reflection probe) view space is still left handed (cubemap convention) and the determinant is positive.
  10. // The lighting code assume that 1 Unity unit (1uu) == 1 meters. This is very important regarding physically based light unit and inverse square attenuation
  11. // space at the end of the variable name
  12. // WS: world space
  13. // RWS: Camera-relative world space. A space where the translation of the camera has already been subtracted in order to improve precision
  14. // VS: view space
  15. // OS: object space
  16. // CS: Homogeneous clip space
  17. // TS: tangent space
  18. // TXS: texture space
  19. // Example: NormalWS
  20. // normalized / unnormalized vector
  21. // normalized directions are almost everywhere, we tag unnormalized vectors with un.
  22. // Example: unL for unnormalized light vector
  23. // use capital letters for regular vectors; vectors always point away from the current pixel position (ready for the lighting equation)
  24. // a capital letter means the vector is normalized, unless we put 'un' in front of it.
  25. // V: View vector (no eye vector)
  26. // L: Light vector
  27. // N: Normal vector
  28. // H: Half vector
  29. // Input/Outputs structs in PascalCase and prefixed by entry type
  30. // struct AttributesDefault
  31. // struct VaryingsDefault
  32. // use input/output as variable name when using these structures
  33. // Entry program name
  34. // VertDefault
  35. // FragDefault / FragForward / FragDeferred
  36. // constant floating number written as 1.0 (not 1, not 1.0f, not 1.0h)
  37. // uniform have _ as prefix + uppercase _LowercaseThenCamelCase
  38. // Do not use "in", only "out" or "inout" as qualifier; no "inline" keyword either, it is useless.
  39. // When declaring "out" argument of function, they are always last
  40. // headers from ShaderLibrary do not include "common.hlsl", this should be included in the .shader using it (or Material.hlsl)
  41. // All uniforms should be in constant buffers (nothing in the global namespace).
  42. // The reason is that for compute shader we need to guarantee that the layout of CBs is consistent across kernels. Something that we can't control with the global namespace (uniforms get optimized out if not used, modifying the global CBuffer layout per kernel)
  43. // Structure definition that are share between C# and hlsl.
  44. // These structures need to be align on float4 to respect various packing rules from shader language. This mean that these structure need to be padded.
  45. // Rules: When doing an array for constant buffer variables, we always use float4 to avoid any packing issue, particularly between compute shader and pixel shaders
  46. // i.e don't use SetGlobalFloatArray or SetComputeFloatParams
  47. // The array can be aliased in hlsl. Example:
  48. // uniform float4 packedArray[3];
  49. // static float unpackedArray[12] = (float[12])packedArray;
  50. // The function of the shader library are stateless, no uniform declare in it.
  51. // Any function that require an explicit precision, use float or half qualifier, when the function can support both, it use real (see below)
  52. // If a function require to have both a half and a float version, then both need to be explicitly define
  53. ///
  54. /// Hardware Support for Wave Operations
  55. ///
  56. // Support for wave operations is intentionally limited to the compute shader stage in order to make this functionality available to a wider range of hardware.
  #if defined(SHADER_STAGE_COMPUTE)

  // --- Platform support --------------------------------------------------------
  // A platform may advertise wave operations at compile time, yet its shaders are
  // not always built with a compiler that implements them. Support is therefore
  // only reported when a capable compiler (DXC or PSSL) is in use as well.
  #   if (defined(UNITY_PLATFORM_SUPPORTS_WAVE_32) || defined(UNITY_PLATFORM_SUPPORTS_WAVE_64)) && (defined(UNITY_COMPILER_DXC) || defined(SHADER_API_PSSL))
  #       if defined(UNITY_PLATFORM_SUPPORTS_WAVE_32)
  #           define UNITY_HW_WAVE_SIZE 32
  #       else // the enclosing condition guarantees UNITY_PLATFORM_SUPPORTS_WAVE_64 here
  #           define UNITY_HW_WAVE_SIZE 64
  #       endif
  #       define UNITY_PLATFORM_SUPPORTS_WAVE 1

  // --- Device support ----------------------------------------------------------
  // A device may advertise wave operations at run time. Shaders compiled with
  // these defines are always built with a compiler that supports wave operations.
  // UNITY_DEVICE_SUPPORTS_WAVE_ANY on its own leaves UNITY_HW_WAVE_SIZE undefined.
  #   elif defined(UNITY_DEVICE_SUPPORTS_WAVE_ANY) || defined(UNITY_DEVICE_SUPPORTS_WAVE_8) || defined(UNITY_DEVICE_SUPPORTS_WAVE_16) || defined(UNITY_DEVICE_SUPPORTS_WAVE_32) || defined(UNITY_DEVICE_SUPPORTS_WAVE_64) || defined(UNITY_DEVICE_SUPPORTS_WAVE_128)
  #       if defined(UNITY_DEVICE_SUPPORTS_WAVE_8)
  #           define UNITY_HW_WAVE_SIZE 8
  #       elif defined(UNITY_DEVICE_SUPPORTS_WAVE_16)
  #           define UNITY_HW_WAVE_SIZE 16
  #       elif defined(UNITY_DEVICE_SUPPORTS_WAVE_32)
  #           define UNITY_HW_WAVE_SIZE 32
  #       elif defined(UNITY_DEVICE_SUPPORTS_WAVE_64)
  #           define UNITY_HW_WAVE_SIZE 64
  #       elif defined(UNITY_DEVICE_SUPPORTS_WAVE_128)
  #           define UNITY_HW_WAVE_SIZE 128
  #       endif
  #       define UNITY_DEVICE_SUPPORTS_WAVE 1
  #   endif

  // Single switch for the rest of the library: set when either path above reported support.
  #   if defined(UNITY_PLATFORM_SUPPORTS_WAVE) || defined(UNITY_DEVICE_SUPPORTS_WAVE)
  #       define UNITY_HW_SUPPORTS_WAVE 1
  #   endif

  #endif // SHADER_STAGE_COMPUTE
  #ifndef real

  // Precision selection for the 'real' family of types.
  //   HAS_HALF      : the shader API (for now) indicates whether half is possible.
  //   PREFER_HALF   : the including shader states whether half precision suits its needs (defaults to 1).
  //   REAL_IS_HALF  : 1 when both of the above hold; 'real' then aliases 'half', otherwise 'float'.
  //   HALF_IS_FLOAT : 1 when 'half' is neither remapped to min16float nor backed by native 16-bit support.
  #if defined(SHADER_API_MOBILE) || defined(SHADER_API_SWITCH) || defined(UNITY_UNIFIED_SHADER_PRECISION_MODEL)
  #define HAS_HALF 1
  #else
  #define HAS_HALF 0
  #endif

  #ifndef PREFER_HALF
  #define PREFER_HALF 1
  #endif

  #if HAS_HALF && PREFER_HALF
  #define REAL_IS_HALF 1
  #else
  #define REAL_IS_HALF 0
  #endif // Do we have half?

  #if REAL_IS_HALF
  #define HALF_IS_FLOAT 0
  // Remap every half type to its min16float counterpart.
  // half2x4 is remapped because real2x4 below expands to it; half4x2 is included
  // so that no half matrix shape silently keeps a different precision.
  #define half min16float
  #define half2 min16float2
  #define half3 min16float3
  #define half4 min16float4
  #define half2x2 min16float2x2
  #define half2x3 min16float2x3
  #define half2x4 min16float2x4
  #define half3x2 min16float3x2
  #define half3x3 min16float3x3
  #define half3x4 min16float3x4
  #define half4x2 min16float4x2
  #define half4x3 min16float4x3
  #define half4x4 min16float4x4
  // HALF_IS_FLOAT is resolved to a plain 0/1 here rather than expanding to 'defined(...)':
  // a 'defined' produced by macro expansion is not portable inside #if and cannot be
  // used in ordinary expressions at all.
  #elif defined(UNITY_DEVICE_SUPPORTS_NATIVE_16BIT)
  #define HALF_IS_FLOAT 0
  #else
  #define HALF_IS_FLOAT 1
  #endif

  #if REAL_IS_HALF
  #define real half
  #define real2 half2
  #define real3 half3
  #define real4 half4
  #define real2x2 half2x2
  #define real2x3 half2x3
  #define real2x4 half2x4
  #define real3x2 half3x2
  #define real3x3 half3x3
  #define real3x4 half3x4
  #define real4x3 half4x3
  #define real4x4 half4x4
  #define REAL_MIN HALF_MIN
  #define REAL_MAX HALF_MAX
  #define REAL_EPS HALF_EPS
  #else
  #define real float
  #define real2 float2
  #define real3 float3
  #define real4 float4
  #define real2x2 float2x2
  #define real2x3 float2x3
  #define real2x4 float2x4
  #define real3x2 float3x2
  #define real3x3 float3x3
  #define real3x4 float3x4
  #define real4x3 float4x3
  #define real4x4 float4x4
  #define REAL_MIN FLT_MIN
  #define REAL_MAX FLT_MAX
  #define REAL_EPS FLT_EPS
  #endif // REAL_IS_HALF

  #endif // #ifndef real
  // SHADER_TARGET for compute shaders is supported in 2018.2; for now, provide our own when it is missing.
  // (Note: only target 45 and above support compute shaders.)
  #if defined(SHADER_STAGE_COMPUTE) && !defined(SHADER_TARGET)
  #   if defined(SHADER_API_METAL)
  #       define SHADER_TARGET 45
  #   else
  #       define SHADER_TARGET 50
  #   endif
  #endif
  172. // This is the default keyword combination and needs to be overriden by the platforms that need specific behaviors
  173. // when enabling conservative depth overrides
  174. #define SV_POSITION_QUALIFIERS
  175. #define DEPTH_OFFSET_SEMANTIC SV_Depth
  176. // Include language header
  177. #if defined (SHADER_API_GAMECORE)
  178. #include "Packages/com.unity.render-pipelines.gamecore/ShaderLibrary/API/GameCore.hlsl"
  179. #elif defined(SHADER_API_XBOXONE)
  180. #include "Packages/com.unity.render-pipelines.xboxone/ShaderLibrary/API/XBoxOne.hlsl"
  181. #elif defined(SHADER_API_PS4)
  182. #include "Packages/com.unity.render-pipelines.ps4/ShaderLibrary/API/PSSL.hlsl"
  183. #elif defined(SHADER_API_PS5)
  184. #include "Packages/com.unity.render-pipelines.ps5/ShaderLibrary/API/PSSL.hlsl"
  185. #elif defined(SHADER_API_D3D11)
  186. #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/D3D11.hlsl"
  187. #elif defined(SHADER_API_METAL)
  188. #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/Metal.hlsl"
  189. #elif defined(SHADER_API_VULKAN)
  190. #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/Vulkan.hlsl"
  191. #elif defined(SHADER_API_SWITCH)
  192. #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/Switch.hlsl"
  193. #elif defined(SHADER_API_GLCORE)
  194. #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/GLCore.hlsl"
  195. #elif defined(SHADER_API_GLES3)
  196. #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/GLES3.hlsl"
  197. #elif defined(SHADER_API_WEBGPU)
  198. #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/WebGPU.hlsl"
  199. #else
  200. #error unsupported shader api
  201. #endif
  202. #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/API/Validate.hlsl"
  203. #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Macros.hlsl"
  204. #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Random.hlsl"
  205. #if !defined(SHADER_API_PS5)
  206. #define PushMarker(str)
  207. #define PopMarker()
  208. #endif
  209. #ifdef SHADER_API_XBOXONE // TODO: to move in .nda package in 21.1
  210. #define PLATFORM_SUPPORTS_PRIMITIVE_ID_IN_PIXEL_SHADER
  211. #endif
  212. #if defined(PLATFORM_SUPPORTS_NATIVE_RENDERPASS)
  213. #if defined(UNITY_COMPILER_DXC)
  214. //Subpass inputs are disallowed in non-fragment shader stages with DXC so we need some dummy value to use in the fragment function while it's not being compiled
  215. #if defined(SHADER_STAGE_FRAGMENT)
  216. #define UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(type, idx) [[vk::input_attachment_index(idx)]] SubpassInput<type##4> hlslcc_fbinput_##idx
  217. #define UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(type, idx) [[vk::input_attachment_index(idx)]] SubpassInputMS<type##4> hlslcc_fbinput_##idx
  218. #else
  // Non-fragment stages: declare dummy resources so that automatic bindings do not diverge
  // from the fragment shader (important for Vulkan), plus a static dummy value for the LOAD macros.
  // Neither macro ends with ';' - this matches the fragment-stage variants, so the statement
  // terminator always comes from the place where the macro is used.
  // NOTE(review): the dummy variable is a scalar 'type' initialised from a 4-component value,
  // whereas the fragment path loads a 4-component value - confirm this is intended.
  #define UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(type, idx) Texture2D dxc_dummy_fbinput_resource##idx; static type DXC_DummySubpassVariable##idx = type(0).xxxx
  #define UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(type, idx) Texture2D dxc_dummy_fbinput_resource##idx; static type DXC_DummySubpassVariable##idx = type(0).xxxx
  222. #endif
  223. // Renderpass inputs: Vulkan/Metal subpass input
  224. #define FRAMEBUFFER_INPUT_FLOAT(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(float, idx)
  225. #define FRAMEBUFFER_INPUT_FLOAT_MS(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(float, idx)
  226. // For halfs
  227. #define FRAMEBUFFER_INPUT_HALF(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(half, idx)
  228. #define FRAMEBUFFER_INPUT_HALF_MS(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(half, idx)
  229. // For ints
  230. #define FRAMEBUFFER_INPUT_INT(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(int, idx)
  231. #define FRAMEBUFFER_INPUT_INT_MS(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(int, idx)
  232. // For uints
  233. #define FRAMEBUFFER_INPUT_UINT(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX(uint, idx)
  234. #define FRAMEBUFFER_INPUT_UINT_MS(idx) UNITY_DXC_SUBPASS_INPUT_TYPE_INDEX_MS(uint, idx)
  235. #if defined(SHADER_STAGE_FRAGMENT)
  236. #define LOAD_FRAMEBUFFER_INPUT(idx, v2fname) hlslcc_fbinput_##idx.SubpassLoad()
  237. #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fname) hlslcc_fbinput_##idx.SubpassLoad(sampleIdx)
  238. #else
  239. #define LOAD_FRAMEBUFFER_INPUT(idx, v2fname) DXC_DummySubpassVariable##idx
  240. #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fname) DXC_DummySubpassVariable##idx
  241. #endif
  242. #elif defined(SHADER_API_METAL) && defined(UNITY_NEEDS_RENDERPASS_FBFETCH_FALLBACK)
  243. // On desktop metal we need special magic due to the need to support both intel and apple silicon
  244. // since the former does not support framebuffer fetch
  245. // Due to this we have special considerations:
  246. // 1. since we might need to bind the copy texture, to simplify our lives we always declare _UnityFBInput texture
  247. // in metal translation we will add function_constant, but we still want to generate binding in hlsl
  248. // so that unity knows about the possibility
  249. // 2. hlsl do not have anything like function constants, hence we will add bool to the fake cbuffer for subpass
  250. // again, this is done only for hlsl to generate proper code - in translation it will be changed to
  251. // a proper function constant (i.e. hlslcc_SubpassInput_f_ cbuffer is just "metadata" and is absent in metal code)
  252. // 3. we want to generate an actual if command (not conditional move), hence we need to have an interim function
  253. // alas we are not able to hide in it the texture coords: we are guaranteed to have just one "declare fb input"
  254. // per index, but nothing stops users to have several "read fb input", hence we need to generate function code
  255. // in the former, where we do not know the source of uv coords
  256. // while the usage looks weird (we pass hlslcc_fbfetch_ in the function), it is ok due to the way hlsl compiler works
  257. // it will generate an actual if and access hlslcc_fbfetch_ only if framebuffer fetch is available
  258. // and when creating metal program, compiler takes care of this (function_constant magic)
  259. #define RENDERPASS_DECLARE_FALLBACK(T, idx) \
  260. Texture2D<T> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize; \
  261. inline T ReadFBInput_##idx(bool var, uint2 coord) { \
  262. [branch]if(var) { return hlslcc_fbinput_##idx; } \
  263. else { return _UnityFBInput##idx.Load(uint3(coord,0)); } \
  264. }
  265. #define RENDERPASS_DECLARE_FALLBACK_MS(T, idx) \
  266. Texture2DMS<T> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize; \
  267. inline T ReadFBInput_##idx(bool var, uint2 coord, uint sampleIdx) { \
  268. [branch]if(var) { return hlslcc_fbinput_##idx[sampleIdx]; } \
  269. else { return _UnityFBInput##idx.Load(coord,sampleIdx); } \
  270. }
  271. #define FRAMEBUFFER_INPUT_FLOAT(idx) \
  272. cbuffer hlslcc_SubpassInput_f_##idx { float4 hlslcc_fbinput_##idx; bool hlslcc_fbfetch_##idx; }; \
  273. RENDERPASS_DECLARE_FALLBACK(float4, idx)
  274. #define FRAMEBUFFER_INPUT_FLOAT_MS(idx) \
  275. cbuffer hlslcc_SubpassInput_F_##idx { float4 hlslcc_fbinput_##idx[8]; bool hlslcc_fbfetch_##idx; }; \
  276. RENDERPASS_DECLARE_FALLBACK_MS(float4, idx)
  277. #define FRAMEBUFFER_INPUT_HALF(idx) \
  278. cbuffer hlslcc_SubpassInput_h_##idx { half4 hlslcc_fbinput_##idx; bool hlslcc_fbfetch_##idx; }; \
  279. RENDERPASS_DECLARE_FALLBACK(half4, idx)
  280. #define FRAMEBUFFER_INPUT_HALF_MS(idx) \
  281. cbuffer hlslcc_SubpassInput_H_##idx { half4 hlslcc_fbinput_##idx[8]; bool hlslcc_fbfetch_##idx; }; \
  282. RENDERPASS_DECLARE_FALLBACK_MS(half4, idx)
  283. #define FRAMEBUFFER_INPUT_INT(idx) \
  284. cbuffer hlslcc_SubpassInput_i_##idx { int4 hlslcc_fbinput_##idx; bool hlslcc_fbfetch_##idx; }; \
  285. RENDERPASS_DECLARE_FALLBACK(int4, idx)
  286. #define FRAMEBUFFER_INPUT_INT_MS(idx) \
  287. cbuffer hlslcc_SubpassInput_I_##idx { int4 hlslcc_fbinput_##idx[8]; bool hlslcc_fbfetch_##idx; }; \
  288. RENDERPASS_DECLARE_FALLBACK_MS(int4, idx)
  289. #define FRAMEBUFFER_INPUT_UINT(idx) \
  290. cbuffer hlslcc_SubpassInput_u_##idx { uint4 hlslcc_fbinput_##idx; bool hlslcc_fbfetch_##idx; }; \
  291. RENDERPASS_DECLARE_FALLBACK(uint4, idx)
  // Multisampled uint framebuffer input (framebuffer-fetch fallback path).
  // Declares the "metadata" cbuffer holding the fetched samples and the fetch-availability flag,
  // then the fallback texture and ReadFBInput_<idx> reader via RENDERPASS_DECLARE_FALLBACK_MS,
  // the same helper every other *_MS variant in this section uses.
  #define FRAMEBUFFER_INPUT_UINT_MS(idx) \
  cbuffer hlslcc_SubpassInput_U_##idx { uint4 hlslcc_fbinput_##idx[8]; bool hlslcc_fbfetch_##idx; }; \
  RENDERPASS_DECLARE_FALLBACK_MS(uint4, idx)
  295. #define LOAD_FRAMEBUFFER_INPUT(idx, v2fname) ReadFBInput_##idx(hlslcc_fbfetch_##idx, uint2(v2fname.xy))
  296. #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fname) ReadFBInput_##idx(hlslcc_fbfetch_##idx, uint2(v2fname.xy), sampleIdx)
  297. #else
  298. // For floats
  299. #define FRAMEBUFFER_INPUT_FLOAT(idx) cbuffer hlslcc_SubpassInput_f_##idx { float4 hlslcc_fbinput_##idx; }
  300. #define FRAMEBUFFER_INPUT_FLOAT_MS(idx) cbuffer hlslcc_SubpassInput_F_##idx { float4 hlslcc_fbinput_##idx[8]; }
  301. // For halfs
  302. #define FRAMEBUFFER_INPUT_HALF(idx) cbuffer hlslcc_SubpassInput_h_##idx { half4 hlslcc_fbinput_##idx; }
  303. #define FRAMEBUFFER_INPUT_HALF_MS(idx) cbuffer hlslcc_SubpassInput_H_##idx { half4 hlslcc_fbinput_##idx[8]; }
  304. // For ints
  305. #define FRAMEBUFFER_INPUT_INT(idx) cbuffer hlslcc_SubpassInput_i_##idx { int4 hlslcc_fbinput_##idx; }
  306. #define FRAMEBUFFER_INPUT_INT_MS(idx) cbuffer hlslcc_SubpassInput_I_##idx { int4 hlslcc_fbinput_##idx[8]; }
  307. // For uints
  308. #define FRAMEBUFFER_INPUT_UINT(idx) cbuffer hlslcc_SubpassInput_u_##idx { uint4 hlslcc_fbinput_##idx; }
  309. #define FRAMEBUFFER_INPUT_UINT_MS(idx) cbuffer hlslcc_SubpassInput_U_##idx { uint4 hlslcc_fbinput_##idx[8]; }
  310. #define LOAD_FRAMEBUFFER_INPUT(idx, v2fname) hlslcc_fbinput_##idx
  311. #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fname) hlslcc_fbinput_##idx[sampleIdx]
  312. #endif
  313. #else
  314. // Renderpass inputs: General fallback paths
  315. #define FRAMEBUFFER_INPUT_FLOAT(idx) TEXTURE2D_FLOAT(_UnityFBInput##idx); float4 _UnityFBInput##idx##_TexelSize
  316. #define FRAMEBUFFER_INPUT_HALF(idx) TEXTURE2D_HALF(_UnityFBInput##idx); float4 _UnityFBInput##idx##_TexelSize
  317. #define FRAMEBUFFER_INPUT_INT(idx) TEXTURE2D_INT(_UnityFBInput##idx); float4 _UnityFBInput##idx##_TexelSize
  318. #define FRAMEBUFFER_INPUT_UINT(idx) TEXTURE2D_UINT(_UnityFBInput##idx); float4 _UnityFBInput##idx##_TexelSize
  319. #define LOAD_FRAMEBUFFER_INPUT(idx, v2fvertexname) _UnityFBInput##idx.Load(uint3(v2fvertexname.xy, 0))
  320. #define FRAMEBUFFER_INPUT_FLOAT_MS(idx) Texture2DMS<float4> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize
  321. #define FRAMEBUFFER_INPUT_HALF_MS(idx) Texture2DMS<float4> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize
  322. #define FRAMEBUFFER_INPUT_INT_MS(idx) Texture2DMS<int4> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize
  323. #define FRAMEBUFFER_INPUT_UINT_MS(idx) Texture2DMS<uint4> _UnityFBInput##idx; float4 _UnityFBInput##idx##_TexelSize
  324. #define LOAD_FRAMEBUFFER_INPUT_MS(idx, sampleIdx, v2fvertexname) _UnityFBInput##idx.Load(uint2(v2fvertexname.xy), sampleIdx)
  325. #endif
  326. // ----------------------------------------------------------------------------
  327. // Global resources API definitions for Ray Tracing
  328. // ----------------------------------------------------------------------------
  329. #if (SHADER_STAGE_RAY_TRACING && UNITY_RAY_TRACING_GLOBAL_RESOURCES)
  330. #define GLOBAL_RESOURCE(type, name, reg) type name : register(reg, space1);
  331. #define GLOBAL_CBUFFER_START(name, reg) cbuffer name : register(reg, space1) {
  332. #define GLOBAL_TEXTURE2D(name, reg) TEXTURE2D(name) : register(reg, space1)
  333. #define GLOBAL_TEXTURE2D_ARRAY(name, reg) TEXTURE2D_ARRAY(name) : register(reg, space1)
  334. #define GLOBAL_TEXTURECUBE_ARRAY(name, reg) TEXTURECUBE_ARRAY(name) : register(reg, space1)
  335. #else
  336. #define GLOBAL_RESOURCE(type, name, reg) type name;
  337. #define GLOBAL_CBUFFER_START(name, reg) CBUFFER_START(name)
  338. #define GLOBAL_TEXTURE2D(name, reg) TEXTURE2D(name)
  339. #define GLOBAL_TEXTURE2D_ARRAY(name, reg) TEXTURE2D_ARRAY(name)
  340. #define GLOBAL_TEXTURECUBE_ARRAY(name, reg) TEXTURECUBE_ARRAY(name)
  341. #endif
  342. // ----------------------------------------------------------------------------
  343. // Common intrinsic (general implementation of intrinsic available on some platform)
  344. // ----------------------------------------------------------------------------
  345. #if !defined(PLATFORM_SUPPORTS_WAVE_INTRINSICS) && !defined(UNITY_COMPILER_DXC) && !defined(UNITY_HW_SUPPORTS_WAVE)
  346. // Intercept wave functions when they aren't supported to provide better error messages
  347. #define WaveActiveAllTrue ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveAllTrue)
  348. #define WaveActiveAnyTrue ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveAnyTrue)
  349. #define WaveGetLaneIndex ERROR_ON_UNSUPPORTED_FUNCTION(WaveGetLaneIndex)
  350. #define WaveIsFirstLane ERROR_ON_UNSUPPORTED_FUNCTION(WaveIsFirstLane)
  351. #define GetWaveID ERROR_ON_UNSUPPORTED_FUNCTION(GetWaveID)
  352. #define WaveActiveMin ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveMin)
  353. #define WaveActiveMax ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveMax)
  354. #define WaveActiveBallot ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveBallot)
  355. #define WaveActiveSum ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveSum)
  356. #define WaveActiveBitAnd ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveBitAnd)
  357. #define WaveActiveBitOr ERROR_ON_UNSUPPORTED_FUNCTION(WaveActiveBitOr)
  358. #define WaveGetLaneCount ERROR_ON_UNSUPPORTED_FUNCTION(WaveGetLaneCount)
  359. #define WaveIsHelperLane ERROR_ON_UNSUPPORTED_FUNCTION(WaveIsHelperLane)
  360. #endif
  361. #if defined(PLATFORM_SUPPORTS_WAVE_INTRINSICS)
  // Helper macro to compute the lane swizzle offset from andMask, orMask and xorMask:
  // andMask occupies the low bits, orMask is shifted left by 5 and xorMask by 10.
  // IMPORTANT: to guarantee compatibility with all platforms, the masks need to be constant literals (constants at compile time).
  // Each argument is parenthesized so that an expression argument (e.g. `a | b`) is shifted as a whole.
  #define LANE_SWIZZLE_OFFSET(andMask, orMask, xorMask) ((andMask) | ((orMask) << 5) | ((xorMask) << 10))
  365. #endif
  366. #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/CommonDeprecated.hlsl"
  367. #ifndef INTRINSIC_BITFIELD_EXTRACT
  // Unsigned integer bit field extraction: returns the 'numBits' bits of 'data'
  // that start at bit 'offset', right-aligned.
  // Note that the intrinsic itself generates a vector instruction;
  // wrap this function with WaveReadLaneFirst() to get scalar output.
  uint BitFieldExtract(uint data, uint offset, uint numBits)
  {
      uint lowBitsMask = (1u << numBits) - 1u; // 'numBits' ones in the low bits
      uint aligned     = data >> offset;       // bring the field down to bit 0
      return aligned & lowBitsMask;
  }
  376. #endif // INTRINSIC_BITFIELD_EXTRACT
  377. #ifndef INTRINSIC_BITFIELD_EXTRACT_SIGN_EXTEND
  // Integer bit field extraction with sign extension: the top bit of the extracted
  // field is replicated into all higher bits of the result.
  // Note that the intrinsic itself generates a vector instruction;
  // wrap this function with WaveReadLaneFirst() to get scalar output.
  int BitFieldExtractSignExtend(int data, uint offset, uint numBits)
  {
      int  field     = data >> offset;                 // sign-extending (arithmetic) shift
      uint fieldMask = (1u << numBits) - 1u;
      int  topBit    = field & (1u << (numBits - 1u)); // the field's own sign bit, in place
      // Negating the isolated sign bit (two's complement) sets it and every bit above it.
      return (field & fieldMask) | -topBit;
  }
  388. #endif // INTRINSIC_BITFIELD_EXTRACT_SIGN_EXTEND
  389. #ifndef INTRINSIC_BITFIELD_INSERT
  // Inserts the bits indicated by 'mask' from 'src' into 'dst':
  // result bits come from 'src' where 'mask' is 1 and from 'dst' where it is 0.
  uint BitFieldInsert(uint mask, uint src, uint dst)
  {
      uint fromSrc = src & mask;
      uint fromDst = dst & ~mask;
      return fromSrc | fromDst;
  }
  395. #endif // INTRINSIC_BITFIELD_INSERT
  // Returns true when bit 'offset' of 'data' is 1.
  bool IsBitSet(uint data, uint offset)
  {
      uint bit = BitFieldExtract(data, offset, 1u);
      return bit != 0;
  }
  // Sets bit 'offset' of 'data' to 1 (in place).
  void SetBit(inout uint data, uint offset)
  {
      uint bit = 1u << offset;
      data = data | bit;
  }
  // Clears bit 'offset' of 'data' to 0 (in place).
  void ClearBit(inout uint data, uint offset)
  {
      uint bit = 1u << offset;
      data = data & ~bit;
  }
  // Flips bit 'offset' of 'data' (in place).
  void ToggleBit(inout uint data, uint offset)
  {
      uint bit = 1u << offset;
      data = data ^ bit;
  }
  412. #ifndef INTRINSIC_WAVEREADFIRSTLANE
  413. // Warning: for correctness, the argument's value must be the same across all lanes of the wave.
  414. TEMPLATE_1_FLT_HALF(WaveReadLaneFirst, scalarValue, return scalarValue)
  415. TEMPLATE_1_INT(WaveReadLaneFirst, scalarValue, return scalarValue)
  416. #endif
  417. #ifndef INTRINSIC_MUL24
  418. TEMPLATE_2_INT(Mul24, a, b, return a * b)
  419. #endif // INTRINSIC_MUL24
  420. #ifndef INTRINSIC_MAD24
  421. TEMPLATE_3_INT(Mad24, a, b, c, return a * b + c)
  422. #endif // INTRINSIC_MAD24
  423. #ifndef INTRINSIC_MINMAX3
  424. TEMPLATE_3_FLT_HALF(Min3, a, b, c, return min(min(a, b), c))
  425. TEMPLATE_3_INT(Min3, a, b, c, return min(min(a, b), c))
  426. TEMPLATE_3_FLT_HALF(Max3, a, b, c, return max(max(a, b), c))
  427. TEMPLATE_3_INT(Max3, a, b, c, return max(max(a, b), c))
  428. #endif // INTRINSIC_MINMAX3
  429. TEMPLATE_3_FLT_HALF(Avg3, a, b, c, return (a + b + c) * 0.33333333)
  // Important! Quad functions are only valid in pixel shaders!
  // Returns, per axis, -1.0 for an even screen coordinate and +1.0 for an odd one.
  float2 GetQuadOffset(int2 screenPos)
  {
      float oddX = float(screenPos.x & 1); // 0.0 or 1.0
      float oddY = float(screenPos.y & 1);
      return float2(oddX * 2.0 - 1.0, oddY * 2.0 - 1.0);
  }
  435. #ifndef INTRINSIC_QUAD_SHUFFLE
  // Derivative-based fallback: steps 'value' by its fine horizontal derivative,
  // adding it on even columns and subtracting it on odd columns.
  float QuadReadAcrossX(float value, int2 screenPos)
  {
      float side = float(screenPos.x & 1) * 2.0 - 1.0; // -1 on even x, +1 on odd x
      return value - (ddx_fine(value) * side);
  }
  // Derivative-based fallback: steps 'value' by its fine vertical derivative,
  // adding it on even rows and subtracting it on odd rows.
  float QuadReadAcrossY(float value, int2 screenPos)
  {
      float side = float(screenPos.y & 1) * 2.0 - 1.0; // -1 on even y, +1 on odd y
      return value - (ddy_fine(value) * side);
  }
  // Derivative-based fallback: applies the horizontal step first, then the
  // vertical step on the horizontally stepped value.
  float QuadReadAcrossDiagonal(float value, int2 screenPos)
  {
      float2 side       = GetQuadOffset(screenPos);
      float  horizontal = ddx_fine(value);
      float  acrossX    = value - (horizontal * side.x);
      float  vertical   = ddy_fine(acrossX);
      return acrossX - (vertical * side.y);
  }
  451. #endif
  // Component-wise QuadReadAcrossX for a float3.
  float3 QuadReadFloat3AcrossX(float3 val, int2 positionSS)
  {
      float3 result;
      result.x = QuadReadAcrossX(val.x, positionSS);
      result.y = QuadReadAcrossX(val.y, positionSS);
      result.z = QuadReadAcrossX(val.z, positionSS);
      return result;
  }
  // Component-wise QuadReadAcrossX for a float4.
  float4 QuadReadFloat4AcrossX(float4 val, int2 positionSS)
  {
      float4 result;
      result.x = QuadReadAcrossX(val.x, positionSS);
      result.y = QuadReadAcrossX(val.y, positionSS);
      result.z = QuadReadAcrossX(val.z, positionSS);
      result.w = QuadReadAcrossX(val.w, positionSS);
      return result;
  }
  // Component-wise QuadReadAcrossY for a float3.
  float3 QuadReadFloat3AcrossY(float3 val, int2 positionSS)
  {
      float3 result;
      result.x = QuadReadAcrossY(val.x, positionSS);
      result.y = QuadReadAcrossY(val.y, positionSS);
      result.z = QuadReadAcrossY(val.z, positionSS);
      return result;
  }
  // Component-wise QuadReadAcrossY for a float4.
  float4 QuadReadFloat4AcrossY(float4 val, int2 positionSS)
  {
      float4 result;
      result.x = QuadReadAcrossY(val.x, positionSS);
      result.y = QuadReadAcrossY(val.y, positionSS);
      result.z = QuadReadAcrossY(val.z, positionSS);
      result.w = QuadReadAcrossY(val.w, positionSS);
      return result;
  }
  // Component-wise QuadReadAcrossDiagonal for a float3.
  float3 QuadReadFloat3AcrossDiagonal(float3 val, int2 positionSS)
  {
      float3 result;
      result.x = QuadReadAcrossDiagonal(val.x, positionSS);
      result.y = QuadReadAcrossDiagonal(val.y, positionSS);
      result.z = QuadReadAcrossDiagonal(val.z, positionSS);
      return result;
  }
  // Component-wise QuadReadAcrossDiagonal for a float4.
  float4 QuadReadFloat4AcrossDiagonal(float4 val, int2 positionSS)
  {
      float4 result;
      result.x = QuadReadAcrossDiagonal(val.x, positionSS);
      result.y = QuadReadAcrossDiagonal(val.y, positionSS);
      result.z = QuadReadAcrossDiagonal(val.z, positionSS);
      result.w = QuadReadAcrossDiagonal(val.w, positionSS);
      return result;
  }
  476. TEMPLATE_SWAP(Swap) // Define a Swap(a, b) function for all types
  477. #define CUBEMAPFACE_POSITIVE_X 0
  478. #define CUBEMAPFACE_NEGATIVE_X 1
  479. #define CUBEMAPFACE_POSITIVE_Y 2
  480. #define CUBEMAPFACE_NEGATIVE_Y 3
  481. #define CUBEMAPFACE_POSITIVE_Z 4
  482. #define CUBEMAPFACE_NEGATIVE_Z 5
  483. #ifndef INTRINSIC_CUBEMAP_FACE_ID
  // Returns the CUBEMAPFACE_* index selected by the component of 'dir' with the
  // largest magnitude and that component's sign. Ties resolve to Z first, then Y, then X.
  float CubeMapFaceID(float3 dir)
  {
      float3 magnitude = abs(dir);

      if (magnitude.z >= magnitude.x && magnitude.z >= magnitude.y)
      {
          return (dir.z < 0.0) ? CUBEMAPFACE_NEGATIVE_Z : CUBEMAPFACE_POSITIVE_Z;
      }
      if (magnitude.y >= magnitude.x)
      {
          return (dir.y < 0.0) ? CUBEMAPFACE_NEGATIVE_Y : CUBEMAPFACE_POSITIVE_Y;
      }
      return (dir.x < 0.0) ? CUBEMAPFACE_NEGATIVE_X : CUBEMAPFACE_POSITIVE_X;
  }
  501. #endif // INTRINSIC_CUBEMAP_FACE_ID
  // The isnan intrinsic can't be used because it requires /Gic to be enabled on fxc, which we can't do.
  // Use IsNaN / AnyIsNaN instead: they inspect the bit pattern directly.
  bool IsNaN(float x)
  {
      uint withoutSign = asuint(x) & 0x7FFFFFFF; // ignore the sign bit
      // Above the infinity pattern means: exponent all ones and a non-zero mantissa.
      return withoutSign > 0x7F800000;
  }
  // True when at least one component of 'v' is NaN.
  bool AnyIsNaN(float2 v)
  {
      bool nanX = IsNaN(v.x);
      bool nanY = IsNaN(v.y);
      return nanX || nanY;
  }
  // True when at least one component of 'v' is NaN.
  bool AnyIsNaN(float3 v)
  {
      return AnyIsNaN(v.xy) || IsNaN(v.z);
  }
  // True when at least one component of 'v' is NaN.
  bool AnyIsNaN(float4 v)
  {
      return AnyIsNaN(v.xyz) || IsNaN(v.w);
  }
  // True when 'x' is +infinity or -infinity (exponent all ones, zero mantissa, any sign).
  bool IsInf(float x)
  {
      uint withoutSign = asuint(x) & 0x7FFFFFFF; // ignore the sign bit
      return withoutSign == 0x7F800000;
  }
  // True when at least one component of 'v' is infinite.
  bool AnyIsInf(float2 v)
  {
      bool infX = IsInf(v.x);
      bool infY = IsInf(v.y);
      return infX || infY;
  }
  // True when at least one component of 'v' is infinite.
  bool AnyIsInf(float3 v)
  {
      return AnyIsInf(v.xy) || IsInf(v.z);
  }
  // True when at least one component of 'v' is infinite.
  bool AnyIsInf(float4 v)
  {
      return AnyIsInf(v.xyz) || IsInf(v.w);
  }
  // True when 'x' is neither infinite nor NaN, i.e. its exponent bits are not all ones.
  bool IsFinite(float x)
  {
      uint exponentBits = asuint(x) & 0x7F800000;
      return exponentBits != 0x7F800000;
  }
  // Returns 'x' unchanged when it is finite, and 0 when it is infinite or NaN.
  float SanitizeFinite(float x)
  {
      if (IsFinite(x))
      {
          return x;
      }
      return 0;
  }
  // True when the bit pattern of 'x' is below that of +infinity: sign bit clear and
  // exponent not all ones. +0 qualifies; -0, negative values, infinities and NaN do not.
  bool IsPositiveFinite(float x)
  {
      uint bits = asuint(x);
      return bits < 0x7F800000;
  }
  // Returns 'x' unchanged when IsPositiveFinite(x) holds, and 0 otherwise.
  float SanitizePositiveFinite(float x)
  {
      if (IsPositiveFinite(x))
      {
          return x;
      }
      return 0;
  }
  551. // ----------------------------------------------------------------------------
  552. // Common math functions
  553. // ----------------------------------------------------------------------------
  // Converts an angle from degrees to radians.
  real DegToRad(real deg)
  {
      return (PI / 180.0) * deg;
  }
  // Converts an angle from radians to degrees.
  real RadToDeg(real rad)
  {
      return (180.0 / PI) * rad;
  }
  562. // Square functions for cleaner code
  563. TEMPLATE_1_FLT_HALF(Sq, x, return (x) * (x))
  564. TEMPLATE_1_INT(Sq, x, return (x) * (x))
  // True when 'x' has at most one bit set.
  // Note: this also returns true for x == 0, which is not a power of two.
  bool IsPower2(uint x)
  {
      uint lowestBitCleared = x & (x - 1); // clears the lowest set bit
      return lowestBitCleared == 0;
  }
  // Polynomial approximation of acos for non-negative input (the sign of 'inX' is discarded).
  // Input [0, 1] and output [0, PI/2]
  // 9 VALU
  real FastACosPos(real inX)
  {
      real x = abs(inX);
      real poly = (0.0468878 * x + -0.203471) * x + 1.570796; // p(x)
      poly *= sqrt(1.0 - x);
      return poly;
  }
  // Ref: https://seblagarde.wordpress.com/2014/12/01/inverse-trigonometric-functions-gpu-optimization-for-amd-gcn-architecture/
  // Input [-1, 1] and output [0, PI]
  // 12 VALU
  real FastACos(real inX)
  {
      real positiveResult = FastACosPos(inX);
      // Undo the range reduction: negative inputs mirror around PI/2.
      if (inX >= 0)
      {
          return positiveResult;
      }
      return PI - positiveResult;
  }
  // Derived from FastACos as HALF_PI - acos(x).
  // Same cost as Acos + 1 FR
  // Same error
  // input [-1, 1] and output [-PI/2, PI/2]
  real FastASin(real x)
  {
      real arcCos = FastACos(x);
      return HALF_PI - arcCos;
  }
  // max absolute error 1.3x10^-3
  // Eberly's odd polynomial degree 5 - respect bounds
  // 4 VGPR, 14 FR (10 FR, 1 QR), 2 scalar
  // input [0, infinity] and output [0, PI/2]
  real FastATanPos(real x)
  {
      bool belowOne = (x < 1.0);
      // Evaluate the polynomial on x or its reciprocal so the argument is never above 1.
      real arg   = belowOne ? x : 1.0 / x;
      real argSq = arg * arg;
      real acc = 0.0872929;
      acc = -0.301895 + acc * argSq;
      acc = 1.0 + acc * argSq;
      acc = acc * arg;
      // When the reciprocal was used, the result is HALF_PI minus the polynomial.
      return belowOne ? acc : HALF_PI - acc;
  }
  // Approximate atan(): evaluates FastATanPos() on |x| and restores the sign of 'x'.
  // 4 VGPR, 16 FR (12 FR, 1 QR), 2 scalar
  // input [-infinity, infinity] and output [-PI/2, PI/2]
  real FastATan(real x)
  {
      real magnitude = FastATanPos(abs(x));

      if (x < 0.0)
          return -magnitude;

      return magnitude;
  }
  // Approximate atan2(y, x): FastATan(y / x), shifted by +PI or -PI (by the sign of 'y')
  // when 'x' is negative. The shift is multiplied by the (x < 0.0) test rather than branched on.
  real FastAtan2(real y, real x)
  {
      real baseAngle = FastATan(y / x);
      real quadrantShift = real(y >= 0.0 ? PI : -PI) * (x < 0.0);
      return baseAngle + quadrantShift;
  }
  #if (SHADER_TARGET >= 45)
  // Integer log2: returns whatever firstbithigh() reports for 'x'.
  uint FastLog2(uint x)
  {
      uint highestBit = firstbithigh(x);
      return highestBit;
  }
  #endif
  624. // Using pow() often results in a warning like this:
  625. // "pow(f, e) will not work for negative f, use abs(f) or conditionally handle negative values if you expect them"
  626. // PositivePow removes this warning when you know the value is positive or 0, and avoids inf/NaN.
  627. // Note: https://msdn.microsoft.com/en-us/library/windows/desktop/bb509636(v=vs.85).aspx pow(0, >0) == 0
  // The base goes through abs() before pow(), so a negative base is evaluated by its magnitude.
  628. TEMPLATE_2_FLT_HALF(PositivePow, base, power, return pow(abs(base), power))
  629. // SafePositivePow: Same as pow(x,y) but considers x always positive and never exactly 0 such that
  630. // SafePositivePow(0,y) will numerically converge to 1 as y -> 0, including SafePositivePow(0,0) returning 1.
  631. //
  632. // First, like PositivePow, SafePositivePow removes this warning for when you know the x value is positive or 0 and you know
  633. // you avoid a NaN:
  634. // ie you know that x == 0 and y > 0, such that pow(x,y) == pow(0, >0) == 0
  635. // SafePositivePow(0, y) will however return close to 1 as y -> 0, see below.
  636. //
  637. // Also, pow(x,y) is most probably approximated as exp2(log2(x) * y), so pow(0,0) will give exp2(-inf * 0) == exp2(NaN) == NaN.
  638. //
  639. // SafePositivePow avoids NaN in allowing SafePositivePow(x,y) where (x,y) == (0,y) for any y including 0 by clamping x to a
  640. // minimum of FLT_EPS. The consequences are:
  641. //
  642. // -As a replacement for pow(0,y) where y >= 1, the result of SafePositivePow(x,y) should be close enough to 0.
  643. // -For cases where we substitute for pow(0,y) where 0 < y < 1, SafePositivePow(x,y) will quickly reach 1 as y -> 0, while
  644. // normally pow(0,y) would give 0 instead of 1 for all 0 < y.
  645. // eg: if we #define FLT_EPS 5.960464478e-8 (for fp32),
  646. // SafePositivePow(0, 0.1) = 0.1894646
  647. // SafePositivePow(0, 0.01) = 0.8467453
  648. // SafePositivePow(0, 0.001) = 0.9835021
  649. //
  650. // Depending on the intended usage of pow(), this difference in behavior might be a moot point since:
  651. // 1) by leaving "y" free to get to 0, we get NaNs
  652. // 2) the behavior of SafePositivePow() has more continuity when both x and y get closer together to 0, since
  653. // when x is assured to be positive non-zero, pow(x,x) -> 1 as x -> 0.
  654. //
  655. // TL;DR: SafePositivePow(x,y) avoids NaN and is safe for positive (x,y) including (x,y) == (0,0),
  656. // but SafePositivePow(0, y) will return close to 1 as y -> 0, instead of 0, so watch out
  657. // for behavior depending on pow(0, y) giving always 0, especially for 0 < y < 1.
  658. //
  659. // Ref: https://msdn.microsoft.com/en-us/library/windows/desktop/bb509636(v=vs.85).aspx
  // Float flavor: |base| is clamped to at least FLT_EPS before being handed to pow().
  660. TEMPLATE_2_FLT(SafePositivePow, base, power, return pow(max(abs(base), float(FLT_EPS)), power))
  // Half flavor: same clamp, using HALF_EPS converted to min16float.
  661. TEMPLATE_2_HALF(SafePositivePow, base, power, return pow(max(abs(base), min16float(HALF_EPS)), power))
  662. // Helpers for making shadergraph functions consider the precision spec through the same $precision token used for variable types.
  // The bodies repeat those of SafePositivePow; only the _float / _half suffix in the name differs.
  663. TEMPLATE_2_FLT(SafePositivePow_float, base, power, return pow(max(abs(base), float(FLT_EPS)), power))
  664. TEMPLATE_2_HALF(SafePositivePow_half, base, power, return pow(max(abs(base), min16float(HALF_EPS)), power))
  // Function-style accessors for the float limit constants (FLT_EPS, FLT_MIN, FLT_MAX).
  float Eps_float()
  {
      return FLT_EPS;
  }

  float Min_float()
  {
      return FLT_MIN;
  }

  float Max_float()
  {
      return FLT_MAX;
  }

  // Function-style accessors for the half limit constants (HALF_EPS, HALF_MIN, HALF_MAX).
  half Eps_half()
  {
      return HALF_EPS;
  }

  half Min_half()
  {
      return HALF_MIN;
  }

  half Max_half()
  {
      return HALF_MAX;
  }
  // Relative 'epsilon equal': the difference |a - b| is measured against |a| + |b|, so the
  // farther from 0.0f 'a' or 'b' are, the larger the absolute tolerance becomes.
  //   a, b    : values to compare.
  //   epsilon : relative tolerance; the ratio must be strictly below it.
  // Exactly equal inputs are reported as equal up front. Without that shortcut the case
  // a == b == 0 would evaluate 0 / 0 (NaN) and two zeros would compare as different.
  bool NearlyEqual(float a, float b, float epsilon)
  {
      if (a == b)
          return true;

      return abs(a - b) / (abs(a) + abs(b)) < epsilon;
  }
  // Fixed-tolerance variants of the relative comparison: |a - b| / (|a| + |b|) is tested
  // against FLT_EPS (float flavor) or HALF_EPS (half flavor).
  // NOTE(review): when both arguments are 0 the ratio is 0 / 0, so the '<' test is made
  // against NaN - confirm whether callers can pass two zeros.
  677. TEMPLATE_2_FLT(NearlyEqual_Float, a, b, return abs(a - b) / (abs(a) + abs(b)) < float(FLT_EPS))
  678. TEMPLATE_2_HALF(NearlyEqual_Half, a, b, return abs(a - b) / (abs(a) + abs(b)) < min16float(HALF_EPS))
  // Composes a floating point value with the magnitude of 'x' and the sign of 's'.
  // See the comment about FastSign() below.
  //   ignoreNegZero = true  : the sign is decided by (s >= 0), so -0 counts as positive.
  //   ignoreNegZero = false : the sign bit of 's' is transplanted into 'x' bit for bit.
  float CopySign(float x, float s, bool ignoreNegZero = true)
  {
      if (!ignoreNegZero)
      {
          const uint signMask = 0x80000000u;
          uint signOfS = signMask & asuint(s);
          // BitFieldInsert() is defined elsewhere; presumably it writes the masked bits of its
          // second argument into its third.
          return asfloat(BitFieldInsert(signMask, signOfS, asuint(x)));
      }

      float magnitude = abs(x);
      return (s >= 0) ? magnitude : -magnitude;
  }
  // Returns -1 for negative numbers and 1 for positive numbers.
  // 0 can be handled in 2 different ways:
  // the IEEE floating point standard defines 0 as signed (+0 and -0),
  // whereas mathematics typically treats 0 as unsigned.
  // Therefore, -0 is treated as +0 by default: FastSign(+0) = FastSign(-0) = 1.
  // If (ignoreNegZero = false), FastSign(-0, false) = -1.
  // Note that the sign() function in HLSL implements signum, which returns 0 for 0.
  float FastSign(float s, bool ignoreNegZero = true)
  {
      const float unit = 1.0;
      return CopySign(unit, s, ignoreNegZero);
  }
  // Orthonormalizes the tangent frame using the Gram-Schmidt process.
  // The normal is assumed to be normalized, and the two vectors are assumed
  // not to be collinear.
  // Returns the new tangent (the normal is unaffected).
  real3 Orthonormalize(real3 tangent, real3 normal)
  {
      // Remove the part of the tangent that lies along the normal, then renormalize.
      real  alongNormal = dot(tangent, normal);
      real3 rejected    = tangent - alongNormal * normal;

      // TODO: use SafeNormalize()?
      return normalize(rejected);
  }
  714. // [start, end] -> [0, 1] : (x - start) / (end - start) = x * rcpLength - (start * rcpLength)
  // The result is clamped with saturate(). Callers pass the precomputed 1 / (end - start) and start / (end - start).
  715. TEMPLATE_3_FLT_HALF(Remap01, x, rcpLength, startTimesRcpLength, return saturate(x * rcpLength - startTimesRcpLength))
  716. // [start, end] -> [1, 0] : (end - x) / (end - start) = (end * rcpLength) - x * rcpLength
  // The result is clamped with saturate(). Callers pass the precomputed 1 / (end - start) and end / (end - start).
  717. TEMPLATE_3_FLT_HALF(Remap10, x, rcpLength, endTimesRcpLength, return saturate(endTimesRcpLength - x * rcpLength))
  // Remap: [0.5 / size, 1 - 0.5 / size] -> [0, 1]
  // The result goes through Remap01() and is therefore clamped to [0, 1].
  real2 RemapHalfTexelCoordTo01(real2 coord, real2 size)
  {
      const real2 rcpSizeMinusOne     = rcp(size - 1);
      const real2 rcpLen              = size * rcpSizeMinusOne;
      const real2 startTimesRcpLength = 0.5 * rcpSizeMinusOne;

      return Remap01(coord, rcpLen, startTimesRcpLength);
  }
  // Remap: [0, 1] -> [0.5 / size, 1 - 0.5 / size]
  real2 Remap01ToHalfTexelCoord(real2 coord, real2 size)
  {
      const real2 rcpSize = rcp(size);
      const real2 start   = 0.5 * rcpSize; // half a texel
      const real2 len     = 1 - rcpSize;   // one full texel shorter than [0, 1]

      return coord * len + start;
  }
  // smoothstep that assumes that 'x' lies within the [0, 1] interval (no clamping is done here).
  real Smoothstep01(real x)
  {
      real xSquared = x * x;
      return xSquared * (3 - (2 * x));
  }
  // Quintic counterpart of Smoothstep01(): 6x^5 - 15x^4 + 10x^3. 'x' is not clamped here.
  real Smootherstep01(real x)
  {
      real xCubed = x * x * x;
      real poly   = x * (x * 6 - 15) + 10;
      return xCubed * poly;
  }
  // Smootherstep01() applied to 't' remapped from [a, b] to [0, 1].
  // The remap goes through Remap01(), which saturates, so 't' outside [a, b] is clamped.
  real Smootherstep(real a, real b, real t)
  {
      real rcpRange = rcp(b - a);
      return Smootherstep01(Remap01(t, rcpRange, a * rcpRange));
  }
  // Normalized linear interpolation: lerp between 'A' and 'B', then renormalize the result.
  float3 NLerp(float3 A, float3 B, float t)
  {
      float3 blended = lerp(A, B, t);
      return normalize(blended);
  }
  // Squared length of 'v' (dot product of the vector with itself).
  float Length2(float3 v)
  {
      float squaredLength = dot(v, v);
      return squaredLength;
  }
  #ifndef BUILTIN_TARGET_API
  // x^4, computed by squaring the square.
  real Pow4(real x)
  {
      real xSquared = x * x;
      return xSquared * xSquared;
  }
  #endif
  // [min, max] -> [0, 1], clamped: saturate((t - min) / (max - min)).
  761. TEMPLATE_3_FLT(RangeRemap, min, max, t, return saturate((t - min) / (max - min)))
  // [0, 1] -> [min, max], not clamped: (max - min) * t + min.
  762. TEMPLATE_3_FLT(RangeRemapFrom01, min, max, t, return (max - min) * t + min)
  // General 4x4 matrix inverse, written out as explicit cofactor expressions divided by the determinant.
  // There is no singularity check: a zero determinant makes 'idet' the result of a division by zero.
  763. float4x4 Inverse(float4x4 m)
  764. {
  // Local element names: nRC reads m[C - 1][R - 1], i.e. the two indices are swapped
  // relative to the m[][] subscripts (n12 = m[1][0], n21 = m[0][1], ...).
  765. float n11 = m[0][0], n12 = m[1][0], n13 = m[2][0], n14 = m[3][0];
  766. float n21 = m[0][1], n22 = m[1][1], n23 = m[2][1], n24 = m[3][1];
  767. float n31 = m[0][2], n32 = m[1][2], n33 = m[2][2], n34 = m[3][2];
  768. float n41 = m[0][3], n42 = m[1][3], n43 = m[2][3], n44 = m[3][3];
  // t11..t14 are shared: they build the determinant below and, scaled by 'idet',
  // fill ret[0][0], ret[1][0], ret[2][0] and ret[3][0].
  769. float t11 = n23 * n34 * n42 - n24 * n33 * n42 + n24 * n32 * n43 - n22 * n34 * n43 - n23 * n32 * n44 + n22 * n33 * n44;
  770. float t12 = n14 * n33 * n42 - n13 * n34 * n42 - n14 * n32 * n43 + n12 * n34 * n43 + n13 * n32 * n44 - n12 * n33 * n44;
  771. float t13 = n13 * n24 * n42 - n14 * n23 * n42 + n14 * n22 * n43 - n12 * n24 * n43 - n13 * n22 * n44 + n12 * n23 * n44;
  772. float t14 = n14 * n23 * n32 - n13 * n24 * n32 - n14 * n22 * n33 + n12 * n24 * n33 + n13 * n22 * n34 - n12 * n23 * n34;
  // Determinant, expanded along n11, n21, n31, n41.
  773. float det = n11 * t11 + n21 * t12 + n31 * t13 + n41 * t14;
  774. float idet = 1.0f / det;
  // Every remaining entry is one six-term expression scaled by the reciprocal determinant.
  775. float4x4 ret;
  776. ret[0][0] = t11 * idet;
  777. ret[0][1] = (n24 * n33 * n41 - n23 * n34 * n41 - n24 * n31 * n43 + n21 * n34 * n43 + n23 * n31 * n44 - n21 * n33 * n44) * idet;
  778. ret[0][2] = (n22 * n34 * n41 - n24 * n32 * n41 + n24 * n31 * n42 - n21 * n34 * n42 - n22 * n31 * n44 + n21 * n32 * n44) * idet;
  779. ret[0][3] = (n23 * n32 * n41 - n22 * n33 * n41 - n23 * n31 * n42 + n21 * n33 * n42 + n22 * n31 * n43 - n21 * n32 * n43) * idet;
  780. ret[1][0] = t12 * idet;
  781. ret[1][1] = (n13 * n34 * n41 - n14 * n33 * n41 + n14 * n31 * n43 - n11 * n34 * n43 - n13 * n31 * n44 + n11 * n33 * n44) * idet;
  782. ret[1][2] = (n14 * n32 * n41 - n12 * n34 * n41 - n14 * n31 * n42 + n11 * n34 * n42 + n12 * n31 * n44 - n11 * n32 * n44) * idet;
  783. ret[1][3] = (n12 * n33 * n41 - n13 * n32 * n41 + n13 * n31 * n42 - n11 * n33 * n42 - n12 * n31 * n43 + n11 * n32 * n43) * idet;
  784. ret[2][0] = t13 * idet;
  785. ret[2][1] = (n14 * n23 * n41 - n13 * n24 * n41 - n14 * n21 * n43 + n11 * n24 * n43 + n13 * n21 * n44 - n11 * n23 * n44) * idet;
  786. ret[2][2] = (n12 * n24 * n41 - n14 * n22 * n41 + n14 * n21 * n42 - n11 * n24 * n42 - n12 * n21 * n44 + n11 * n22 * n44) * idet;
  787. ret[2][3] = (n13 * n22 * n41 - n12 * n23 * n41 - n13 * n21 * n42 + n11 * n23 * n42 + n12 * n21 * n43 - n11 * n22 * n43) * idet;
  788. ret[3][0] = t14 * idet;
  789. ret[3][1] = (n13 * n24 * n31 - n14 * n23 * n31 + n14 * n21 * n33 - n11 * n24 * n33 - n13 * n21 * n34 + n11 * n23 * n34) * idet;
  790. ret[3][2] = (n14 * n22 * n31 - n12 * n24 * n31 - n14 * n21 * n32 + n11 * n24 * n32 + n12 * n21 * n34 - n11 * n22 * n34) * idet;
  791. ret[3][3] = (n12 * n23 * n31 - n13 * n22 * n31 + n13 * n21 * n32 - n11 * n23 * n32 - n12 * n21 * n33 + n11 * n22 * n33) * idet;
  792. return ret;
  793. }
  // Linearly maps 'value' from [origFrom, origTo] to [targetFrom, targetTo].
  // The interpolation factor is not clamped, so values outside the source range extrapolate.
  float Remap(float origFrom, float origTo, float targetFrom, float targetTo, float value)
  {
      float t = (value - origFrom) / (origTo - origFrom);
      return lerp(targetFrom, targetTo, t);
  }
  798. // ----------------------------------------------------------------------------
  799. // Texture utilities
  800. // ----------------------------------------------------------------------------
  // LOD estimate from explicit UV derivatives: 0.5 * log2 of the larger squared length of
  // the two scaled derivatives, with 'bias' subtracted and the result clamped to be >= 0.
  float ComputeTextureLOD(float2 uvdx, float2 uvdy, float2 scale, float bias = 0.0)
  {
      float2 scaledDx = scale * uvdx;
      float2 scaledDy = scale * uvdy;
      float  maxSqLen = max(dot(scaledDx, scaledDx), dot(scaledDy, scaledDy));

      float lod = 0.5 * log2(maxSqLen) - bias;
      return max(lod, 0.0);
  }
  // LOD estimate for 'uv', using its screen-space derivatives (ddx / ddy) and a unit scale.
  float ComputeTextureLOD(float2 uv, float bias = 0.0)
  {
      float2 uvDx = ddx(uv);
      float2 uvDy = ddy(uv);

      return ComputeTextureLOD(uvDx, uvDy, 1.0, bias);
  }
  // LOD estimate for 'uv' after scaling it by 'texelSize'.
  // texelSize.x contains width, texelSize.y contains height.
  float ComputeTextureLOD(float2 uv, float2 texelSize, float bias = 0.0)
  {
      float2 scaledUV = uv * texelSize;
      return ComputeTextureLOD(scaledUV, bias);
  }
  // LOD estimate for a 3D texture from the three UVW derivatives and a uniform scale.
  // LOD clamp is optional and happens outside the function
  // (only the lower bound of 0 is applied here).
  float ComputeTextureLOD(float3 duvw_dx, float3 duvw_dy, float3 duvw_dz, float scale, float bias = 0.0)
  {
      float sqLenX = dot(duvw_dx, duvw_dx);
      float sqLenY = dot(duvw_dy, duvw_dy);
      float sqLenZ = dot(duvw_dz, duvw_dz);
      float d      = Max3(sqLenX, sqLenY, sqLenZ);

      return max(0.5f * log2(d * (scale * scale)) - bias, 0.0);
  }
  // MIP_COUNT_SUPPORTED is defined on the graphics APIs listed below; GetMipCount() keys off it.
  #if defined(SHADER_API_D3D11) || defined(SHADER_API_D3D12) || defined(SHADER_API_D3D11_9X) || defined(SHADER_API_XBOXONE) || defined(SHADER_API_PSSL) || defined(SHADER_API_METAL)
      #define MIP_COUNT_SUPPORTED 1
  // TODO: Bug workaround, switch defines GLCORE when it shouldn't
  #elif ((defined(SHADER_API_GLCORE) && !defined(SHADER_API_SWITCH)) || defined(SHADER_API_VULKAN)) && !defined(SHADER_STAGE_COMPUTE)
      // OpenGL only supports textureSize for width, height, depth
      // textureQueryLevels (GL_ARB_texture_query_levels) needs OpenGL 4.3 or above and doesn't compile in compute shaders
      // tex.GetDimensions converted to textureQueryLevels
      #define MIP_COUNT_SUPPORTED 1
  #endif
  // Metal doesn't support high enough OpenGL version
  // Returns the mip count reported by tex.GetDimensions() when MIP_COUNT_SUPPORTED is defined,
  // and 0 otherwise. The sampler half of the TEXTURE2D_PARAM pair is not used.
  uint GetMipCount(TEXTURE2D_PARAM(tex, smp))
  {
  #if defined(MIP_COUNT_SUPPORTED)
      uint mipLevel = 0;
      uint width    = 0;
      uint height   = 0;
      uint mipCount = 0;

      tex.GetDimensions(mipLevel, width, height, mipCount);
      return mipCount;
  #else
      return 0;
  #endif
  }
  848. // ----------------------------------------------------------------------------
  849. // Texture format sampling
  850. // ----------------------------------------------------------------------------
  // DXC no longer supports DX9-style HLSL syntax for sampler2D, tex2D and the like.
  // These are emulated for backwards compatibility using our own small structs and functions
  // which manually combine samplers and textures.
  #if defined(UNITY_COMPILER_DXC) && !defined(DXC_SAMPLER_COMPATIBILITY)
  #define DXC_SAMPLER_COMPATIBILITY 1

  // On DXC platforms which don't care about explicit sampler precision we want the emulated types
  // to work directly, e.g. without needing to redefine 'sampler2D' to 'sampler2D_f'.
  #if !defined(SHADER_API_GLES3) && !defined(SHADER_API_VULKAN) && !defined(SHADER_API_METAL) && !defined(SHADER_API_SWITCH) && !defined(SHADER_API_WEBGPU)
      #define sampler1D_f sampler1D
      #define sampler2D_f sampler2D
      #define sampler3D_f sampler3D
      #define samplerCUBE_f samplerCUBE
  #endif

  // ---- Full precision emulated samplers: a float4 texture paired with its sampler state ----
  struct sampler1D_f { Texture1D<float4> t; SamplerState s; };
  struct sampler2D_f { Texture2D<float4> t; SamplerState s; };
  struct sampler3D_f { Texture3D<float4> t; SamplerState s; };
  struct samplerCUBE_f { TextureCube<float4> t; SamplerState s; };

  // Plain sampling.
  float4 tex1D(sampler1D_f x, float v) { return x.t.Sample(x.s, v); }
  float4 tex2D(sampler2D_f x, float2 v) { return x.t.Sample(x.s, v); }
  float4 tex3D(sampler3D_f x, float3 v) { return x.t.Sample(x.s, v); }
  float4 texCUBE(samplerCUBE_f x, float3 v) { return x.t.Sample(x.s, v); }

  // Biased sampling: the bias is carried in t.w.
  float4 tex1Dbias(sampler1D_f x, in float4 t) { return x.t.SampleBias(x.s, t.x, t.w); }
  float4 tex2Dbias(sampler2D_f x, in float4 t) { return x.t.SampleBias(x.s, t.xy, t.w); }
  float4 tex3Dbias(sampler3D_f x, in float4 t) { return x.t.SampleBias(x.s, t.xyz, t.w); }
  float4 texCUBEbias(samplerCUBE_f x, in float4 t) { return x.t.SampleBias(x.s, t.xyz, t.w); }

  // Explicit-LOD sampling: the level is carried in t.w.
  float4 tex1Dlod(sampler1D_f x, in float4 t) { return x.t.SampleLevel(x.s, t.x, t.w); }
  float4 tex2Dlod(sampler2D_f x, in float4 t) { return x.t.SampleLevel(x.s, t.xy, t.w); }
  float4 tex3Dlod(sampler3D_f x, in float4 t) { return x.t.SampleLevel(x.s, t.xyz, t.w); }
  float4 texCUBElod(samplerCUBE_f x, in float4 t) { return x.t.SampleLevel(x.s, t.xyz, t.w); }

  // Sampling with explicit gradients.
  float4 tex1Dgrad(sampler1D_f x, float t, float dx, float dy) { return x.t.SampleGrad(x.s, t, dx, dy); }
  float4 tex2Dgrad(sampler2D_f x, float2 t, float2 dx, float2 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }
  float4 tex3Dgrad(sampler3D_f x, float3 t, float3 dx, float3 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }
  float4 texCUBEgrad(samplerCUBE_f x, float3 t, float3 dx, float3 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }

  // Four-argument tex*() overloads: same as the tex*grad() functions above.
  float4 tex1D(sampler1D_f x, float t, float dx, float dy) { return tex1Dgrad(x, t, dx, dy); }
  float4 tex2D(sampler2D_f x, float2 t, float2 dx, float2 dy) { return tex2Dgrad(x, t, dx, dy); }
  float4 tex3D(sampler3D_f x, float3 t, float3 dx, float3 dy) { return tex3Dgrad(x, t, dx, dy); }
  float4 texCUBE(samplerCUBE_f x, float3 t, float3 dx, float3 dy) { return texCUBEgrad(x, t, dx, dy); }

  // Projective sampling: the coordinate is divided by its last component before the lookup.
  float4 tex1Dproj(sampler1D_f s, in float2 t) { return tex1D(s, t.x / t.y); }
  float4 tex1Dproj(sampler1D_f s, in float4 t) { return tex1D(s, t.x / t.w); }
  float4 tex2Dproj(sampler2D_f s, in float3 t) { return tex2D(s, t.xy / t.z); }
  float4 tex2Dproj(sampler2D_f s, in float4 t) { return tex2D(s, t.xy / t.w); }
  float4 tex3Dproj(sampler3D_f s, in float4 t) { return tex3D(s, t.xyz / t.w); }
  float4 texCUBEproj(samplerCUBE_f s, in float4 t) { return texCUBE(s, t.xyz / t.w); }

  // ---- Half precision emulated samplers, used instead of the sampler.*_half unity types ----
  struct sampler1D_h { Texture1D<min16float4> t; SamplerState s; };
  struct sampler2D_h { Texture2D<min16float4> t; SamplerState s; };
  struct sampler3D_h { Texture3D<min16float4> t; SamplerState s; };
  struct samplerCUBE_h { TextureCube<min16float4> t; SamplerState s; };

  // Plain sampling.
  min16float4 tex1D(sampler1D_h x, float v) { return x.t.Sample(x.s, v); }
  min16float4 tex2D(sampler2D_h x, float2 v) { return x.t.Sample(x.s, v); }
  min16float4 tex3D(sampler3D_h x, float3 v) { return x.t.Sample(x.s, v); }
  min16float4 texCUBE(samplerCUBE_h x, float3 v) { return x.t.Sample(x.s, v); }

  // Biased sampling: the bias is carried in t.w.
  min16float4 tex1Dbias(sampler1D_h x, in float4 t) { return x.t.SampleBias(x.s, t.x, t.w); }
  min16float4 tex2Dbias(sampler2D_h x, in float4 t) { return x.t.SampleBias(x.s, t.xy, t.w); }
  min16float4 tex3Dbias(sampler3D_h x, in float4 t) { return x.t.SampleBias(x.s, t.xyz, t.w); }
  min16float4 texCUBEbias(samplerCUBE_h x, in float4 t) { return x.t.SampleBias(x.s, t.xyz, t.w); }

  // Explicit-LOD sampling: the level is carried in t.w.
  min16float4 tex1Dlod(sampler1D_h x, in float4 t) { return x.t.SampleLevel(x.s, t.x, t.w); }
  min16float4 tex2Dlod(sampler2D_h x, in float4 t) { return x.t.SampleLevel(x.s, t.xy, t.w); }
  min16float4 tex3Dlod(sampler3D_h x, in float4 t) { return x.t.SampleLevel(x.s, t.xyz, t.w); }
  min16float4 texCUBElod(samplerCUBE_h x, in float4 t) { return x.t.SampleLevel(x.s, t.xyz, t.w); }

  // Sampling with explicit gradients.
  min16float4 tex1Dgrad(sampler1D_h x, float t, float dx, float dy) { return x.t.SampleGrad(x.s, t, dx, dy); }
  min16float4 tex2Dgrad(sampler2D_h x, float2 t, float2 dx, float2 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }
  min16float4 tex3Dgrad(sampler3D_h x, float3 t, float3 dx, float3 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }
  min16float4 texCUBEgrad(samplerCUBE_h x, float3 t, float3 dx, float3 dy) { return x.t.SampleGrad(x.s, t, dx, dy); }

  // Four-argument tex*() overloads: same as the tex*grad() functions above.
  min16float4 tex1D(sampler1D_h x, float t, float dx, float dy) { return tex1Dgrad(x, t, dx, dy); }
  min16float4 tex2D(sampler2D_h x, float2 t, float2 dx, float2 dy) { return tex2Dgrad(x, t, dx, dy); }
  min16float4 tex3D(sampler3D_h x, float3 t, float3 dx, float3 dy) { return tex3Dgrad(x, t, dx, dy); }
  min16float4 texCUBE(samplerCUBE_h x, float3 t, float3 dx, float3 dy) { return texCUBEgrad(x, t, dx, dy); }

  // Projective sampling: the coordinate is divided by its last component before the lookup.
  min16float4 tex1Dproj(sampler1D_h s, in float2 t) { return tex1D(s, t.x / t.y); }
  min16float4 tex1Dproj(sampler1D_h s, in float4 t) { return tex1D(s, t.x / t.w); }
  min16float4 tex2Dproj(sampler2D_h s, in float3 t) { return tex2D(s, t.xy / t.z); }
  min16float4 tex2Dproj(sampler2D_h s, in float4 t) { return tex2D(s, t.xy / t.w); }
  min16float4 tex3Dproj(sampler3D_h s, in float4 t) { return tex3D(s, t.xyz / t.w); }
  min16float4 texCUBEproj(samplerCUBE_h s, in float4 t) { return texCUBE(s, t.xyz / t.w); }
  #endif
  // Maps a (not necessarily normalized) direction to 2D lat-long coordinates.
  float2 DirectionToLatLongCoordinate(float3 unDir)
  {
      float3 dir = normalize(unDir);

      // coordinate frame is (-Z, X) meaning negative Z is primary axis and X is secondary axis.
      float u = 1.0 - 0.5 * INV_PI * atan2(dir.x, -dir.z);
      float v = asin(dir.y) * INV_PI + 0.5;

      return float2(u, v);
  }
  // Maps 2D lat-long coordinates to a direction: coord.y drives the polar angle (theta),
  // coord.x the azimuth (phi).
  float3 LatlongToDirectionCoordinate(float2 coord)
  {
      float theta = coord.y * PI;
      float phi = (coord.x * 2.f * PI - PI*0.5f);

      float cosTheta = cos(theta);
      // min() keeps the sqrt() argument from going negative.
      float sinTheta = sqrt(1.0 - min(1.0, cosTheta*cosTheta));
      float cosPhi = cos(phi);
      float sinPhi = sin(phi);

      // The X and Y components are negated.
      float3 direction;
      direction.x = -(sinTheta*cosPhi);
      direction.y = -cosTheta;
      direction.z = sinTheta*sinPhi;
      return direction;
  }
  // Turns an angle (radians, as consumed by cos/sin) into the 2D vector (cos, sin).
  float2 OrientationToDirection(float orientation)
  {
      float2 direction;
      direction.x = cos(orientation);
      direction.y = sin(orientation);
      return direction;
  }
  946. // ----------------------------------------------------------------------------
  947. // Depth encoding/decoding
  948. // ----------------------------------------------------------------------------
  // Z buffer to linear 0..1 depth (0 at near plane, 1 at far plane).
  // Does NOT correctly handle oblique view frustums.
  // Does NOT work with orthographic projection.
  // zBufferParam = { (f-n)/n, 1, (f-n)/n*f, 1/f }
  float Linear01DepthFromNear(float depth, float4 zBufferParam)
  {
      float denominator = zBufferParam.x + zBufferParam.y / depth;
      return 1.0 / denominator;
  }
  // Z buffer to linear 0..1 depth (0 at camera position, 1 at far plane).
  // Does NOT work with orthographic projections.
  // Does NOT correctly handle oblique view frustums.
  // zBufferParam = { (f-n)/n, 1, (f-n)/n*f, 1/f }
  float Linear01Depth(float depth, float4 zBufferParam)
  {
      float denominator = zBufferParam.x * depth + zBufferParam.y;
      return 1.0 / denominator;
  }
  // Z buffer to linear depth.
  // Does NOT correctly handle oblique view frustums.
  // Does NOT work with orthographic projection.
  // zBufferParam = { (f-n)/n, 1, (f-n)/n*f, 1/f }
  float LinearEyeDepth(float depth, float4 zBufferParam)
  {
      float denominator = zBufferParam.z * depth + zBufferParam.w;
      return 1.0 / denominator;
  }
  // Z buffer to linear depth.
  // Correctly handles oblique view frustums.
  // Does NOT work with orthographic projection.
  // Ref: An Efficient Depth Linearization Method for Oblique View Frustums, Eq. 6.
  float LinearEyeDepth(float2 positionNDC, float deviceDepth, float4 invProjParam)
  {
      float4 ndcPoint   = float4(positionNDC, deviceDepth, 1.0);
      float  viewSpaceZ = rcp(dot(ndcPoint, invProjParam));

      // If the matrix is right-handed, we have to flip the Z axis to get a positive value.
      return abs(viewSpaceZ);
  }
  // Z buffer to linear depth.
  // Works in all cases.
  // Typically, this is the cheapest variant, provided you've already computed 'positionWS'.
  // Assumes that the 'positionWS' is in front of the camera.
  float LinearEyeDepth(float3 positionWS, float4x4 viewMatrix)
  {
      float4 positionVS = mul(viewMatrix, float4(positionWS, 1.0));

      // If the matrix is right-handed, we have to flip the Z axis to get a positive value.
      return abs(positionVS.z);
  }
  // 'z' is the view space Z position (linear depth).
  // Apply saturate() to the output of the function to clamp it to the [0, 1] range.
  // d = log2(c * (z - n) + 1) / log2(c * (f - n) + 1)
  //   = log2(c * (z - n + 1/c)) / log2(c * (f - n) + 1)
  //   = log2(c) / log2(c * (f - n) + 1) + log2(z - (n - 1/c)) / log2(c * (f - n) + 1)
  //   = E + F * log2(z - G)
  // encodingParams = { E, F, G, 0 }
  float EncodeLogarithmicDepthGeneralized(float z, float4 encodingParams)
  {
      // Use max() to avoid NaNs.
      float shiftedZ = max(0, z - encodingParams.z);
      return encodingParams.x + encodingParams.y * log2(shiftedZ);
  }
  // 'd' is the logarithmically encoded depth value.
  // Apply saturate() to 'd' to clamp the output of the function to the [n, f] range.
  // z = 1/c * (pow(c * (f - n) + 1, d) - 1) + n
  //   = 1/c * pow(c * (f - n) + 1, d) + n - 1/c
  //   = 1/c * exp2(d * log2(c * (f - n) + 1)) + (n - 1/c)
  //   = L * exp2(d * M) + N
  // decodingParams = { L, M, N, 0 }
  // Graph: https://www.desmos.com/calculator/qrtatrlrba
  float DecodeLogarithmicDepthGeneralized(float d, float4 decodingParams)
  {
      float exponential = exp2(d * decodingParams.y);
      return decodingParams.x * exponential + decodingParams.z;
  }
  // 'z' is the view-space Z position (linear depth).
  // Apply saturate() to the output of the function to clamp it to the [0, 1] range.
  // encodingParams = { n, log2(f/n), 1/n, 1/log2(f/n) }
  // This is an optimized version of EncodeLogarithmicDepthGeneralized() for (c = 2).
  float EncodeLogarithmicDepth(float z, float4 encodingParams)
  {
      // Use max() to avoid NaNs.
      // TODO: optimize to (log2(z) - log2(n)) / (log2(f) - log2(n)).
      float zOverNear = max(0, z * encodingParams.z);
      return log2(zOverNear) * encodingParams.w;
  }
  // 'd' is the logarithmically encoded depth value.
  // Apply saturate() to 'd' to clamp the output of the function to the [n, f] range.
  // encodingParams = { n, log2(f/n), 1/n, 1/log2(f/n) }
  // This is an optimized version of DecodeLogarithmicDepthGeneralized() for (c = 2).
  // Graph: https://www.desmos.com/calculator/qrtatrlrba
  float DecodeLogarithmicDepth(float d, float4 encodingParams)
  {
      // TODO: optimize to exp2(d * y + log2(x)).
      float exponential = exp2(d * encodingParams.y);
      return encodingParams.x * exponential;
  }
  // Use an infinite far plane
  // https://chaosinmotion.com/2010/09/06/goodbye-far-clipping-plane/
  // 'depth' is the linear depth (view-space Z position)
  // The encoded value is near / depth, clamped to [0, 1].
  float EncodeInfiniteDepth(float depth, float near)
  {
      float nearOverDepth = near / depth;
      return saturate(nearOverDepth);
  }
  // 'z' is the depth encoded in the depth buffer (1 at near plane, 0 at far plane)
  // 'z' is kept at or above FLT_EPS so the division never sees a zero denominator.
  float DecodeInfiniteDepth(float z, float near)
  {
      float safeZ = max(z, FLT_EPS);
      return near / safeZ;
  }
  // 'Over' compositing of 'front' on top of 'back': 'back' is attenuated by (1 - front.a)
  // and 'front' is added as-is (no multiplication by its own alpha).
  real4 CompositeOver(real4 front, real4 back)
  {
      real backWeight = 1 - front.a;
      return front + backWeight * back;
  }
  // 'Over' compositing with a separate alpha per color channel.
  // Both the color and the alpha of the back layer are attenuated by (1 - alphaFront).
  void CompositeOver(real3 colorFront, real3 alphaFront,
                     real3 colorBack,  real3 alphaBack,
                     out real3 color,  out real3 alpha)
  {
      real3 backWeight = 1 - alphaFront;

      color = colorFront + backWeight * colorBack;
      alpha = alphaFront + backWeight * alphaBack;
  }
  1060. // ----------------------------------------------------------------------------
  1061. // Space transformations
  1062. // ----------------------------------------------------------------------------
  // Identity matrices; k_identity4x4 is the default 'clipSpaceTransform' of the helpers below.
  static const float3x3 k_identity3x3 = float3x3(1, 0, 0,
                                                 0, 1, 0,
                                                 0, 0, 1);

  static const float4x4 k_identity4x4 = float4x4(1, 0, 0, 0,
                                                 0, 1, 0, 0,
                                                 0, 0, 1, 0,
                                                 0, 0, 0, 1);
  // Builds a clip space position (w = 1) from [0, 1] NDC coordinates and a device depth.
  float4 ComputeClipSpacePosition(float2 positionNDC, float deviceDepth)
  {
      // [0, 1] -> [-1, 1]
      float2 xyCS = positionNDC * 2.0 - 1.0;

  #if UNITY_UV_STARTS_AT_TOP
      // Our world space, view space, screen space and NDC space are Y-up.
      // Our clip space is flipped upside-down due to poor legacy Unity design.
      // The flip is baked into the projection matrix, so we only have to flip
      // manually when going from CS to NDC and back.
      xyCS.y = -xyCS.y;
  #endif

      return float4(xyCS, deviceDepth, 1.0);
  }
  // Transforms 'position' (extended with w = 1) by 'clipSpaceTransform'.
  // Use case examples:
  // (position = positionCS) => (clipSpaceTransform = use default)
  // (position = positionVS) => (clipSpaceTransform = UNITY_MATRIX_P)
  // (position = positionWS) => (clipSpaceTransform = UNITY_MATRIX_VP)
  float4 ComputeClipSpacePosition(float3 position, float4x4 clipSpaceTransform = k_identity4x4)
  {
      float4 hPosition = float4(position, 1.0);
      return mul(clipSpaceTransform, hPosition);
  }
  // Returns [0, 1] NDC coordinates in .xy and the post-divide Z in .z.
  // The returned Z value is the depth buffer value (and NOT linear view space Z value).
  // Use case examples:
  // (position = positionCS) => (clipSpaceTransform = use default)
  // (position = positionVS) => (clipSpaceTransform = UNITY_MATRIX_P)
  // (position = positionWS) => (clipSpaceTransform = UNITY_MATRIX_VP)
  float3 ComputeNormalizedDeviceCoordinatesWithZ(float3 position, float4x4 clipSpaceTransform = k_identity4x4)
  {
      float4 positionCS = ComputeClipSpacePosition(position, clipSpaceTransform);

  #if UNITY_UV_STARTS_AT_TOP
      // Our world space, view space, screen space and NDC space are Y-up.
      // Our clip space is flipped upside-down due to poor legacy Unity design.
      // The flip is baked into the projection matrix, so we only have to flip
      // manually when going from CS to NDC and back.
      positionCS.y = -positionCS.y;
  #endif

      // Perspective divide, then [-1, 1] -> [0, 1] on XY only.
      float3 ndc = positionCS.xyz * rcp(positionCS.w);
      ndc.xy = ndc.xy * 0.5 + 0.5;
      return ndc;
  }
  // XY part of ComputeNormalizedDeviceCoordinatesWithZ().
  // Use case examples:
  // (position = positionCS) => (clipSpaceTransform = use default)
  // (position = positionVS) => (clipSpaceTransform = UNITY_MATRIX_P)
  // (position = positionWS) => (clipSpaceTransform = UNITY_MATRIX_VP)
  float2 ComputeNormalizedDeviceCoordinates(float3 position, float4x4 clipSpaceTransform = k_identity4x4)
  {
      float3 ndcWithZ = ComputeNormalizedDeviceCoordinatesWithZ(position, clipSpaceTransform);
      return ndcWithZ.xy;
  }
  // Reconstructs the view space position from NDC coordinates and device depth,
  // using the inverse projection matrix.
  float3 ComputeViewSpacePosition(float2 positionNDC, float deviceDepth, float4x4 invProjMatrix)
  {
      float4 positionCS  = ComputeClipSpacePosition(positionNDC, deviceDepth);
      float4 hPositionVS = mul(invProjMatrix, positionCS);

      // The view space uses a right-handed coordinate system.
      float3 flippedZ = float3(hPositionVS.xy, -hPositionVS.z);
      return flippedZ / hPositionVS.w;
  }
  // Reconstructs the world space position from NDC coordinates and device depth,
  // using the inverse view-projection matrix.
  float3 ComputeWorldSpacePosition(float2 positionNDC, float deviceDepth, float4x4 invViewProjMatrix)
  {
      float4 clipPos  = ComputeClipSpacePosition(positionNDC, deviceDepth);
      float4 worldPos = mul(invViewProjMatrix, clipPos);

      // Homogeneous divide.
      return worldPos.xyz / worldPos.w;
  }
  // Reconstructs the world space position from a clip space position,
  // using the inverse view-projection matrix.
  float3 ComputeWorldSpacePosition(float4 positionCS, float4x4 invViewProjMatrix)
  {
      float4 worldPos = mul(invViewProjMatrix, positionCS);

      // Homogeneous divide.
      return worldPos.xyz / worldPos.w;
  }
  1136. // ----------------------------------------------------------------------------
  1137. // PositionInputs
  1138. // ----------------------------------------------------------------------------
  1139. // Note: if you modify this struct, be sure to update the CustomPassFullscreenShader.template
  // Bundle of position data for one pixel, filled in by the GetPositionInput() overloads.
  // Not every overload fills every field: the ones that take no depth or positionWS
  // leave those fields at the zero value set by ZERO_INITIALIZE.
  1140. struct PositionInputs
  1141. {
  1142. float3 positionWS; // World space position (could be camera-relative)
  1143. float2 positionNDC; // Normalized screen coordinates within the viewport : [0, 1) (with the half-pixel offset)
  1144. uint2 positionSS; // Screen space pixel coordinates : [0, NumPixels)
  1145. uint2 tileCoord; // Screen tile coordinates : [0, NumTiles)
  1146. float deviceDepth; // Depth from the depth buffer : [0, 1] (typically reversed)
  1147. float linearDepth; // View space Z coordinate : [Near, Far]
  1148. };
  // This function provides an easy way to sample into a screen texture, either from a pixel or a compute shader.
  // This makes it easy to share code between the two.
  // If a compute shader calls this function, positionSS is an integer usually calculated like:
  //     uint2 positionSS = groupId.xy * BLOCK_SIZE + groupThreadId.xy
  // otherwise it is the current unnormalized screen coordinate as returned by SV_Position.
  // Tile coordinates are specified explicitly so that we can easily make them lane invariant for compute evaluation.
  // Only positionNDC, positionSS and tileCoord are filled; the other fields stay zero-initialized.
  PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, uint2 tileCoord)
  {
      float2 pixelCenter = positionSS;
  #if defined(SHADER_STAGE_COMPUTE) || defined(SHADER_STAGE_RAY_TRACING)
      // In case of compute shader an extra half offset is added to the screenPos to shift the integer position to pixel center.
      pixelCenter += float2(0.5, 0.5);
  #endif

      PositionInputs posInput;
      ZERO_INITIALIZE(PositionInputs, posInput);

      posInput.positionNDC = pixelCenter * invScreenSize;
      posInput.positionSS  = uint2(positionSS);
      posInput.tileCoord   = tileCoord;

      return posInput;
  }
  // Convenience overload: same as the tile-aware version with tile coordinates (0, 0).
  PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize)
  {
      uint2 defaultTile = uint2(0, 0);

      return GetPositionInput(positionSS, invScreenSize, defaultTile);
  }
  // For ray tracing only.
  // Fills in the screen-space fields (with tile coordinates (0, 0)) plus the supplied positionWS.
  // deviceDepth and linearDepth are NOT set by this function (they keep their zero-initialized value).
  PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float3 positionWS)
  {
      uint2 defaultTile = uint2(0, 0);

      PositionInputs result = GetPositionInput(positionSS, invScreenSize, defaultTile);
      result.positionWS = positionWS;

      return result;
  }
  // Forward path.
  // deviceDepth and linearDepth come directly from .zw of SV_Position and are stored unmodified,
  // together with the caller-provided positionWS and tile coordinates.
  PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float deviceDepth, float linearDepth, float3 positionWS, uint2 tileCoord)
  {
      PositionInputs result = GetPositionInput(positionSS, invScreenSize, tileCoord);

      result.linearDepth = linearDepth;
      result.deviceDepth = deviceDepth;
      result.positionWS  = positionWS;

      return result;
  }
  // Forward path without tiling: forwards to the tile-aware overload with tile coordinates (0, 0).
  PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float deviceDepth, float linearDepth, float3 positionWS)
  {
      uint2 defaultTile = uint2(0, 0);

      return GetPositionInput(positionSS, invScreenSize, deviceDepth, linearDepth, positionWS, defaultTile);
  }
  // Deferred or compute shader path.
  // 'deviceDepth' must be the raw value from the depth buffer. This allows every kind of depth to be
  // handled automatically through the inverse view-projection matrix.
  // For information: in Unity, depth is always in the 0..1 range (even on OpenGL) but can be reversed.
  PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float deviceDepth,
      float4x4 invViewProjMatrix, float4x4 viewMatrix,
      uint2 tileCoord)
  {
      PositionInputs result = GetPositionInput(positionSS, invScreenSize, tileCoord);

      // Reconstruct the position from the NDC computed above, then derive the linear depth from it.
      float3 reconstructedWS = ComputeWorldSpacePosition(result.positionNDC, deviceDepth, invViewProjMatrix);

      result.deviceDepth = deviceDepth;
      result.positionWS  = reconstructedWS;
      result.linearDepth = LinearEyeDepth(reconstructedWS, viewMatrix);

      return result;
  }
  // Deferred or compute shader path without tiling: forwards to the tile-aware overload with tile coordinates (0, 0).
  PositionInputs GetPositionInput(float2 positionSS, float2 invScreenSize, float deviceDepth,
      float4x4 invViewProjMatrix, float4x4 viewMatrix)
  {
      uint2 defaultTile = uint2(0, 0);

      return GetPositionInput(positionSS, invScreenSize, deviceDepth, invViewProjMatrix, viewMatrix, defaultTile);
  }
  // Pushes 'posInput' away from the camera by 'depthOffsetVS' and refreshes its depth values.
  // The view direction 'V' points towards the camera, so the offset is always applied along -V.
  void ApplyDepthOffsetPositionInput(float3 V, float depthOffsetVS, float3 viewForwardDir, float4x4 viewProjMatrix, inout PositionInputs posInput)
  {
      // Move the position along -V.
      float3 awayFromCamera = -V;
      posInput.positionWS += depthOffsetVS * (awayFromCamera);

      // Recompute the device depth from the displaced position.
      posInput.deviceDepth = ComputeNormalizedDeviceCoordinatesWithZ(posInput.positionWS, viewProjMatrix).z;

      // Convert the displacement along the view vector into a displacement along the forward vector.
      // abs() makes sure the sign is right: 'depthOffsetVS' applies in the direction away from the camera.
      float forwardScale = abs(dot(V, viewForwardDir));
      posInput.linearDepth += depthOffsetVS * forwardScale;
  }
  1222. // ----------------------------------------------------------------------------
  1223. // Terrain/Brush heightmap encoding/decoding
  1224. // ----------------------------------------------------------------------------
  // For the built-in target these are already defined symbols, so only define them otherwise.
  #ifndef BUILTIN_TARGET_API
  #if defined(SHADER_API_VULKAN) || defined(SHADER_API_GLES3) || defined(SHADER_API_WEBGPU)
  // Encodes 'height' as a 16-bit integer split over two 8-bit channels:
  // low byte in .r, high byte in .g, each rescaled by 1/255.
  real4 PackHeightmap(real height)
  {
      uint quantized = (uint)(65535.0 * height);
      uint lowByte   = (quantized >> 0) & 0xFF;
      uint highByte  = (quantized >> 8) & 0xFF;
      return real4(lowByte, highByte, 0, 0) / 255.0;
  }
  // Decodes a height stored by the two-channel PackHeightmap() above.
  real UnpackHeightmap(real4 height)
  {
      // Equivalent to (255.0 * height.r + 255.0 * 256.0 * height.g) / 65535.0
      real combined = height.r + height.g * 256.0;
      return (combined) / 257.0;
  }
  #else
  // Stores the height directly in the red channel.
  real4 PackHeightmap(real height)
  {
      real4 packed = real4(height, 0, 0, 0);
      return packed;
  }
  // Reads the height back from the red channel.
  real UnpackHeightmap(real4 height)
  {
      real stored = height.r;
      return stored;
  }
  #endif
  #endif
  1251. // ----------------------------------------------------------------------------
  1252. // Misc utilities
  1253. // ----------------------------------------------------------------------------
  // Tests a bitfield: returns true when 'bitfield' and 'flag' share at least one set bit.
  bool HasFlag(uint bitfield, uint flag)
  {
      uint sharedBits = bitfield & flag;
      return sharedBits != 0;
  }
  // Normalization that accounts for zero-length vectors:
  // the squared length is clamped to at least FLT_MIN before taking the reciprocal square root.
  float3 SafeNormalize(float3 inVec)
  {
      float clampedLengthSq = max(FLT_MIN, dot(inVec, inVec));
      float invLength = rsqrt(clampedLengthSq);
      return inVec * invLength;
  }
  // Half-precision variant: the squared length is clamped to at least HALF_MIN
  // before taking the reciprocal square root.
  half3 SafeNormalize(half3 inVec)
  {
      half clampedLengthSq = max(HALF_MIN, dot(inVec, inVec));
      half invLength = rsqrt(clampedLengthSq);
      return inVec * invLength;
  }
  // Returns true when the squared length of 'inVec' lies strictly between 0.9998 and 1.0002001.
  bool IsNormalized(float3 inVec)
  {
      float lengthSq = dot(inVec, inVec);
      bool aboveLower = 0.9998 < lengthSq;
      bool belowUpper = lengthSq < 1.0002001;
      return aboveLower && belowUpper;
  }
  // Half-precision variant with a wider tolerance:
  // returns true when the squared length of 'inVec' lies strictly between 0.998 and 1.002.
  bool IsNormalized(half3 inVec)
  {
      half lengthSq = dot(inVec, inVec);
      bool aboveLower = 0.998 < lengthSq;
      bool belowUpper = lengthSq < 1.002;
      return aboveLower && belowUpper;
  }
  // Division which returns 1 whenever both operands compare equal,
  // which covers (inf/inf) and (0/0).
  // If any of the input parameters are NaNs, the result is a NaN.
  real SafeDiv(real numer, real denom)
  {
      real quotient = 1;
      if (numer != denom)
      {
          quotient = numer / denom;
      }
      return quotient;
  }
  // Square root that is safe from imaginary results: the input is clamped to 0 from below first.
  real SafeSqrt(real x)
  {
      real nonNegative = max(0, x);
      return sqrt(nonNegative);
  }
  // Computes sin(x) from cos(x) as sqrt(1 - cos^2), with the radicand saturated to [0, 1].
  // Assumes that (0 <= x <= Pi).
  real SinFromCos(real cosX)
  {
      real sinSquared = saturate(1 - cosX * cosX);
      return sqrt(sinSquared);
  }
  // Dot product of two directions given in spherical coordinates
  // (cosine of the polar angle and azimuth for each direction).
  real SphericalDot(real cosTheta1, real phi1, real cosTheta2, real phi2)
  {
      real sinTheta1 = SinFromCos(cosTheta1);
      real sinTheta2 = SinFromCos(cosTheta2);
      return sinTheta1 * sinTheta2 * cos(phi1 - phi2) + cosTheta1 * cosTheta2;
  }
  // Texture coordinates for the full-screen triangle generated in homogeneous clip space, s.t.
  // v0 = (-1, -1, 1), v1 = (3, -1, 1), v2 = (-1, 3, 1).
  // U is ((vertexID << 1) & 2) and V is (vertexID & 2); V is flipped (1 - V) when UNITY_UV_STARTS_AT_TOP is set.
  float2 GetFullScreenTriangleTexCoord(uint vertexID)
  {
      uint uBits = (vertexID << 1) & 2;
      uint vBits = vertexID & 2;
  #if UNITY_UV_STARTS_AT_TOP
      return float2(uBits, 1.0 - vBits);
  #else
      return float2(uBits, vBits);
  #endif
  }
  // Vertex position of the full-screen triangle for the given vertex ID, at depth 'z' and with w = 1.
  float4 GetFullScreenTriangleVertexPosition(uint vertexID, float z = UNITY_NEAR_CLIP_VALUE)
  {
      uint xBits = (vertexID << 1) & 2;
      uint yBits = vertexID & 2;
      float2 uv = float2(xBits, yBits);

      // Note: the triangle coordinates are scaled by 2 so that the [0, 1] UV range maps to [-1, 1] on the screen.
      float4 positionCS = float4(uv * 2.0 - 1.0, z, 1.0);
  #ifdef UNITY_PRETRANSFORM_TO_DISPLAY_ORIENTATION
      positionCS = ApplyPretransformRotation(positionCS);
  #endif
      return positionCS;
  }
  // Texture coordinates of a procedural quad.
  // Draw procedural with 2 triangles has index order (0,1,2) (0,2,3):
  // 0 - 0,0
  // 1 - 0,1
  // 2 - 1,1
  // 3 - 1,0
  // V is flipped (1 - V) when UNITY_UV_STARTS_AT_TOP is set.
  float2 GetQuadTexCoord(uint vertexID)
  {
      uint highBit = vertexID >> 1;
      uint lowBit  = (vertexID & 1);

      float texU = highBit;
      // Parity of the two bits: 0 for indices 0,3 and 1 for indices 1,2.
      float texV = (highBit + lowBit) & 1;
  #if UNITY_UV_STARTS_AT_TOP
      texV = 1.0 - texV;
  #endif
      return float2(texU, texV);
  }
  // Vertex position of a procedural quad, at depth 'z' and with w = 1:
  // 0 - 0,1
  // 1 - 0,0
  // 2 - 1,0
  // 3 - 1,1
  float4 GetQuadVertexPosition(uint vertexID, float z = UNITY_NEAR_CLIP_VALUE)
  {
      uint highBit = vertexID >> 1;
      uint lowBit  = (vertexID & 1);

      float posX = highBit;
      // '-' binds tighter than '&', so the subtraction happens first (in unsigned arithmetic)
      // and only the lowest bit is kept: 1 for indices 0,3 and 0 for indices 1,2.
      float posY = (1 - (highBit + lowBit)) & 1;

      float4 positionCS = float4(posX, posY, z, 1.0);
  #ifdef UNITY_PRETRANSFORM_TO_DISPLAY_ORIENTATION
      positionCS = ApplyPretransformRotation(positionCS);
  #endif
      return positionCS;
  }
  #if !defined(SHADER_STAGE_RAY_TRACING)
  // LOD dithering transition helper.
  // LOD0 must use this function with ditherFactor 1..0
  // LOD1 must use this function with ditherFactor -1..0
  // This is what is provided by unity_LODFade.
  void LODDitheringTransition(uint2 fadeMaskSeed, float ditherFactor)
  {
      // Spatially varying pattern. It is deliberately not varied over time:
      // doing so confuses the TAA and increases the amount of noise.
      float noise = GenerateHashedRandomFloat(fadeMaskSeed);

      // Giving the noise the sign of 'ditherFactor' preserves the symmetry
      // s.t. if LOD 0 has f = x, LOD 1 has f = -x.
      float signedNoise = CopySign(noise, ditherFactor);

      clip(ditherFactor - signedNoise);
  }
  #endif
  // The resource that is bound when binding a stencil buffer from the depth buffer has two channels.
  // On D3D11 the stencil value is in the green channel, while on other APIs it is in the red channel.
  // Note that on some platforms always using the green channel might work, but this is not guaranteed.
  uint GetStencilValue(uint2 stencilBufferVal)
  {
      uint stencil;
  #if defined(SHADER_API_D3D11) || defined(SHADER_API_XBOXONE) || defined(SHADER_API_GAMECORE)
      stencil = stencilBufferVal.y;
  #else
      stencil = stencilBufferVal.x;
  #endif
      return stencil;
  }
  // Sharpens the alpha of a texture to the width of a single pixel.
  // Used for alpha to coverage.
  // source: https://medium.com/@bgolus/anti-aliased-alpha-test-the-esoteric-alpha-to-coverage-8b177335ae4f
  float SharpenAlpha(float alpha, float alphaClipTreshold)
  {
      // The screen-space rate of change of alpha is floored at 0.0001 so the division stays well defined.
      float alphaWidth = max(fwidth(alpha), 0.0001);
      float distanceToThreshold = alpha - alphaClipTreshold;

      return saturate((distanceToThreshold) / alphaWidth + 0.5);
  }
  1385. // These functions clamp a value to HALF_MAX (the maximum 16-bit floating-point value); they are used to prevent INF in case of extreme values.
  1386. TEMPLATE_1_FLT(ClampToFloat16Max, value, return min(value, HALF_MAX))
  // Restore warning 3205 under the same condition that disabled it at the top of this file,
  // so the disable/enable pair also stays balanced when UNITY_UNIFIED_SHADER_PRECISION_MODEL is defined.
  #if SHADER_API_MOBILE || SHADER_API_GLES3 || SHADER_API_SWITCH || defined(UNITY_UNIFIED_SHADER_PRECISION_MODEL)
  #pragma warning (enable : 3205) // conversion of larger type to smaller
  #endif
  // Remaps a (u, v) coordinate that may lie outside [0, 1) back into the base tile.
  // Each axis is classified as below 0, inside [0, 1), or above, giving nine regions:
  // the central one is returned unchanged, the edge regions are mirrored,
  // and the corner regions are shifted by one tile along both axes.
  float2 RepeatOctahedralUV(float u, float v)
  {
      bool uBelow  = u < 0.0f;
      bool uInside = !uBelow && (u < 1.0f);
      bool vBelow  = v < 0.0f;
      bool vInside = !vBelow && (v < 1.0f);

      float2 wrapped;
      if (uBelow)
      {
          if (vBelow)
              wrapped = float2(1.0f + u, 1.0f + v);
          else if (vInside)
              wrapped = float2(-u, 1.0f - v);
          else
              wrapped = float2(1.0f + u, v - 1.0f);
      }
      else if (uInside)
      {
          if (vBelow)
              wrapped = float2(1.0f - u, -v);
          else if (vInside)
              wrapped = float2(u, v);
          else
              wrapped = float2(1.0f - u, 2.0f - v);
      }
      else
      {
          if (vBelow)
              wrapped = float2(u - 1.0f, 1.0f + v);
          else if (vInside)
              wrapped = float2(2.0f - u, 1.0f - v);
          else
              wrapped = float2(u - 1.0f, v - 1.0f);
      }
      return wrapped;
  }
  1422. #endif // UNITY_COMMON_INCLUDED