No Description
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

AreaLighting.hlsl 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. #ifndef UNITY_AREA_LIGHTING_INCLUDED
  2. #define UNITY_AREA_LIGHTING_INCLUDED
  3. #define APPROXIMATE_POLY_LIGHT_AS_SPHERE_LIGHT
  4. #define APPROXIMATE_SPHERE_LIGHT_NUMERICALLY
  5. // 'sinSqSigma' is the sine^2 of the half-angle subtended by the sphere (aperture) as seen from the shaded point.
  6. // 'cosOmega' is the cosine of the angle between the normal and the direction to the center of the light.
  7. // This function performs horizon clipping.
  8. real DiffuseSphereLightIrradiance(real sinSqSigma, real cosOmega)
  9. {
  10. #ifdef APPROXIMATE_SPHERE_LIGHT_NUMERICALLY
  11. real x = sinSqSigma;
  12. real y = cosOmega;
  13. // Use a numerical fit found in Mathematica. Mean absolute error: 0.00476944.
  14. // You can use the following Mathematica code to reproduce our results:
  15. // t = Flatten[Table[{x, y, f[x, y]}, {x, 0, 0.999999, 0.001}, {y, -0.999999, 0.999999, 0.002}], 1]
  16. // m = NonlinearModelFit[t, x * (y + e) * (0.5 + (y - e) * (a + b * x + c * x^2 + d * x^3)), {a, b, c, d, e}, {x, y}]
  17. return saturate(x * (0.9245867471551246 + y) * (0.5 + (-0.9245867471551246 + y) * (0.5359050373687144 + x * (-1.0054221851257754 + x * (1.8199061187417047 - x * 1.3172081704209504)))));
  18. #else
  19. #if 0 // Ref: Area Light Sources for Real-Time Graphics, page 4 (1996).
  20. real sinSqOmega = saturate(1 - cosOmega * cosOmega);
  21. real cosSqSigma = saturate(1 - sinSqSigma);
  22. real sinSqGamma = saturate(cosSqSigma / sinSqOmega);
  23. real cosSqGamma = saturate(1 - sinSqGamma);
  24. real sinSigma = sqrt(sinSqSigma);
  25. real sinGamma = sqrt(sinSqGamma);
  26. real cosGamma = sqrt(cosSqGamma);
  27. real sigma = asin(sinSigma);
  28. real omega = acos(cosOmega);
  29. real gamma = asin(sinGamma);
  30. if (omega >= HALF_PI + sigma)
  31. {
  32. // Full horizon occlusion (case #4).
  33. return 0;
  34. }
  35. real e = sinSqSigma * cosOmega;
  36. UNITY_BRANCH
  37. if (omega < HALF_PI - sigma)
  38. {
  39. // No horizon occlusion (case #1).
  40. return e;
  41. }
  42. else
  43. {
  44. real g = (-2 * sqrt(sinSqOmega * cosSqSigma) + sinGamma) * cosGamma + (HALF_PI - gamma);
  45. real h = cosOmega * (cosGamma * sqrt(saturate(sinSqSigma - cosSqGamma)) + sinSqSigma * asin(saturate(cosGamma / sinSigma)));
  46. if (omega < HALF_PI)
  47. {
  48. // Partial horizon occlusion (case #2).
  49. return saturate(e + INV_PI * (g - h));
  50. }
  51. else
  52. {
  53. // Partial horizon occlusion (case #3).
  54. return saturate(INV_PI * (g + h));
  55. }
  56. }
  57. #else // Ref: Moving Frostbite to Physically Based Rendering, page 47 (2015, optimized).
  58. real cosSqOmega = cosOmega * cosOmega; // y^2
  59. UNITY_BRANCH
  60. if (cosSqOmega > sinSqSigma) // (y^2)>x
  61. {
  62. return saturate(sinSqSigma * cosOmega); // Clip[x*y,{0,1}]
  63. }
  64. else
  65. {
  66. real cotSqSigma = rcp(sinSqSigma) - 1; // 1/x-1
  67. real tanSqSigma = rcp(cotSqSigma); // x/(1-x)
  68. real sinSqOmega = 1 - cosSqOmega; // 1-y^2
  69. real w = sinSqOmega * tanSqSigma; // (1-y^2)*(x/(1-x))
  70. real x = -cosOmega * rsqrt(w); // -y*Sqrt[(1/x-1)/(1-y^2)]
  71. real y = sqrt(sinSqOmega * tanSqSigma - cosSqOmega); // Sqrt[(1-y^2)*(x/(1-x))-y^2]
  72. real z = y * cotSqSigma; // Sqrt[(1-y^2)*(x/(1-x))-y^2]*(1/x-1)
  73. real a = cosOmega * acos(x) - z; // y*ArcCos[-y*Sqrt[(1/x-1)/(1-y^2)]]-Sqrt[(1-y^2)*(x/(1-x))-y^2]*(1/x-1)
  74. real b = atan(y); // ArcTan[Sqrt[(1-y^2)*(x/(1-x))-y^2]]
  75. return saturate(INV_PI * (a * sinSqSigma + b));
  76. }
  77. #endif
  78. #endif
  79. }
  80. // The output is *not* normalized by the factor of 1/TWO_PI (this is done by the PolygonFormFactor function).
  81. real3 ComputeEdgeFactor(real3 V1, real3 V2)
  82. {
  83. real subtendedAngle;
  84. real V1oV2 = dot(V1, V2);
  85. real3 V1xV2 = cross(V1, V2); // Plane normal (tangent to the unit sphere)
  86. real sqLen = saturate(1 - V1oV2 * V1oV2); // length(V1xV2) = abs(sin(angle))
  87. real rcpLen = rsqrt(max(FLT_MIN, sqLen)); // Make sure it is finite
  88. #if 0
  89. real y = rcpLen * acos(V1oV2);
  90. #else
  91. // Let y[x_] = ArcCos[x] / Sqrt[1 - x^2].
  92. // Range reduction: since ArcCos[-x] == Pi - ArcCos[x], we only need to consider x on [0, 1].
  93. real x = abs(V1oV2);
  94. // Limit[y[x], x -> 1] == 1,
  95. // Limit[y[x], x -> 0] == Pi/2.
  96. // The approximation is exact at the endpoints of [0, 1].
  97. // Max. abs. error on [0, 1] is 1.33e-6 at x = 0.0036.
  98. // Max. rel. error on [0, 1] is 8.66e-7 at x = 0.0037.
  99. real y = HALF_PI + x * (-0.99991 + x * (0.783393 + x * (-0.649178 + x * (0.510589 + x * (-0.326137 + x * (0.137528 + x * -0.0270813))))));
  100. if (V1oV2 < 0)
  101. {
  102. y = rcpLen * PI - y;
  103. }
  104. #endif
  105. return V1xV2 * y;
  106. }
  107. // Input: 3-5 vertices in the coordinate frame centered at the shaded point.
  108. // Output: signed vector irradiance.
  109. // No horizon clipping is performed.
  110. real3 PolygonFormFactor(real4x3 L, real3 L4, uint n)
  111. {
  112. // The length cannot be zero since we have already checked
  113. // that the light has a non-zero effective area,
  114. // and thus its plane cannot pass through the origin.
  115. L[0] = normalize(L[0]);
  116. L[1] = normalize(L[1]);
  117. L[2] = normalize(L[2]);
  118. switch (n)
  119. {
  120. case 3:
  121. L[3] = L[0];
  122. break;
  123. case 4:
  124. L[3] = normalize(L[3]);
  125. L4 = L[0];
  126. break;
  127. case 5:
  128. L[3] = normalize(L[3]);
  129. L4 = normalize(L4);
  130. break;
  131. }
  132. // If the magnitudes of a pair of edge factors are
  133. // nearly the same, catastrophic cancellation may occur:
  134. // https://en.wikipedia.org/wiki/Catastrophic_cancellation
  135. // For the same reason, the value of the cross product of two
  136. // nearly collinear vectors is prone to large errors.
  137. // Therefore, the algorithm is inherently numerically unstable
  138. // for area lights that shrink to a line (or a point) after
  139. // projection onto the unit sphere.
  140. real3 F = ComputeEdgeFactor(L[0], L[1]);
  141. F += ComputeEdgeFactor(L[1], L[2]);
  142. F += ComputeEdgeFactor(L[2], L[3]);
  143. if (n >= 4)
  144. F += ComputeEdgeFactor(L[3], L4);
  145. if (n == 5)
  146. F += ComputeEdgeFactor(L4, L[0]);
  147. return INV_TWO_PI * F; // The output may be projected onto the tangent plane (F.z) to yield signed irradiance.
  148. }
  149. // See "Real-Time Area Lighting: a Journey from Research to Production", slide 102.
  150. // Turns out, despite the authors claiming that this function "calculates an approximation of
  151. // the clipped sphere form factor", that is simply not true.
  152. // First of all, above horizon, the function should then just return 'F.z', which it does not.
  153. // Secondly, if we use the correct function called DiffuseSphereLightIrradiance(), it results
  154. // in severe light leaking if the light is placed vertically behind the camera.
  155. // So this function is clearly a hack designed to work around these problems.
  156. real PolygonIrradianceFromVectorFormFactor(float3 F)
  157. {
  158. #if 1
  159. float l = length(F);
  160. return max(0, (l * l + F.z) / (l + 1));
  161. #else
  162. real sff = saturate(dot(F, F));
  163. real sinSqAperture = sqrt(sff);
  164. real cosElevationAngle = F.z * rsqrt(sff);
  165. return DiffuseSphereLightIrradiance(sinSqAperture, cosElevationAngle);
  166. #endif
  167. }
  168. // Expects non-normalized vertex positions.
  169. // Output: F is the signed vector irradiance.
  170. real PolygonIrradiance(real4x3 L, out real3 F)
  171. {
  172. #ifdef APPROXIMATE_POLY_LIGHT_AS_SPHERE_LIGHT
  173. F = PolygonFormFactor(L, real3(0,0,1), 4); // Before horizon clipping.
  174. return PolygonIrradianceFromVectorFormFactor(F); // Accounts for the horizon.
  175. #else
  176. // 1. ClipQuadToHorizon
  177. // detect clipping config
  178. uint config = 0;
  179. if (L[0].z > 0) config += 1;
  180. if (L[1].z > 0) config += 2;
  181. if (L[2].z > 0) config += 4;
  182. if (L[3].z > 0) config += 8;
  183. // The fifth vertex for cases when clipping cuts off one corner.
  184. // Due to a compiler bug, copying L into a vector array with 5 rows
  185. // messes something up, so we need to stick with the matrix + the L4 vertex.
  186. real3 L4 = L[3];
  187. // This switch is surprisingly fast. Tried replacing it with a lookup array of vertices.
  188. // Even though that replaced the switch with just some indexing and no branches, it became
  189. // way, way slower - mem fetch stalls?
  190. // clip
  191. uint n = 0;
  192. switch (config)
  193. {
  194. case 0: // clip all
  195. break;
  196. case 1: // V1 clip V2 V3 V4
  197. n = 3;
  198. L[1] = -L[1].z * L[0] + L[0].z * L[1];
  199. L[2] = -L[3].z * L[0] + L[0].z * L[3];
  200. break;
  201. case 2: // V2 clip V1 V3 V4
  202. n = 3;
  203. L[0] = -L[0].z * L[1] + L[1].z * L[0];
  204. L[2] = -L[2].z * L[1] + L[1].z * L[2];
  205. break;
  206. case 3: // V1 V2 clip V3 V4
  207. n = 4;
  208. L[2] = -L[2].z * L[1] + L[1].z * L[2];
  209. L[3] = -L[3].z * L[0] + L[0].z * L[3];
  210. break;
  211. case 4: // V3 clip V1 V2 V4
  212. n = 3;
  213. L[0] = -L[3].z * L[2] + L[2].z * L[3];
  214. L[1] = -L[1].z * L[2] + L[2].z * L[1];
  215. break;
  216. case 5: // V1 V3 clip V2 V4: impossible
  217. break;
  218. case 6: // V2 V3 clip V1 V4
  219. n = 4;
  220. L[0] = -L[0].z * L[1] + L[1].z * L[0];
  221. L[3] = -L[3].z * L[2] + L[2].z * L[3];
  222. break;
  223. case 7: // V1 V2 V3 clip V4
  224. n = 5;
  225. L4 = -L[3].z * L[0] + L[0].z * L[3];
  226. L[3] = -L[3].z * L[2] + L[2].z * L[3];
  227. break;
  228. case 8: // V4 clip V1 V2 V3
  229. n = 3;
  230. L[0] = -L[0].z * L[3] + L[3].z * L[0];
  231. L[1] = -L[2].z * L[3] + L[3].z * L[2];
  232. L[2] = L[3];
  233. break;
  234. case 9: // V1 V4 clip V2 V3
  235. n = 4;
  236. L[1] = -L[1].z * L[0] + L[0].z * L[1];
  237. L[2] = -L[2].z * L[3] + L[3].z * L[2];
  238. break;
  239. case 10: // V2 V4 clip V1 V3: impossible
  240. break;
  241. case 11: // V1 V2 V4 clip V3
  242. n = 5;
  243. L[3] = -L[2].z * L[3] + L[3].z * L[2];
  244. L[2] = -L[2].z * L[1] + L[1].z * L[2];
  245. break;
  246. case 12: // V3 V4 clip V1 V2
  247. n = 4;
  248. L[1] = -L[1].z * L[2] + L[2].z * L[1];
  249. L[0] = -L[0].z * L[3] + L[3].z * L[0];
  250. break;
  251. case 13: // V1 V3 V4 clip V2
  252. n = 5;
  253. L[3] = L[2];
  254. L[2] = -L[1].z * L[2] + L[2].z * L[1];
  255. L[1] = -L[1].z * L[0] + L[0].z * L[1];
  256. break;
  257. case 14: // V2 V3 V4 clip V1
  258. n = 5;
  259. L4 = -L[0].z * L[3] + L[3].z * L[0];
  260. L[0] = -L[0].z * L[1] + L[1].z * L[0];
  261. break;
  262. case 15: // V1 V2 V3 V4
  263. n = 4;
  264. break;
  265. }
  266. if (n == 0) return 0;
  267. // 2. Integrate
  268. F = PolygonFormFactor(L, L4, n); // After the horizon clipping.
  269. // 3. Compute irradiance
  270. return max(0, F.z);
  271. #endif
  272. }
  273. // This function assumes that inputs are well-behaved, e.i.
  274. // that the line does not pass through the origin and
  275. // that the light is (at least partially) above the surface.
  276. float I_diffuse_line(float3 C, float3 A, float hl)
  277. {
  278. // Solve C.z + h * A.z = 0.
  279. float h = -C.z * rcp(A.z); // May be Inf, but never NaN
  280. // Clip the line segment against the z-plane if necessary.
  281. float h2 = (A.z >= 0) ? max( hl, h)
  282. : min( hl, h); // P2 = C + h2 * A
  283. float h1 = (A.z >= 0) ? max(-hl, h)
  284. : min(-hl, h); // P1 = C + h1 * A
  285. // Normalize the tangent.
  286. float as = dot(A, A); // |A|^2
  287. float ar = rsqrt(as); // 1/|A|
  288. float a = as * ar; // |A|
  289. float3 T = A * ar; // A/|A|
  290. // Orthogonal 2D coordinates:
  291. // P(n, t) = n * N + t * T.
  292. float tc = dot(T, C); // C = n * N + tc * T
  293. float3 P0 = C - tc * T; // P(n, 0) = n * N
  294. float ns = dot(P0, P0); // |P0|^2
  295. float nr = rsqrt(ns); // 1/|P0|
  296. float n = ns * nr; // |P0|
  297. float Nz = P0.z * nr; // N.z = P0.z/|P0|
  298. // P(n, t) - C = P0 + t * T - P0 - tc * T
  299. // = (t - tc) * T = h * A = (h * a) * T.
  300. float t2 = tc + h2 * a; // P2.t
  301. float t1 = tc + h1 * a; // P1.t
  302. float s2 = ns + t2 * t2; // |P2|^2
  303. float s1 = ns + t1 * t1; // |P1|^2
  304. float mr = rsqrt(s1 * s2); // 1/(|P1|*|P2|)
  305. float r2 = s1 * (mr * mr); // 1/|P2|^2
  306. float r1 = s2 * (mr * mr); // 1/|P1|^2
  307. // I = (i1 + i2 + i3) / Pi.
  308. // i1 = N.z * (P2.t / |P2|^2 - P1.t / |P1|^2).
  309. // i2 = -T.z * (P2.n / |P2|^2 - P1.n / |P1|^2).
  310. // i3 = N.z * ArcCos[Dot[P1, P2] / (|P1| * |P2|)] / |P0|.
  311. float i12 = (Nz * t2 - (T.z * n)) * r2
  312. - (Nz * t1 - (T.z * n)) * r1;
  313. // Guard against numerical errors.
  314. float dt = min(1, (ns + t1 * t2) * mr);
  315. float i3 = acos(dt) * (Nz * nr); // angle * cos(θ) / r^2
  316. // Guard against numerical errors.
  317. return INV_PI * max(0, i12 + i3);
  318. }
  319. // Computes 1 / length(mul(transpose(inverse(invM)), normalize(ortho))).
  320. float ComputeLineWidthFactor(float3x3 invM, float3 ortho, float orthoSq)
  321. {
  322. // transpose(inverse(invM)) = 1 / determinant(invM) * cofactor(invM).
  323. // Take into account the sparsity of the matrix:
  324. // {{a,0,b},
  325. // {0,c,0},
  326. // {d,0,1}}
  327. float a = invM[0][0];
  328. float b = invM[0][2];
  329. float c = invM[1][1];
  330. float d = invM[2][0];
  331. float det = c * (a - b * d);
  332. float3 X = float3(c * (ortho.x - d * ortho.z),
  333. (ortho.y * (a - b * d)),
  334. c * (-b * ortho.x + a * ortho.z)); // mul(cof, ortho)
  335. // 1 / length(1/s * X) = abs(s) / length(X).
  336. return abs(det) * rsqrt(dot(X, X) * orthoSq) * orthoSq; // rsqrt(x^2) * x^2 = x
  337. }
  338. float I_ltc_line(float3x3 invM, float3 center, float3 axis, float halfLength)
  339. {
  340. float3 ortho = cross(center, axis);
  341. float orthoSq = dot(ortho, ortho);
  342. // Check whether the line passes through the origin.
  343. bool quit = (orthoSq == 0);
  344. // Check whether the light is entirely below the surface.
  345. // We must test twice, since a linear transformation
  346. // may bring the light above the surface (a side-effect).
  347. quit = quit || (center.z + halfLength * abs(axis.z) <= 0);
  348. // Transform into the diffuse configuration.
  349. // This is a sparse matrix multiplication.
  350. // Pay attention to the multiplication order
  351. // (in case your matrices are transposed).
  352. float3 C = mul(invM, center);
  353. float3 A = mul(invM, axis);
  354. // Check whether the light is entirely below the surface.
  355. // We must test twice, since a linear transformation
  356. // may bring the light below the surface (as expected).
  357. quit = quit || (C.z + halfLength * abs(A.z) <= 0);
  358. float result = 0;
  359. if (!quit)
  360. {
  361. float w = ComputeLineWidthFactor(invM, ortho, orthoSq);
  362. result = I_diffuse_line(C, A, halfLength) * w;
  363. }
  364. return result;
  365. }
  366. #endif // UNITY_AREA_LIGHTING_INCLUDED