Ingen beskrivning
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Coverage.hlsl 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. /*
  2. MIT License
  3. Copyright (c) 2022 Kleber Garcia
  4. Permission is hereby granted, free of charge, to any person obtaining a copy
  5. of this software and associated documentation files (the "Software"), to deal
  6. in the Software without restriction, including without limitation the rights
  7. to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. copies of the Software, and to permit persons to whom the Software is
  9. furnished to do so, subject to the following conditions:
  10. The above copyright notice and this permission notice shall be included in all
  11. copies or substantial portions of the Software.
  12. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  13. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  14. FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  15. AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  16. LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  17. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  18. SOFTWARE.
  19. */
  20. #ifndef __COVERAGE__
  21. #define __COVERAGE__
  22. //Utilities for coverage bit mask on an 8x8 grid.
  23. namespace Coverage
  24. {
  25. //**************************************************************************************************************/
  26. // How to use
  27. //**************************************************************************************************************/
  28. /*
  29. To utilize this library, first call the GenLUT function at the beginning of your compute shader.
  30. This function must be followed by a group sync. Example follows:
  31. ...
  32. Coverage::GenLUT(groupThreadIndex);
  33. GroupMemoryBarrierWithGroupSync();
  34. ...
  35. Alternatively, you can dump the contents into a buffer and preload them. The contents of the LUT are inside gs_quadMask, which has 64 entries.
  36. After this, use the coverage functions.
  37. */
  38. //**************************************************************************************************************/
  39. // Coordinate System
  40. //**************************************************************************************************************/
  41. /*
  42. The functions in this library follow the same convention: the input is a shape described by certain vertices, and the
  43. output is a 64 bit mask with that shape's coverage.
  44. The coordinate system is (0,0) for the top left of an 8x8 grid, and (1,1) for the bottom right.
  45. The LSB represents coordinate (0,0), and sample points are centered on the pixel.
  46.  (0.0,0.0)                       (1.0,0.0)
  47.      |                               |
  48.      |_______________________________|
  49.      |   |   |   |   |   |   |   |   |
  50.      | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
  51.      |___|___|___|___|___|___|___|___|
  52.      |   |   |   |   |   |   |   |   |
  53.      | 8 | 9 | 10| 11| 12| 13| 14| 15|
  54.      |___|___|___|___|___|___|___|___|___(1.0, 2.0/8.0)
  55. the center of bit 0 would be (0.5/8, 0.5/8) in these normalized coordinates (0.5, 0.5 in pixel units), and so on
  56. any points outside of the range (0,1) means they are outside the grid.
  57. */
  58. //**************************************************************************************************************/
  59. // Masks
  60. //**************************************************************************************************************/
  61. /*
  62. Masks are stored in a packed 64 bit represented by uint2.
  63. x component represents the first 32 bits, y component the next 32 bits.
  64. */
  65. //**************************************************************************************************************/
  66. // coverage API
  67. //**************************************************************************************************************/
  68. /*
  69. LUT for the 4x4 quad masks. See the BuildQuadMask function.
  70. It holds 4 states of 16 entries each, one per combination of horizontal flip and axis flip (transpose).
  71. You can dump this LUT to a buffer and preload it manually,
  72. or just regenerate it in your thread group.
  73. */
  74. groupshared uint gs_quadMask[16 * 4];
  75. /*
  76. Call this function to generate the coverage 4x4 luts
  77. groupThreadIndex - the thread index.
  78. NOTE: must sync group threads after calling this.
  79. */
  80. void GenLUT(uint groupThreadIndex);
  81. /*
  82. Call this function to get a 64 bit coverage mask for a triangle.
  83. v0, v1, v2 - the triangle coordinates in right hand ruling order
  84. return - the coverage mask for this triangle
  85. */
  86. uint2 TriangleCoverageMask(float2 v0, float2 v1, float2 v2, bool showFrontFace, bool showBackface);
  87. /*
  88. Call this function to get a 64 bit coverage mask for a line.
  89. v0, v1 - the line coordinates.
  90. thickness - thickness of line in normalized space. 1.0 means the entire 8 pixels in a tile
  91. caps - extra pixels in the caps of the line in normalized space. 1.0 means 8 pixels in a tile
  92. return - the coverage mask of this line
  93. */
  94. uint2 LineCoverageMask(float2 v0, float2 v1, float thickness, float caps);
  95. //**************************************************************************************************************/
  96. // coverage implementation
  97. //**************************************************************************************************************/
  /*
  Builds a compact 4x4 bit quad describing the coverage below a line.
  Quad row r is stored in the low nibble of byte r of the result (bits [r*8, r*8 + 3]).
  The line is assumed to rise by at most one row per column (positive slope, at most 1.0).

  incrementMask - bit i set means the line rises by one row at column i.
                  Only the low 4 bits contribute coverage; higher bits produce empty rows.
  return        - the packed 4x4 quad.

  Example (least significant bit written first), incrementMask:
      1 0 1 0
  produces this 4x4 quad (top row printed first):
      0 0 0 0
      0 0 0 1   <- the raise at column 2 leaves only column 3 covered
      0 1 1 1   <- the raise at column 0 leaves columns 1..3 covered
      1 1 1 1   <- the lowest row is always fully covered
  */
  uint BuildQuadMask(uint incrementMask)
  {
      uint quad = 0;
      uint rowBits = 0xF;          // coverage of the row currently being emitted
      uint pending = incrementMask; // raises that have not been consumed yet
      int row = 0;
      while (row < 4)
      {
          quad |= rowBits << (row * 8);
          if (pending == 0)
              break;

          // The next raise removes every column up to and including its own
          // from the row above.
          const uint column = firstbitlow(pending);
          rowBits = (0xFu << (column + 1)) & 0xFu;
          pending &= ~(1u << column);
          ++row;
      }
      return quad;
  }
  // Mirrors the 4-bit nibble that starts at bit `offset` of `mask`
  // (bit 0 <-> bit 3, bit 1 <-> bit 2).
  // Returns the mirrored nibble at the same offset; every other result bit is zero.
  uint FlipNibble(uint mask, int offset)
  {
      const uint nibble = (mask >> offset) & 0xF;

      uint mirrored = 0;
      mirrored |= (nibble & 0x1) << 3;
      mirrored |= (nibble & 0x2) << 1;
      mirrored |= (nibble & 0x4) >> 1;
      mirrored |= (nibble & 0x8) >> 3;

      return mirrored << offset;
  }
  // Mirrors a whole 4x4 bit quad horizontally.
  // Each quad row is the low nibble of one byte, so the nibbles at bit
  // offsets 0, 8, 16 and 24 are mirrored independently and recombined.
  uint FlipQuadInX(uint mask)
  {
      uint flipped = 0;
      [unroll]
      for (int row = 0; row < 4; ++row)
      {
          flipped |= FlipNibble(mask, row * 8);
      }
      return flipped;
  }
  // Transposes a 4x4 bit quad: the bit at (row, column) moves to (column, row).
  // Rows are 8 bits apart and only the low nibble of each byte is read.
  uint TransposeQuad(uint mask)
  {
      uint transposed = 0;
      [unroll]
      for (int cell = 0; cell < 16; ++cell)
      {
          const int row = cell >> 2;
          const int column = cell & 3;
          const uint isSet = (mask >> (row * 8 + column)) & 1u;
          transposed |= isSet << (column * 8 + row);
      }
      return transposed;
  }
  /*
  Fills gs_quadMask with the four 16-entry lookup tables used by the coverage functions:
    [ 0, 16) - neutral quads from BuildQuadMask
    [16, 32) - quads mirrored in X
    [32, 48) - transposed quads
    [48, 64) - complement of the transposed, X-mirrored quads, limited to the low nibble of each byte
  groupThreadIndex - the thread index within the group; threads 0..15 each build one entry per
                     table, all other threads only take part in the barriers.
  NOTE: the caller must still sync the group after this returns before reading the LUT.
  */
  void GenLUT(uint groupThreadIndex)
  {
      const bool ownsEntry = groupThreadIndex < 16;

      // Neutral
      if (ownsEntry)
      {
          gs_quadMask[groupThreadIndex] = BuildQuadMask(groupThreadIndex);
      }

      GroupMemoryBarrierWithGroupSync();

      // Flip in X axis, transpose
      // NOTE(review): each thread only reads back the entry it wrote itself, so the
      // barriers inside this function look removable - confirm before changing.
      if (ownsEntry)
      {
          const uint neutral = gs_quadMask[groupThreadIndex];
          gs_quadMask[groupThreadIndex + 16] = FlipQuadInX(neutral);
          gs_quadMask[groupThreadIndex + 32] = TransposeQuad(neutral);
      }

      GroupMemoryBarrierWithGroupSync();

      // Flip in X axis and transpose, stored inverted
      if (ownsEntry)
      {
          const uint flippedTransposed = TransposeQuad(FlipQuadInX(gs_quadMask[groupThreadIndex]));
          gs_quadMask[groupThreadIndex + 48] = (~flippedTransposed) & 0x0F0F0F0F;
      }
  }
  // A 2D line in slope / offset form: f(x) = a * x + b.
  struct AnalyticalLine
  {
      float a; // slope
      float b; // offset, i.e. f(0)

      // Fits the line through the two points v0 and v1:
      //   a = (v1.y - v0.y) / (v1.x - v0.x),  b = v1.y - a * v1.x
      // NOTE: there is no guard for v0.x == v1.x (vertical segment); the division is done as-is.
      void Build(float2 v0, float2 v1)
      {
          const float2 delta = v1 - v0;
          a = delta.y / delta.x;
          b = v1.y - a * v1.x;
      }

      // Builds the "flipped" version of the line through v0 and v1, used for LUT compression.
      // The segment is transformed so that the stored slope is non-negative and no steeper than 1:
      //   outFlipAxis    - true when x and y were swapped (the segment is steeper than 45 degrees).
      //   outFlipX       - true when x was mirrored (x -> 1 - x) because dx and dy have different signs.
      //   outIsRightHand - orientation flag derived from the direction of travel in x and y.
      //   outValid       - false when both (transformed) end points coincide.
      // The flip flags describe how to recover the original line from the stored one.
      void BuildFlipped(float2 v0, float2 v1, out bool outFlipX, out bool outFlipAxis, out bool outIsRightHand, out bool outValid)
      {
          float2 delta = v1 - v0;

          outFlipAxis = abs(delta.y) > abs(delta.x);
          outFlipX = sign(delta.y) != sign(delta.x);

          if (delta.x >= 0)
              outIsRightHand = v0.y >= v1.y;
          else
              outIsRightHand = v0.y > v1.y;

          // Swap the axes so that x becomes the dominant direction.
          if (outFlipAxis)
          {
              delta = delta.yx;
              v0 = v0.yx;
              v1 = v1.yx;
          }

          a = delta.y / delta.x;

          // Mirror in x so the slope becomes non-negative.
          if (outFlipX)
          {
              v0.x = 1.0 - v0.x;
              v1.x = 1.0 - v1.x;
              a = -a;
          }

          b = v1.y - a * v1.x;

          // Evaluated on the transformed points, after the swap / mirror above.
          outValid = any(v1 != v0);
      }

      // Returns f(xval) = a * xval + b.
      float Eval(float xval)
      {
          return xval * a + b;
      }

      // Returns f(x) for four x values at once.
      float4 Eval4(float4 xvals)
      {
          return xvals * a + b;
      }

      // Returns the point (xv, f(xv)) on the line.
      float2 PointAt(float xv)
      {
          const float yv = Eval(xv);
          return float2(xv, yv);
      }
  };
  /*
  Describes how a line splits the 8x8 grid, in a form suitable for LUT lookups.
  The grid is handled as two halves of 4 columns each (along the dominant axis of the flipped line):
    offsets[h] - the row of the line at the first column of half h
    masks[h]   - the increment mask of half h (bit i set = the line rises by one row after column i),
                 used to look up the quad coverage
  flipX / flipAxis / isRightHand / isValid are the flags produced by AnalyticalLine::BuildFlipped.
  debugLine is the unflipped analytical line, kept for debugging.
  */
  struct LineArea
  {
      int offsets[2];
      uint masks[2];
      bool isValid;
      bool flipX;
      bool flipAxis;
      bool isRightHand;
      AnalyticalLine debugLine;

      // Recovers one of the 8 boundary points of the line (where it crosses column i, i in 0..7),
      // in normalized grid coordinates with the flips undone.
      float2 GetBoundaryPoint(uint i)
      {
          const int column = i & 0x3; // column inside the half
          const int side = i >> 2;    // which half the column belongs to

          // Row = starting row of the half + number of raises before this column.
          const uint raisesBefore = countbits(((1u << column) - 1) & masks[side]);
          const int row = offsets[side] + (int)raisesBefore;

          float2 p = float2(i + 0.5, row + 0.5) / 8.0;
          if (flipX)
          {
              p.x = 1.0 - p.x;
          }
          if (flipAxis)
          {
              p = p.yx;
          }
          return p;
      }

      // Creates a line area from 2 points on the 8x8 quad.
      // The quad coordinate domain is 0.0 -> 1.0 on both axes; anything negative or
      // greater than 1.0 is by definition outside of the 8x8 quad.
      static LineArea Create(float2 v0, float2 v1)
      {
          LineArea area;

          // Unflipped line, debug only.
          area.debugLine.Build(v0, v1);

          AnalyticalLine flipped;
          flipped.BuildFlipped(v0, v1, area.flipX, area.flipAxis, area.isRightHand, area.isValid);

          // Pixel-center x coordinates of the 8 columns, split in two halves.
          const float4 centersLo = float4(0.5, 1.5, 2.5, 3.5) / 8.0;
          const float4 centersHi = float4(4.5, 5.5, 6.5, 7.5) / 8.0;

          // Integer row of the line at every column.
          const int4 rowsLo = (int4)floor(flipped.Eval4(centersLo) * 8.0 - 0.5);
          const int4 rowsHi = (int4)floor(flipped.Eval4(centersHi) * 8.0 - 0.5);

          // Row increments between consecutive columns. The last column of the
          // second half has no successor, so its increment is 0.
          const uint4 raiseLo = uint4(rowsLo.yzw, rowsHi.x) - rowsLo;
          const uint4 raiseHi = uint4(rowsHi.yzw, 0) - uint4(rowsHi.xyz, 0);

          area.offsets[0] = rowsLo.x;
          area.masks[0] = raiseLo.x
                        | (raiseLo.y << 1)
                        | (raiseLo.z << 2)
                        | (raiseLo.w << 3);

          // The second half starts where the first half ended.
          area.offsets[1] = countbits(area.masks[0]) + area.offsets[0];
          area.masks[1] = raiseHi.x
                        | (raiseHi.y << 1)
                        | (raiseHi.z << 2)
                        | (raiseHi.w << 3);

          return area;
      }
  };
  // Helper for the transposed (flipAxis) path of CreateCoverageMask:
  // shifts one transposed half-quad by `offset` bit positions inside its bytes.
  // Positive offsets shift left, keeping both the part that stays in the low nibbles and the
  // part that moves into the high nibbles; non-positive offsets shift right within the low nibbles.
  uint ShiftTransposedQuad(uint quad, int offset)
  {
      const uint lowNibbles = 0x0F0F0F0F;
      if (offset > 0)
      {
          const uint inLowNibbles = (quad << min(4, offset)) & lowNibbles;
          const uint inHighNibbles = (quad << min(8, offset)) & ~lowNibbles;
          return inLowNibbles | inHighNibbles;
      }
      return (((quad << 4) >> min(4, -offset)) & ~lowNibbles) >> 4;
  }

  // Helper for the transposed (flipAxis) path of CreateCoverageMask:
  // builds the per-row fill pattern for the columns on the far side of the shifted quad.
  uint BuildBackFill(int shift, bool flipX)
  {
      if (shift <= 0)
      {
          return flipX ? 0xFFFFFFFFu : 0u;
      }
      const uint belowShift = (1u << shift) - 1u;
      return flipX ? (0xFF & ~belowShift) : (0xFFFF & belowShift);
  }

  // Helper for the horizontal path of CreateCoverageMask:
  // moves one side quad by `offset` bits (whole rows) and keeps only the
  // columns selected by `columnMask`.
  uint ShiftSideQuad(uint sideQuad, uint columnMask, int offset)
  {
      const uint moved = offset > 0
          ? (~sideQuad & columnMask) << offset
          : ~(sideQuad >> -offset);
      return moved & columnMask;
  }

  // Converts a LineArea into the 64 bit coverage mask (uint2, x = first 32 bits) of the
  // half plane it describes. Reads gs_quadMask, so the LUT must have been generated first.
  // Returns 0 when the line area is not valid.
  uint2 CreateCoverageMask(in LineArea lineArea)
  {
      const uint lowNibbles = 0x0F0F0F0F;
      const uint2 columnHalves = uint2(lowNibbles, ~lowNibbles);

      // When mirrored in x the two halves swap places.
      const int first = lineArea.flipX ? 1 : 0;
      const int second = 1 - first;

      // Selects one of the four 16-entry tables inside gs_quadMask.
      const uint lutBase = ((uint)lineArea.flipX << 4) | ((uint)lineArea.flipAxis << 5);

      const int2 offsets = int2(lineArea.offsets[first], lineArea.offsets[second]);
      const uint2 halfQuads = uint2(gs_quadMask[lineArea.masks[first] + lutBase],
                                    gs_quadMask[lineArea.masks[second] + lutBase]);

      uint2 coverage = 0;
      if (lineArea.flipAxis)
      {
          // Transposed case: the two halves are the top and bottom 32 bits.
          const int2 shifts = clamp(offsets, -31, 31);
          const uint2 workMask = lowNibbles << clamp(offsets, 0, 4);
          const uint2 topDown = uint2(ShiftTransposedQuad(halfQuads.x, shifts.x),
                                      ShiftTransposedQuad(halfQuads.y, shifts.y));

          const int2 backShift = lineArea.flipX ? clamp(shifts + 4, -31, 31) : shifts;
          const uint2 backRow = uint2(BuildBackFill(backShift.x, lineArea.flipX),
                                      BuildBackFill(backShift.y, lineArea.flipX));

          // Replicate the row pattern into all 4 rows of each half.
          const uint2 backFill = backRow | (backRow << 8) | (backRow << 16) | (backRow << 24);
          coverage = backFill | (topDown & workMask);
      }
      else
      {
          // Horizontal case: left / right quads for the lower and upper 4 rows.
          const uint2 sideQuads = uint2(halfQuads.x, halfQuads.y << 4);
          const int4 rowShifts = clamp((offsets.xyxy - int4(0, 0, 4, 4)) << 3, -31, 31);

          const uint lowerRows = ShiftSideQuad(sideQuads.x, columnHalves.x, rowShifts.x)
                               | ShiftSideQuad(sideQuads.y, columnHalves.y, rowShifts.y);
          const uint upperRows = ShiftSideQuad(sideQuads.x, columnHalves.x, rowShifts.z)
                               | ShiftSideQuad(sideQuads.y, columnHalves.y, rowShifts.w);
          coverage = uint2(lowerRows, upperRows);
      }

      // Inverting once for flipX and once for a left-handed line cancels out,
      // so the mask is inverted only when both flags agree.
      if (lineArea.flipX == lineArea.isRightHand)
      {
          coverage = ~coverage;
      }

      return lineArea.isValid ? coverage : uint2(0, 0);
  }
  /*
  Computes the 64 bit coverage mask of a triangle on the 8x8 grid.
  v0, v1, v2    - the triangle vertices in normalized grid coordinates, in right hand ruling order.
  showFrontFace - when true, the pixels inside all three edge masks are included.
  showBackface  - when true, the pixels outside all three edge masks (the coverage of the
                  triangle with the opposite winding) are included, provided at least one
                  edge mask is non-empty.
  return        - the coverage mask for this triangle (x = first 32 bits, y = next 32 bits).
  */
  uint2 TriangleCoverageMask(float2 v0, float2 v1, float2 v2, bool showFrontFace, bool showBackface)
  {
      // One half-plane mask per edge.
      const uint2 mask0 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v0, v1));
      const uint2 mask1 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v1, v2));
      const uint2 mask2 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v2, v0));

      const uint2 frontMask = mask0 & mask1 & mask2;
      const uint2 backMask = ~mask0 & ~mask1 & ~mask2;

      // If every edge mask is empty the complement would cover the whole tile,
      // so the back face is only reported when at least one edge produced coverage.
      const bool anyEdgeCovered = any(mask0 != 0) || any(mask1 != 0) || any(mask2 != 0);

      const uint2 front = showFrontFace ? frontMask : uint2(0, 0);
      const uint2 back = (anyEdgeCovered && showBackface) ? backMask : uint2(0, 0);
      return front | back;
  }
  /*
  Computes the 64 bit coverage mask of a thick line segment on the 8x8 grid.
  The segment is turned into a rectangle and the half-plane masks of its four edges are intersected.
  v0, v1    - the line end points in normalized grid coordinates.
  thickness - offset applied perpendicular to the line on each side, in normalized space.
              NOTE(review): the edges sit at +/- thickness, so the covered band is 2 * thickness
              wide - confirm whether thickness is meant as the half width.
  caps      - how far the rectangle extends beyond each end point along the line, in normalized space.
  return    - the coverage mask of this line (x = first 32 bits, y = next 32 bits).

  A zero-length segment (v0 == v1) has no direction; it is treated as pointing along +x, so it
  yields an axis aligned rectangle when caps > 0 and an empty mask when caps == 0.
  */
  uint2 LineCoverageMask(float2 v0, float2 v1, float thickness, float caps)
  {
      const float2 delta = v1 - v0;

      // normalize() of a zero vector is not finite, and the resulting NaN end points
      // would still pass the validity test of the edges. Fall back to a fixed direction.
      const float2 lineVector = dot(delta, delta) > 0.0 ? normalize(delta) : float2(1.0, 0.0);

      // Perpendicular of the line direction, scaled by the thickness.
      const float2 D = cross(float3(lineVector, 0.0), float3(0, 0, 1)).xy * thickness;

      // Extend both ends by the caps.
      v0 -= caps * lineVector;
      v1 += caps * lineVector;

      // The four edges of the rectangle, wound consistently.
      const uint2 sideA = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v0 - D, v1 - D));
      const uint2 sideB = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v1 + D, v0 + D));
      const uint2 capStart = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v0 + D, v0 - D));
      const uint2 capEnd = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v1 - D, v1 + D));

      return sideA & sideB & capEnd & capStart;
  }
  360. }
  361. #endif