123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424 |
- /*
- MIT License
-
- Copyright (c) 2022 Kleber Garcia
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in all
- copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
- */
-
- #ifndef __COVERAGE__
- #define __COVERAGE__
-
- //Utilities for coverage bit mask on an 8x8 grid.
- namespace Coverage
- {
-
- //**************************************************************************************************************/
- // How to use
- //**************************************************************************************************************/
- /*
- To utilize this library, first call the GenLUT function at the beginning of your compute shader.
- This function must be followed by a group sync. Example follows:
-
- ...
- Coverage::GenLUT(groupThreadIndex);
- GroupMemoryBarrierWithGroupSync();
- ...
-
- Alternatively, you can dump the contents into a buffer. The contents of the LUT are inside gs_quadMask, which has 64 entries.
-
- After this use the coverage functions
-
- */
-
- //**************************************************************************************************************/
- // Coordinate System
- //**************************************************************************************************************/
- /*
- The functions in this library follow the same convention: the input is a shape described by certain vertices,
- the output is a 64 bit mask with that shape's coverage.
-
- The coordinate system is (0,0) for the top left of an 8x8 grid, and (1,1) for the bottom right.
- The LSB represents coordinate (0,0), and sample points are centered on the pixel.
-
- (0.0,0.0)                       (1.0,0.0)
- |                               |
- |_______________________________|
- |   |   |   |   |   |   |   |   |
- | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
- |___|___|___|___|___|___|___|___|
- |   |   |   |   |   |   |   |   |
- | 8 | 9 | 10| 11| 12| 13| 14| 15|
- |___|___|___|___|___|___|___|___|___(1.0, 2.0/8.0)
-
- In normalized coordinates the center of bit 0 is (0.5/8.0, 0.5/8.0), the center of bit 1 is (1.5/8.0, 0.5/8.0), and so on.
-
- Any point with a coordinate outside of the range (0,1) is outside the grid.
- */
-
- //**************************************************************************************************************/
- // Masks
- //**************************************************************************************************************/
- /*
- Masks are stored in a packed 64 bit represented by uint2.
- x component represents the first 32 bits, y component the next 32 bits.
- */
-
- //**************************************************************************************************************/
- // coverage API
- //**************************************************************************************************************/
-
- /*
- Lookup table of packed 4x4 quad masks. See the BuildQuadMask function.
- It holds 4 tables of 16 entries each, one per flip state, filled by GenLUT:
-   [ 0, 16) quads as returned by BuildQuadMask
-   [16, 32) the same quads flipped in X
-   [32, 48) the same quads transposed
-   [48, 64) the same quads flipped in X, transposed, then inverted inside the 4x4 nibbles
- You can dump this lut to a buffer and preload it manually,
- or just regenerate it in your thread group.
- */
- groupshared uint gs_quadMask[16 * 4];
-
- /*
- Call this function to generate the 4x4 coverage luts stored in gs_quadMask.
- groupThreadIndex - the index of the calling thread inside its thread group.
-                    Only threads with an index below 16 write lut entries.
- NOTE: the caller must sync the group threads after calling this, before the lut is read.
- */
- void GenLUT(uint groupThreadIndex);
-
- /*
- Call this function to get a 64 bit coverage mask for a triangle.
- v0, v1, v2 - the triangle coordinates in right hand ruling order
- showFrontFace - when true, the bits set in all three edge masks are part of the result
- showBackface - when true, the bits set in none of the three edge masks are part of the result,
-                provided at least one edge mask is not empty
- return - the coverage mask for this triangle
- */
- uint2 TriangleCoverageMask(float2 v0, float2 v1, float2 v2, bool showFrontFace, bool showBackface);
-
-
- /*
- Call this function to get a 64 bit coverage mask for a line.
- v0, v1 - the line coordinates.
- thickness - thickness of line in normalized space. 1.0 means the entire 8 pixels in a tile.
-             The long edges of the line are placed this far to either side of the segment v0 -> v1.
- caps - extra pixels in the caps of the line in normalized space. 1.0 means 8 pixels in a tile.
-        Both end points are pushed outwards along the line by this amount.
- return - the coverage mask of this line
- */
- uint2 LineCoverageMask(float2 v0, float2 v1, float thickness, float caps);
-
-
- //**************************************************************************************************************/
- // coverage implementation
- //**************************************************************************************************************/
-
/*
    Builds a compact 4x4 bit quad for line coverage.

    The line is assumed to rise by at most one row per column.

    incrementMask - bit i set means the line rises by one row after column i.
                    Only the 4 low bits are used, every other bit is ignored.
    return        - the quad packed into a uint with one row per byte: row r occupies
                    bits [r * 8, r * 8 + 3], the upper nibble of every byte stays clear.

    Example, for the increment bits (bit 0 first) 1 0 1 0 the returned quad is
    (bit 0 of every row drawn on the left):

        row 3:  0 0 0 0
        row 2:  0 0 0 1   <- bit 2 raises the line here
        row 1:  0 1 1 1   <- bit 0 raises the line here
        row 0:  1 1 1 1   <- the lowest row is always covered
*/
uint BuildQuadMask(uint incrementMask)
{
    // Enforce the documented contract: only the 4 low bits describe the line.
    // Without this a set bit 31 would ask for a shift by 32 below, which is outside
    // the valid 0..31 shift range of a 32 bit value.
    incrementMask &= 0xFu;

    uint quad = 0;
    uint rowMask = 0xFu;
    for (int row = 0; row < 4; ++row)
    {
        quad |= rowMask << (row * 8);
        if (incrementMask == 0)
            break;

        // The next row only keeps the columns past the lowest pending increment.
        uint stepColumn = firstbitlow(incrementMask);
        rowMask = (0xFu << (stepColumn + 1)) & 0xFu;
        incrementMask ^= 1u << stepColumn;
    }

    return quad;
}
-
// Reverses the bit order of the 4 bit nibble that starts at bit `offset` of `mask`.
// Returns the reversed nibble at the same bit position; every other bit of the result is 0.
uint FlipNibble(uint mask, int offset)
{
    const uint nibble = (mask >> offset) & 0xFu;

    // Exchange the outer pair (bits 0 and 3) and the inner pair (bits 1 and 2).
    const uint outer = ((nibble & 0x1u) << 3) | ((nibble & 0x8u) >> 3);
    const uint inner = ((nibble & 0x2u) << 1) | ((nibble & 0x4u) >> 1);

    return (outer | inner) << offset;
}
-
// Flips an entire packed 4x4 bit quad in X.
// The quad stores one row in the low nibble of every byte; each of those nibbles gets its
// bit order reversed. The upper nibble of every byte is ignored and comes back as 0.
uint FlipQuadInX(uint mask)
{
    // All four rows are handled at once: bit 0 <-> bit 3 and bit 1 <-> bit 2 of every byte.
    return ((mask & 0x01010101u) << 3)
         | ((mask & 0x02020202u) << 1)
         | ((mask & 0x04040404u) >> 1)
         | ((mask & 0x08080808u) >> 3);
}
-
// Transposes a packed 4x4 bit quad: the bit at (row, col), stored at bit row * 8 + col,
// moves to (col, row). Bits outside of the low nibble of each byte are dropped.
uint TransposeQuad(uint mask)
{
    uint transposed = 0;
    [unroll]
    for (int cell = 0; cell < 16; ++cell)
    {
        const int row = cell >> 2;
        const int col = cell & 3;
        const uint bit = (mask >> (row * 8 + col)) & 1u;
        transposed |= bit << (col * 8 + row);
    }
    return transposed;
}
-
/*
    Builds all the luts necessary for fast bit based coverage and stores them in gs_quadMask:

        [ 0, 16) quads as returned by BuildQuadMask
        [16, 32) the same quads flipped in X
        [32, 48) the same quads transposed
        [48, 64) the same quads flipped in X, transposed, then inverted inside the 4x4 nibbles

    groupThreadIndex - the index of the calling thread inside its thread group.
                       Threads 0..15 each write one entry of every table, other threads do nothing.

    NOTE: the caller must sync the group threads after calling this, before the lut is read.
*/
void GenLUT(uint groupThreadIndex)
{
    // Every entry a thread writes is derived from the quad that same thread builds,
    // so no thread depends on a write of another thread and no barrier is needed in here.
    if (groupThreadIndex < 16)
    {
        const uint quad = BuildQuadMask(groupThreadIndex);

        gs_quadMask[groupThreadIndex]      = quad;
        gs_quadMask[groupThreadIndex + 16] = FlipQuadInX(quad);
        gs_quadMask[groupThreadIndex + 32] = TransposeQuad(quad);
        gs_quadMask[groupThreadIndex + 48] = (~TransposeQuad(FlipQuadInX(quad))) & 0x0F0F0F0F;
    }
}
-
// A 2D line in slope / offset form: f(x) = a * x + b.
struct AnalyticalLine
{
    float a; // slope
    float b; // offset, the value of f(0)

    // Builds the line that goes through the two points.
    // The slope is computed as dy / dx, so two points sharing the same x do not give a finite slope.
    void Build(float2 v0, float2 v1)
    {
        const float2 delta = v1 - v0;
        a = delta.y / delta.x;
        b = v1.y - a * v1.x;
    }

    // Builds a "flipped" line, used to compress the lookups.
    // The line is brought into a canonical form where its slope is not negative and not greater than 1:
    //   outFlipAxis    - set when |dy| > |dx|; x and y were exchanged before building the line.
    //   outFlipX       - set when sign(dy) != sign(dx); x was mirrored (x -> 1 - x) and the slope negated.
    //   outIsRightHand - v0.y >= v1.y when dx >= 0, v0.y > v1.y otherwise (evaluated on the original points).
    //   outValid       - false when both points are equal after the transformations above.
    // The flip outputs are the operations needed to recover the original line.
    void BuildFlipped(float2 v0, float2 v1, out bool outFlipX, out bool outFlipAxis, out bool outIsRightHand, out bool outValid)
    {
        float2 delta = v1 - v0;

        outFlipAxis = abs(delta.y) > abs(delta.x);
        outFlipX = sign(delta.y) != sign(delta.x);
        if (delta.x >= 0)
            outIsRightHand = v0.y >= v1.y;
        else
            outIsRightHand = v0.y > v1.y;

        // Exchange the axes so that the x extent is never the smaller one.
        if (outFlipAxis)
        {
            delta = delta.yx;
            v0 = v0.yx;
            v1 = v1.yx;
        }

        a = delta.y / delta.x;

        // Mirror in x, which also changes the sign of the slope.
        if (outFlipX)
        {
            v0.x = 1.0 - v0.x;
            v1.x = 1.0 - v1.x;
            a = -a;
        }

        b = v1.y - a * v1.x;
        outValid = any(v1 != v0);
    }

    // Evaluates f(x) = a * x + b.
    float Eval(float xval)
    {
        return a * xval + b;
    }

    // Evaluates f(x) = a * x + b for 4 inputs at once.
    float4 Eval4(float4 xvals)
    {
        return a * xvals + b;
    }

    // Returns the point of the line at the given x.
    float2 PointAt(float xv)
    {
        return float2(xv, a * xv + b);
    }
};
-
/*
    Represents a set of bits in an 8x8 grid divided by a line.
    The representation is given by 2 splits of the 8x8 grid, 4 columns each.

    offsets     - how much the quad coverage of each split is offset along the dependent axis
                  (y, or x when the axes are flipped)
    masks       - the 4 bit increment mask of each split, used to look up the quad coverage
    isValid     - false when the two points used to create the area do not describe a line
    flipX,
    flipAxis    - the flips reported by AnalyticalLine::BuildFlipped for this line
    isRightHand - the handedness reported by AnalyticalLine::BuildFlipped for this line
    debugLine   - the unflipped line through the two points
*/
struct LineArea
{
    int offsets[2];
    uint masks[2];
    bool isValid;
    bool flipX;
    bool flipAxis;
    bool isRightHand;
    AnalyticalLine debugLine;

    // Recovers a single point in the boundary of the line (where the line intersects a pixel).
    // There is a total of 8 possible points, i must be in the range [0, 7].
    float2 GetBoundaryPoint(uint i)
    {
        const int column = i & 0x3; // column inside the split
        const int split = i >> 2;   // which of the two splits

        // Row reached at this column: the split offset plus every increment before the column.
        const uint incrementsBefore = ((1u << column) - 1) & masks[split];
        const int row = offsets[split] + (int)countbits(incrementsBefore);

        float2 v = float2(i + 0.5, row + 0.5) / 8.0;
        if (flipX)
            v.x = 1.0 - v.x;
        if (flipAxis)
            v = v.yx;
        return v;
    }

    // Creates a line area object, based on 2 points on an 8x8 quad.
    // The quad coordinate domain is 0.0 -> 1.0 for both axis.
    // Anything negative or greater than 1.0 is by definition outside of the 8x8 quad.
    static LineArea Create(float2 v0, float2 v1)
    {
        LineArea data;

        // line debug data
        data.debugLine.Build(v0, v1);

        AnalyticalLine l;
        l.BuildFlipped(v0, v1, data.flipX, data.flipAxis, data.isRightHand, data.isValid);

        // x values of the 8 column centers
        const float4 xs0 = float4(0.5, 1.5, 2.5, 3.5) / 8.0;
        const float4 xs1 = float4(4.5, 5.5, 6.5, 7.5) / 8.0;

        // row index of the line at each column center
        const int4 ysi0 = (int4)floor(l.Eval4(xs0) * 8.0 - 0.5);
        const int4 ysi1 = (int4)floor(l.Eval4(xs1) * 8.0 - 0.5);

        // Incremental masks: row change between neighbouring columns.
        // A flipped line rises by at most one row per column, but that is not guaranteed once
        // precision is lost (e.g. points very far away from the quad). Each increment is limited
        // to a single bit so the assembled masks never exceed 4 bits, since they are used
        // as an index into a 16 entry lookup table.
        const uint4 one = uint4(1u, 1u, 1u, 1u);
        const uint4 dysmask0 = min(uint4(ysi0.yzw, ysi1.x) - ysi0.xyzw, one);
        const uint4 dysmask1 = min(uint4(ysi1.yzw, 0) - uint4(ysi1.xyz, 0), one);

        // Final output, offset and mask
        data.offsets[0] = ysi0.x;
        data.masks[0] = dysmask0.x | (dysmask0.y << 1) | (dysmask0.z << 2) | (dysmask0.w << 3);
        data.offsets[1] = countbits(data.masks[0]) + data.offsets[0];
        data.masks[1] = dysmask1.x | (dysmask1.y << 1) | (dysmask1.z << 2) | (dysmask1.w << 3);
        return data;
    }
};
-
// Helper of CreateCoverageMask for the flipped axis / transposed case.
// Produces the 32 bit word built from one lut quad (halfSample) and its line area offset.
uint TransposedHalfCoverage(uint halfSample, int offset, bool flipX)
{
    const uint lowNibbles = 0x0F0F0F0F;
    const int shift = clamp(offset, -31, 31);

    // Quad moved by the offset, kept only inside the nibble columns at or past the offset.
    uint moved;
    if (shift > 0)
    {
        moved = ((halfSample << min(4, shift)) & lowNibbles)
              | ((halfSample << min(8, shift)) & ~lowNibbles);
    }
    else
    {
        moved = (((halfSample << 4) >> min(4, -shift)) & ~lowNibbles) >> 4;
    }
    moved &= lowNibbles << clamp(offset, 0, 4);

    // Fill pattern that gets replicated into all four bytes of the word.
    const int fillShift = flipX ? clamp(shift + 4, -31, 31) : shift;
    uint fill;
    if (fillShift <= 0)
    {
        fill = flipX ? 0xFFFFFFFFu : 0x0u;
    }
    else
    {
        const uint lowBits = (1u << fillShift) - 1u;
        fill = flipX ? (0xFFu & ~lowBits) : (0xFFFFu & lowBits);
    }

    return fill | (fill << 8) | (fill << 16) | (fill << 24) | moved;
}

// Helper of CreateCoverageMask for the non transposed case.
// Moves one side quad by rowOffset rows (8 bits per row) and keeps only the bits of columnMask.
uint ShiftedSideCoverage(uint sideMask, uint columnMask, int rowOffset)
{
    const int shift = clamp(rowOffset << 3, -31, 31);

    uint bits;
    if (shift > 0)
        bits = (~sideMask & columnMask) << shift;
    else
        bits = ~(sideMask >> -shift);

    return bits & columnMask;
}

// Expands a line area into its 64 bit coverage mask (x holds the first 32 bits, y the next 32 bits).
// The quads are read from gs_quadMask, which has to be filled before this is called.
// Returns an empty mask when lineArea.isValid is false.
uint2 CreateCoverageMask(in LineArea lineArea)
{
    const uint leftColumns = 0x0F0F0F0F;
    const uint rightColumns = ~leftColumns;

    // Prepare samples, the two splits are visited in swapped order if there is mirroring in x.
    const uint firstSplit = lineArea.flipX ? 1 : 0;
    const uint secondSplit = lineArea.flipX ? 0 : 1;

    // The flip state selects which of the four 16 entry tables is read.
    const uint lutBase = (lineArea.flipX ? 16u : 0u) | (lineArea.flipAxis ? 32u : 0u);

    const int firstOffset = lineArea.offsets[firstSplit];
    const int secondOffset = lineArea.offsets[secondSplit];
    const uint firstQuad = gs_quadMask[lineArea.masks[firstSplit] + lutBase];
    const uint secondQuad = gs_quadMask[lineArea.masks[secondSplit] + lutBase];

    uint2 coverage;
    if (lineArea.flipAxis)
    {
        // Case where we have flipped axis / transpose. We generate the top and bottom part.
        coverage = uint2(TransposedHalfCoverage(firstQuad, firstOffset, lineArea.flipX),
                         TransposedHalfCoverage(secondQuad, secondOffset, lineArea.flipX));
    }
    else
    {
        // Case where the masks are positioned horizontally. We generate 4 quads:
        // left and right side, for the first 4 rows (x) and for the last 4 rows (y).
        const uint leftSide = firstQuad;
        const uint rightSide = secondQuad << 4;

        coverage.x = ShiftedSideCoverage(leftSide, leftColumns, firstOffset)
                   | ShiftedSideCoverage(rightSide, rightColumns, secondOffset);
        coverage.y = ShiftedSideCoverage(leftSide, leftColumns, firstOffset - 4)
                   | ShiftedSideCoverage(rightSide, rightColumns, secondOffset - 4);
    }

    // Mirroring in x inverts the mask and a non right handed line inverts it as well,
    // so a single inversion remains exactly when both flags agree.
    if (lineArea.flipX == lineArea.isRightHand)
        coverage = ~coverage;

    if (!lineArea.isValid)
        coverage = uint2(0, 0);

    return coverage;
}
-
/*
    Returns the 64 bit coverage mask of a triangle.

    v0, v1, v2    - the triangle coordinates in right hand ruling order
    showFrontFace - when true, the bits set in all three edge masks are part of the result
    showBackface  - when true, the bits set in none of the three edge masks are part of the result,
                    provided at least one edge mask is not empty
    return        - the coverage mask, x holds the first 32 bits and y the next 32 bits
*/
uint2 TriangleCoverageMask(float2 v0, float2 v1, float2 v2, bool showFrontFace, bool showBackface)
{
    const uint2 mask0 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v0, v1));
    const uint2 mask1 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v1, v2));
    const uint2 mask2 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v2, v0));

    const uint2 empty = uint2(0, 0);
    const uint2 frontMask = mask0 & mask1 & mask2;
    const uint2 backMask = ~mask0 & ~mask1 & ~mask2;

    // When every edge mask is empty the complements would cover the whole tile,
    // so the back face is only reported when at least one edge produced coverage.
    const bool anyEdgeCoverage = any(mask0 != 0) || any(mask1 != 0) || any(mask2 != 0);

    const uint2 front = showFrontFace ? frontMask : empty;
    const uint2 back = (anyEdgeCoverage && showBackface) ? backMask : empty;
    return front | back;
}
-
/*
    Returns the 64 bit coverage mask of a line, rasterized as the rectangle around the segment v0 -> v1.

    v0, v1    - the line coordinates
    thickness - distance, in normalized space, from the segment to each of the two long edges
    caps      - distance, in normalized space, by which both end points are pushed outwards along the line
    return    - the coverage mask of this line. An empty mask is returned when v0 and v1 are so close
                that the line has no usable direction.
*/
uint2 LineCoverageMask(float2 v0, float2 v1, float thickness, float caps)
{
    // A zero length segment has no direction: normalize() would not return a finite vector
    // and every edge below would be built from non finite points.
    const float2 delta = v1 - v0;
    if (!(dot(delta, delta) > 0.0))
        return uint2(0, 0);

    const float2 lineVector = normalize(delta);

    // Perpendicular of the line direction, scaled by the thickness.
    const float2 D = cross(float3(lineVector, 0.0), float3(0, 0, 1)).xy * thickness;

    v0 -= caps * lineVector;
    v1 += caps * lineVector;

    // The four edges of the rectangle; their masks are intersected.
    const uint2 mask0 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v0 - D, v1 - D));
    const uint2 mask1 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v1 + D, v0 + D));
    const uint2 mask2 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v0 + D, v0 - D));
    const uint2 mask3 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v1 - D, v1 + D));
    return mask0 & mask1 & mask3 & mask2;
}
-
- }
-
- #endif
|