暂无描述
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

HlbvhBuilder.cs 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. using Unity.Mathematics;
  2. namespace UnityEngine.Rendering.RadeonRays
  3. {
  4. internal class HlbvhBuilder
  5. {
  // Compute shader containing the BVH construction kernels, followed by the
  // kernel indices looked up from it. All of these are assigned once in the
  // constructor and never reassigned, hence readonly.
  private readonly ComputeShader shaderBuildHlbvh;
  private readonly int kernelInit;
  private readonly int kernelCalculateAabb;
  private readonly int kernelCalculateMortonCodes;
  private readonly int kernelInitClusters;
  private readonly int kernelMergeClusters;
  private readonly int kernelFindPreferredNeighbor;
  private readonly int kernelWriteLeafNodes;
  private readonly int kernelBuildTreeBottomUp;
  private readonly int kernelClearUpdateFlags;

  // Compute shader and kernel indices used by ReorderIndexBuffer.
  private readonly ComputeShader shaderReorderTriangleIndices;
  private readonly int kernelOrderIndices;
  private readonly int kernelCopyOrderedIndicesBack;

  // Helper passes created from the same shader collection.
  private readonly RadixSort radixSort;
  private readonly Scan scan;

  // Divisors used when turning a triangle count into a thread-group count.
  // NOTE(review): presumably these must match the thread-group size and the
  // per-thread workload declared in the compute shaders - confirm there.
  private const uint kTrianglesPerThread = 8u;
  private const uint kGroupSize = 256u;
  private const uint kTrianglesPerGroup = kTrianglesPerThread * kGroupSize;
  /// <summary>
  /// Caches the compute shaders taken from <paramref name="shaders"/>, looks up
  /// every kernel index by name, and creates the radix sort and scan helpers.
  /// </summary>
  /// <param name="shaders">Shader collection supplying <c>buildHlbvh</c> and <c>reorderTriangleIndices</c>.</param>
  public HlbvhBuilder(RadeonRaysShaders shaders)
  {
      // BVH construction kernels.
      var buildShader = shaders.buildHlbvh;
      this.shaderBuildHlbvh = buildShader;
      this.kernelInit = buildShader.FindKernel("Init");
      this.kernelCalculateAabb = buildShader.FindKernel("CalculateAabb");
      this.kernelCalculateMortonCodes = buildShader.FindKernel("CalculateMortonCodes");
      this.kernelWriteLeafNodes = buildShader.FindKernel("WriteLeafNodes");
      this.kernelBuildTreeBottomUp = buildShader.FindKernel("BuildTreeBottomUp");
      this.kernelInitClusters = buildShader.FindKernel("InitClusters");
      this.kernelFindPreferredNeighbor = buildShader.FindKernel("FindPreferredNeighbor");
      this.kernelMergeClusters = buildShader.FindKernel("MergeClusters");
      this.kernelClearUpdateFlags = buildShader.FindKernel("ClearUpdateFlags");

      // Index-buffer reordering kernels.
      var reorderShader = shaders.reorderTriangleIndices;
      this.shaderReorderTriangleIndices = reorderShader;
      this.kernelOrderIndices = reorderShader.FindKernel("OrderIndices");
      this.kernelCopyOrderedIndicesBack = reorderShader.FindKernel("CopyOrderedIndicesBack");

      // Helper passes.
      this.radixSort = new RadixSort(shaders);
      this.scan = new Scan(shaders);
  }
  /// <summary>
  /// Returns the <c>TotalSize</c> of the scratch buffer layout computed for
  /// <paramref name="triangleCount"/> triangles.
  /// </summary>
  /// <param name="triangleCount">Number of triangles the build will process.</param>
  /// <returns>Required scratch buffer size, in dwords.</returns>
  public uint GetScratchDataSizeInDwords(uint triangleCount)
      => GetScratchBufferLayout(triangleCount).TotalSize;
  /// <summary>
  /// Returns <c>2 * leafCount - 1</c>, the number of nodes in a binary tree
  /// with <paramref name="leafCount"/> leaves in which every internal node has
  /// exactly two children.
  /// </summary>
  /// <param name="leafCount">Number of leaf nodes.</param>
  /// <returns>Total node count (leaves plus internal nodes).</returns>
  /// <remarks>
  /// The arithmetic is unsigned: a <paramref name="leafCount"/> of 0 wraps
  /// around instead of yielding a negative value.
  /// </remarks>
  public static uint GetBvhNodeCount(uint leafCount)
      => leafCount * 2 - 1;
  /// <summary>
  /// Returns a node-count estimate of 80% of <paramref name="leafCount"/>
  /// (truncated towards zero) plus a constant margin of 10.
  /// </summary>
  /// <param name="leafCount">Number of leaves (triangles) before any merging.</param>
  /// <returns>The estimated node count.</returns>
  public static uint GetBvhNodeCountPrediction(uint leafCount)
  {
      double scaledLeafCount = leafCount * 0.8;
      uint truncated = (uint)scaledLeafCount;
      return truncated + 10;
  }
  /// <summary>
  /// Returns the result buffer size for a BVH sized by
  /// <see cref="GetBvhNodeCount"/>: one extra slot for the header, 16 dwords
  /// per slot.
  /// </summary>
  /// <param name="triangleCount">Number of triangles, used as the leaf count.</param>
  /// <returns>Result buffer size, in dwords.</returns>
  public uint GetResultDataSizeInDwords(uint triangleCount)
  {
      const uint dwordsPerNode = 16;

      // One additional node-sized slot is reserved for the header.
      uint slotCount = GetBvhNodeCount(triangleCount) + 1;
      return slotCount * dwordsPerNode;
  }
  /// <summary>
  /// Returns the result buffer size for a BVH sized by
  /// <see cref="GetBvhNodeCountPrediction"/>: one extra slot for the header,
  /// 16 dwords per slot.
  /// </summary>
  /// <param name="triangleCount">Number of triangles, used as the leaf count.</param>
  /// <returns>Predicted result buffer size, in dwords.</returns>
  public uint GetResultDataSizeInDwordsPrediction(uint triangleCount)
  {
      const uint dwordsPerNode = 16;

      // One additional node-sized slot is reserved for the header.
      uint slotCount = GetBvhNodeCountPrediction(triangleCount) + 1;
      return slotCount * dwordsPerNode;
  }
  /// <summary>
  /// Offsets of the regions carved out of the scratch buffer, in the same
  /// units as <see cref="TotalSize"/> (reported as dwords by
  /// GetScratchDataSizeInDwords). Filled in by GetScratchBufferLayout.
  /// Several regions deliberately alias each other, as noted per field.
  /// </summary>
  struct ScratchBufferOffsets
  {
      // Start of the buffer; 6 entries are reserved here.
      public uint Aabb;
      // Sort outputs, triangleCount entries each.
      public uint SortedPrimitiveRefs;
      public uint SortedMortonCodes;
      // Sort inputs, triangleCount entries each. The region starting at
      // PrimitiveRefs is reused by the cluster/tree fields below.
      public uint PrimitiveRefs;
      public uint MortonCodes;
      // Working memory handed to the radix sort.
      public uint SortMemory;

      // Same offset as Deltas.
      public uint ScanScratch;
      // Same offset as PrimitiveRefs.
      public uint ClusterValidity;
      // PrimitiveRefs + triangleCount.
      public uint ClusterRange;
      // Same offset as ClusterToNodeIndex.
      public uint PreferredNeighbor;
      // PrimitiveRefs + 2 * triangleCount.
      public uint ClusterToNodeIndex;
      // PrimitiveRefs + 3 * triangleCount.
      public uint Deltas;
      // Same offset as ClusterValidity.
      public uint InternalNodeRange;

      // Size needed to hold every region above.
      public uint TotalSize;
  }
  /// <summary>
  /// Records the BVH build into <paramref name="cmd"/>: the Init,
  /// CalculateAabb and CalculateMortonCodes kernels, a radix sort of the
  /// Morton codes together with the primitive references, then either the
  /// cluster-merging path followed by WriteLeafNodes or ClearUpdateFlags, and
  /// finally BuildTreeBottomUp.
  /// </summary>
  /// <param name="cmd">Command buffer that receives all parameter updates and dispatches.</param>
  /// <param name="vertices">Buffer bound as <c>g_vertices</c>.</param>
  /// <param name="verticesOffset">Value passed as <c>g_vertices_offset</c>.</param>
  /// <param name="vertexStride">Value passed as <c>g_constants_vertex_stride</c>.</param>
  /// <param name="indices">Buffer bound as <c>g_indices</c>.</param>
  /// <param name="indicesOffset">Value passed as <c>g_indices_offset</c>.</param>
  /// <param name="triangleCount">Number of triangles; sizes the scratch layout and every dispatch.</param>
  /// <param name="scratch">Scratch buffer; see GetScratchDataSizeInDwords for the required size.</param>
  /// <param name="result">Buffer bound as <c>g_bvh</c>.</param>
  /// <param name="resultOffset">Value passed as <c>g_bvh_offset</c>.</param>
  /// <param name="resultSizeInNodes">Capacity of the result; <c>g_bvh_max_node_count</c> receives this value minus one.</param>
  /// <param name="reduceMemoryIterations">
  /// Number of FindPreferredNeighbor/MergeClusters passes. Zero skips the
  /// clustering path and passes <c>true</c> for the NO_REDUCTION keyword.
  /// </param>
  public void Execute(
      CommandBuffer cmd,
      GraphicsBuffer vertices, int verticesOffset, uint vertexStride,
      GraphicsBuffer indices, int indicesOffset, uint triangleCount,
      GraphicsBuffer scratch, GraphicsBuffer result, uint resultOffset, uint resultSizeInNodes,
      uint reduceMemoryIterations = 2)
  {
      bool mergeClusters = reduceMemoryIterations != 0;

      Common.EnableKeyword(cmd, shaderBuildHlbvh, "TOP_LEVEL", false);
      Common.EnableKeyword(cmd, shaderBuildHlbvh, "NO_REDUCTION", !mergeClusters);

      var layout = GetScratchBufferLayout(triangleCount);

      // Binds the common kernel arguments and records a one-dimensional dispatch.
      void Dispatch(int kernel, int groupCount, bool useSortedCodes)
      {
          BindKernelArguments(cmd, kernel, vertices, indices, scratch, layout, result, useSortedCodes);
          cmd.DispatchCompute(shaderBuildHlbvh, kernel, groupCount, 1, 1);
      }

      // Shader constants shared by all kernels.
      int maxNodeCount = (int)resultSizeInNodes - 1;
      cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_indices_offset, indicesOffset);
      cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_vertices_offset, verticesOffset);
      cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_constants_vertex_stride, (int)vertexStride);
      cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_constants_triangle_count, (int)triangleCount);
      cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_bvh_offset, (int)resultOffset);
      cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_bvh_max_node_count, maxNodeCount);
      cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_cluster_validity_offset, (int)layout.ClusterValidity);
      cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_cluster_range_offset, (int)layout.ClusterRange);
      cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_neighbor_offset, (int)layout.PreferredNeighbor);
      cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_cluster_to_node_offset, (int)layout.ClusterToNodeIndex);
      cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_deltas_offset, (int)layout.Deltas);
      cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_internal_node_range_offset, (int)layout.InternalNodeRange);

      // These kernels read the unsorted Morton code / primitive reference regions.
      Dispatch(kernelInit, 1, false);
      Dispatch(kernelCalculateAabb, (int)Common.CeilDivide(triangleCount, kTrianglesPerGroup), false);
      Dispatch(kernelCalculateMortonCodes, (int)Common.CeilDivide(triangleCount, kTrianglesPerGroup), false);

      radixSort.Execute(
          cmd, scratch,
          layout.MortonCodes, layout.SortedMortonCodes,
          layout.PrimitiveRefs, layout.SortedPrimitiveRefs,
          layout.SortMemory, triangleCount);

      // Everything from here on reads the sorted regions.
      if (mergeClusters)
      {
          // The original RadeonRays implementation stores only one triangle per leaf node.
          // This optional path first agglomerates multiple triangles per node before the BVH tree construction starts.
          // Based on the PLOC paper ("Parallel Locally-Ordered Clustering for Bounding Volume Hierarchy Construction").
          Dispatch(kernelInitClusters, (int)Common.CeilDivide(triangleCount, kGroupSize), true);

          for (uint pass = 0; pass < reduceMemoryIterations; ++pass)
          {
              Dispatch(kernelFindPreferredNeighbor, (int)Common.CeilDivide(triangleCount, kGroupSize), true);
              Dispatch(kernelMergeClusters, (int)Common.CeilDivide(triangleCount, kGroupSize), true);
          }

          scan.Execute(cmd, scratch, layout.ClusterValidity, layout.ClusterToNodeIndex, layout.ScanScratch, triangleCount);

          Dispatch(kernelWriteLeafNodes, (int)Common.CeilDivide(triangleCount, kGroupSize), true);
      }
      else
      {
          Dispatch(kernelClearUpdateFlags, (int)Common.CeilDivide(triangleCount, kTrianglesPerGroup), true);
      }

      // In RadeonRays, HLBVH construction was based on the "Maximizing Parallelism in the Construction of BVHs, Octrees, and k-d Trees" paper.
      // It is replaced here by an implementation of the "Fast and Simple Agglomerative LBVH Construction" paper, which does everything in a single bottom-up pass.
      Dispatch(kernelBuildTreeBottomUp, (int)Common.CeilDivide(triangleCount, kTrianglesPerGroup), true);
  }
  // Most recently computed layout and the triangle count it was computed for.
  private ScratchBufferOffsets cachedScratchOffsets;
  private uint cachedTriangleCount = 0;
  // Set once a layout has actually been computed. Without this flag the
  // initial cachedTriangleCount of 0 would match a first request for zero
  // triangles and hand back the default, all-zero layout.
  private bool hasCachedScratchOffsets = false;

  /// <summary>
  /// Computes where each region lives inside the scratch buffer for
  /// <paramref name="triangleCount"/> triangles. The last computed layout is
  /// cached and returned again while the triangle count stays the same.
  /// </summary>
  /// <param name="triangleCount">Number of triangles the build will process.</param>
  /// <returns>The region offsets and the total size required.</returns>
  ScratchBufferOffsets GetScratchBufferLayout(uint triangleCount)
  {
      if (hasCachedScratchOffsets && cachedTriangleCount == triangleCount)
      {
          return cachedScratchOffsets;
      }

      var result = new ScratchBufferOffsets();

      // Regions laid out back to back: 6 entries for Aabb, four
      // triangleCount-sized arrays, then the radix sort's own scratch memory.
      uint offset = 0;
      result.Aabb = offset;
      offset += 6;
      result.SortedPrimitiveRefs = offset;
      offset += triangleCount;
      result.SortedMortonCodes = offset;
      offset += triangleCount;
      result.PrimitiveRefs = offset;
      offset += triangleCount;
      result.MortonCodes = offset;
      offset += triangleCount;
      result.SortMemory = offset;
      offset += (uint)radixSort.GetScratchDataSizeInDwords(triangleCount);
      result.TotalSize = offset;

      // The remaining regions alias the space starting at PrimitiveRefs
      // (the unsorted sort inputs and the sort memory that follows them).

      // used by kernelWriteLeafNodes
      result.ClusterValidity = result.PrimitiveRefs;
      result.ClusterRange = result.PrimitiveRefs + triangleCount;
      result.ClusterToNodeIndex = result.PrimitiveRefs + 2 * triangleCount;
      result.Deltas = result.PrimitiveRefs + 3 * triangleCount;
      result.ScanScratch = result.Deltas;

      // used by Clustering
      result.PreferredNeighbor = result.ClusterToNodeIndex;

      // used by kernelBuildTreeBottomUp
      result.InternalNodeRange = result.ClusterValidity;

      // The aliased regions end at Deltas + triangleCount, which may lie
      // beyond the end of the sort memory; keep whichever is larger.
      result.TotalSize = math.max(result.TotalSize, result.Deltas + triangleCount);

      cachedScratchOffsets = result;
      cachedTriangleCount = triangleCount;
      hasCachedScratchOffsets = true;
      return result;
  }
  /// <summary>
  /// Binds the vertex, index, scratch and result buffers to
  /// <paramref name="kernel"/> of the build shader, and points the Morton code
  /// and primitive reference offsets at either the sorted or the unsorted
  /// regions of the scratch buffer.
  /// </summary>
  /// <param name="cmd">Command buffer that receives the bindings.</param>
  /// <param name="kernel">Kernel index within the build shader.</param>
  /// <param name="vertices">Buffer bound as <c>g_vertices</c>.</param>
  /// <param name="indices">Buffer bound as <c>g_indices</c>.</param>
  /// <param name="scratch">Buffer bound as <c>g_scratch_buffer</c>.</param>
  /// <param name="scratchLayout">Layout supplying the region offsets.</param>
  /// <param name="result">Buffer bound as <c>g_bvh</c>.</param>
  /// <param name="setSortedCodes"><c>true</c> to select the sorted regions, <c>false</c> for the unsorted ones.</param>
  private void BindKernelArguments(
      CommandBuffer cmd,
      int kernel,
      GraphicsBuffer vertices,
      GraphicsBuffer indices,
      GraphicsBuffer scratch,
      ScratchBufferOffsets scratchLayout,
      GraphicsBuffer result,
      bool setSortedCodes)
  {
      cmd.SetComputeBufferParam(shaderBuildHlbvh, kernel, SID.g_vertices, vertices);
      cmd.SetComputeBufferParam(shaderBuildHlbvh, kernel, SID.g_indices, indices);
      cmd.SetComputeBufferParam(shaderBuildHlbvh, kernel, SID.g_scratch_buffer, scratch);
      cmd.SetComputeBufferParam(shaderBuildHlbvh, kernel, SID.g_bvh, result);

      uint mortonCodesOffset = setSortedCodes
          ? scratchLayout.SortedMortonCodes
          : scratchLayout.MortonCodes;
      uint primitiveRefsOffset = setSortedCodes
          ? scratchLayout.SortedPrimitiveRefs
          : scratchLayout.PrimitiveRefs;

      cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_morton_codes_offset, (int)mortonCodesOffset);
      cmd.SetComputeIntParam(shaderBuildHlbvh, SID.g_primitive_refs_offset, (int)primitiveRefsOffset);
  }
  /// <summary>
  /// Records the OrderIndices kernel followed by the CopyOrderedIndicesBack
  /// kernel of the reorder shader, passing the sorted primitive references as
  /// the ordering and the PrimitiveRefs region as temporary index storage.
  /// Not called by any other member of this class.
  /// </summary>
  /// <param name="cmd">Command buffer that receives the dispatches.</param>
  /// <param name="indices">Buffer bound as <c>g_indices</c> for both kernels.</param>
  /// <param name="indicesOffset">Value passed as <c>g_indices_offset</c>.</param>
  /// <param name="triangleCount">Number of triangles; sizes both dispatches.</param>
  /// <param name="scratch">Buffer bound as <c>g_scratch_buffer</c> for both kernels.</param>
  /// <param name="scratchLayout">Layout supplying the scratch region offsets.</param>
  private void ReorderIndexBuffer(
      CommandBuffer cmd,
      GraphicsBuffer indices, int indicesOffset, uint triangleCount,
      GraphicsBuffer scratch, ScratchBufferOffsets scratchLayout)
  {
      var reorderShader = shaderReorderTriangleIndices;

      // Binds both buffers to the given kernel and dispatches it over all triangles.
      void RunKernel(int kernel)
      {
          cmd.SetComputeBufferParam(reorderShader, kernel, SID.g_indices, indices);
          cmd.SetComputeBufferParam(reorderShader, kernel, SID.g_scratch_buffer, scratch);
          cmd.DispatchCompute(reorderShader, kernel, (int)Common.CeilDivide(triangleCount, kTrianglesPerGroup), 1, 1);
      }

      cmd.SetComputeIntParam(reorderShader, SID.g_indices_offset, indicesOffset);
      cmd.SetComputeIntParam(reorderShader, SID.g_constants_triangle_count, (int)triangleCount);
      cmd.SetComputeIntParam(reorderShader, SID.g_sorted_prim_refs_offset, (int)scratchLayout.SortedPrimitiveRefs);
      cmd.SetComputeIntParam(reorderShader, SID.g_temp_indices_offset, (int)scratchLayout.PrimitiveRefs);

      RunKernel(kernelOrderIndices);
      RunKernel(kernelCopyOrderedIndicesBack);
  }
  227. }
  228. }