説明なし
選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

BC6H.hlsl 5.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. // Ref: https://github.com/knarkowicz/GPURealTimeBC6H/blob/master/bin/compress.hlsl
  2. // Doc: https://msdn.microsoft.com/en-us/library/windows/desktop/hh308952(v=vs.85).aspx
  // Compression error metric between two RGB values.
  // For each channel computes log2((b + 1) / (a + 1)), squares it, and returns
  // the sum of the three squared terms (a sum, not an average).
  // The +1 offset makes a zero input map to log2(1) = 0.
  //   a, b : RGB values to compare
  //   returns : summed squared log2 error over x, y and z
  float CalcMSLE(float3 a, float3 b)
  {
      float3 logRatio = log2( ( b + 1.0 ) / ( a + 1.0 ) );
      float3 squared = logRatio * logRatio;
      return squared.x + squared.y + squared.z;
  }
  // Quantization helpers
  // Quantizes an RGB value to a 7-bit scale.
  // Each channel is converted to its half-float bit pattern with f32tof16 and
  // rescaled by 128 / (0x7bff + 1). 0x7bff is the bit pattern of the largest
  // finite half, so non-negative finite inputs land in [0, 128).
  // The result is neither rounded nor clamped here; it still has a fractional part.
  float3 Quantize7(float3 x)
  {
      const float levels = 128.0;
      const float halfRange = 0x7bff + 1.0;
      float3 halfBits = f32tof16( x );
      return ( halfBits * levels ) / halfRange;
  }
  // Quantizes an RGB value to a 9-bit scale.
  // Each channel is converted to its half-float bit pattern with f32tof16 and
  // rescaled by 512 / (0x7bff + 1). 0x7bff is the bit pattern of the largest
  // finite half, so non-negative finite inputs land in [0, 512).
  // The result is neither rounded nor clamped here; it still has a fractional part.
  float3 Quantize9(float3 x)
  {
      const float levels = 512.0;
      const float halfRange = 0x7bff + 1.0;
      float3 halfBits = f32tof16( x );
      return ( halfBits * levels ) / halfRange;
  }
  // Quantizes an RGB value to a 10-bit scale.
  // Each channel is converted to its half-float bit pattern with f32tof16 and
  // rescaled by 1024 / (0x7bff + 1). 0x7bff is the bit pattern of the largest
  // finite half, so non-negative finite inputs land in [0, 1024).
  // The result is neither rounded nor clamped here; it still has a fractional part.
  float3 Quantize10(float3 x)
  {
      const float levels = 1024.0;
      const float halfRange = 0x7bff + 1.0;
      float3 halfBits = f32tof16( x );
      return ( halfBits * levels ) / halfRange;
  }
  // First unquantization step for 7-bit endpoint values.
  // Computes (x * 65536 + 0x8000) / 128 per channel in floating point.
  // NOTE(review): this looks like the float form of the BC6H unsigned
  // unquantize step ((x << 16) + 0x8000) >> 7 - confirm against the format docs.
  float3 Unquantize7(float3 x)
  {
      const float levels = 128.0;
      float3 expanded = x * 65536.0 + 0x8000;
      return expanded / levels;
  }
  // First unquantization step for 9-bit endpoint values.
  // Computes (x * 65536 + 0x8000) / 512 per channel in floating point.
  // NOTE(review): this looks like the float form of the BC6H unsigned
  // unquantize step ((x << 16) + 0x8000) >> 9 - confirm against the format docs.
  float3 Unquantize9(float3 x)
  {
      const float levels = 512.0;
      float3 expanded = x * 65536.0 + 0x8000;
      return expanded / levels;
  }
  // First unquantization step for 10-bit endpoint values.
  // Computes (x * 65536 + 0x8000) / 1024 per channel in floating point.
  // NOTE(review): this looks like the float form of the BC6H unsigned
  // unquantize step ((x << 16) + 0x8000) >> 10 - confirm against the format docs.
  float3 Unquantize10(float3 x)
  {
      const float levels = 1024.0;
      float3 expanded = x * 65536.0 + 0x8000;
      return expanded / levels;
  }
  35. // BC6H Helpers
  // Computes the 3-bit index (0..7) of a texel projected between two endpoints.
  //   texelPos     : position of the texel along the projection axis
  //   endPoint0Pos : position of endpoint 0 on the same axis (maps to index 0)
  //   endPoint1Pos : position of endpoint 1 on the same axis (maps to index 7)
  // The relative position is scaled and biased, 0.5 is added so the float->uint
  // truncation rounds to nearest, and the value is clamped to [0, 7].
  // NOTE(review): the 6.98182 / 0.00909 constants are presumably fitted to the
  // BC6H 3-bit interpolation weights - confirm against the reference encoder.
  // NOTE(review): endPoint0Pos == endPoint1Pos divides by zero; the result then
  // depends on how the target handles NaN/Inf in clamp.
  uint ComputeIndex3(float texelPos, float endPoint0Pos, float endPoint1Pos )
  {
      float span = endPoint1Pos - endPoint0Pos;
      float t = ( texelPos - endPoint0Pos ) / span;
      float biased = t * 6.98182f + 0.00909f + 0.5f;
      return (uint) clamp( biased, 0.0, 7.0 );
  }
  // Computes the 4-bit index (0..15) of a texel projected between two endpoints.
  //   texelPos     : position of the texel along the projection axis
  //   endPoint0Pos : position of endpoint 0 on the same axis (maps to index 0)
  //   endPoint1Pos : position of endpoint 1 on the same axis (maps to index 15)
  // The relative position is scaled and biased, 0.5 is added so the float->uint
  // truncation rounds to nearest, and the value is clamped to [0, 15].
  // NOTE(review): the 14.93333 / 0.03333 constants are presumably fitted to the
  // BC6H 4-bit interpolation weights - confirm against the reference encoder.
  // NOTE(review): endPoint0Pos == endPoint1Pos divides by zero; the result then
  // depends on how the target handles NaN/Inf in clamp.
  uint ComputeIndex4(float texelPos, float endPoint0Pos, float endPoint1Pos )
  {
      float span = endPoint1Pos - endPoint0Pos;
      float t = ( texelPos - endPoint0Pos ) / span;
      float biased = t * 14.93333f + 0.03333f + 0.5f;
      return (uint) clamp( biased, 0.0, 15.0 );
  }
  // Rewrites each channel of v1 as a masked magnitude plus a sign flag.
  // Each channel is truncated to int, ANDed with `mask`, and `signFlag` is
  // OR-ed in when the truncated value is negative. The result is written back
  // to v1 as floats.
  //   v1       : in/out value, modified in place
  //   mask     : bits of the integer value to keep
  //   signFlag : bit(s) to set for negative channels
  void SignExtend(inout float3 v1, uint mask, uint signFlag )
  {
      int3 asInt = (int3) v1;

      // Choose the sign bits from the unmasked values before rewriting them.
      uint signX = ( asInt.x < 0 ) ? signFlag : 0;
      uint signY = ( asInt.y < 0 ) ? signFlag : 0;
      uint signZ = ( asInt.z < 0 ) ? signFlag : 0;

      asInt.x = ( asInt.x & mask ) | signX;
      asInt.y = ( asInt.y & mask ) | signY;
      asInt.z = ( asInt.z & mask ) | signZ;

      v1 = asInt;
  }
  // Second unquantization step: interpolates two unquantized endpoints and
  // converts the result to float.
  //   endpoint0Unq, endpoint1Unq : endpoints after the first unquantize step
  //   weight                     : interpolation weight on a 0..64 scale
  //                                (0 selects endpoint 0, 64 selects endpoint 1)
  // The weighted sum plus 32 is scaled by 31/4096, i.e. (1/64) * (31/64).
  // The result is truncated to uint, reinterpreted as a half-float bit pattern,
  // and expanded with f16tof32.
  float3 FinishUnquantize( float3 endpoint0Unq, float3 endpoint1Unq, float weight )
  {
      const float rescale = 31.0 / 4096.0;
      float3 blended = endpoint0Unq * ( 64.0 - weight ) + endpoint1Unq * weight + 32.0;
      uint3 halfBits = uint3( blended * rescale );
      return f16tof32( halfBits );
  }
  61. // BC6H Modes
  // Encodes a 4x4 block of RGB texels as a BC6H mode 11 block
  // (mode bits 0x03, two 10-bit-per-channel endpoints, 4-bit indices).
  //   block     : receives the 128-bit encoded block; all four components are
  //               fully overwritten
  //   blockMSLE : receives the compression error of the encoded block
  //               (sum of CalcMSLE over the 16 texels)
  //   texels    : the 16 source texels
  // NOTE(review): if all 16 texels are identical, blockDir is 0/0 and the
  // projections below become NaN; the resulting indices depend on how the
  // target handles NaN in clamp. Confirm this is acceptable for flat blocks.
  void EncodeMode11( inout uint4 block, inout float blockMSLE, float3 texels[ 16 ] )
  {
      // compute endpoints (min/max RGB bbox)
      float3 blockMin = texels[ 0 ];
      float3 blockMax = texels[ 0 ];
      uint i;
      for ( i = 1; i < 16; ++i )
      {
          blockMin = min( blockMin, texels[ i ] );
          blockMax = max( blockMax, texels[ i ] );
      }

      // refine endpoints in log2 RGB space - find the second min and max value
      // per channel (channel values equal to the bbox extreme are skipped)
      float3 refinedBlockMin = blockMax;
      float3 refinedBlockMax = blockMin;
      for ( i = 0; i < 16; ++i )
      {
          float3 minTexel = float3(
              ( texels[ i ].x == blockMin.x ) ? refinedBlockMin.x : texels[ i ].x,
              ( texels[ i ].y == blockMin.y ) ? refinedBlockMin.y : texels[ i ].y,
              ( texels[ i ].z == blockMin.z ) ? refinedBlockMin.z : texels[ i ].z
          );
          float3 maxTexel = float3(
              ( texels[ i ].x == blockMax.x ) ? refinedBlockMax.x : texels[ i ].x,
              ( texels[ i ].y == blockMax.y ) ? refinedBlockMax.y : texels[ i ].y,
              ( texels[ i ].z == blockMax.z ) ? refinedBlockMax.z : texels[ i ].z
          );
          refinedBlockMin = min( refinedBlockMin, minTexel );
          refinedBlockMax = max( refinedBlockMax, maxTexel );
      }

      float3 logBlockMax = log2( blockMax + 1.0 );
      float3 logBlockMin = log2( blockMin + 1.0 );
      float3 logRefinedBlockMax = log2( refinedBlockMax + 1.0 );
      float3 logRefinedBlockMin = log2( refinedBlockMin + 1.0 );

      // Pull each endpoint inward by at most 1/32 of the log-space extent,
      // and never past the second-smallest / second-largest value.
      float3 logBlockMaxExt = ( logBlockMax - logBlockMin ) * ( 1.0 / 32.0 );
      logBlockMin += min( logRefinedBlockMin - logBlockMin, logBlockMaxExt );
      logBlockMax -= min( logBlockMax - logRefinedBlockMax, logBlockMaxExt );
      blockMin = exp2( logBlockMin ) - 1.0;
      blockMax = exp2( logBlockMax ) - 1.0;

      // Projection axis: bbox diagonal, scaled so its components sum to 1.
      float3 blockDir = blockMax - blockMin;
      blockDir = blockDir / ( blockDir.x + blockDir.y + blockDir.z );

      float3 endpoint0 = Quantize10( blockMin );
      float3 endpoint1 = Quantize10( blockMax );

      // Positions along the axis are compared as half-float bit patterns.
      float endPoint0Pos = f32tof16( dot( blockMin, blockDir ) );
      float endPoint1Pos = f32tof16( dot( blockMax, blockDir ) );

      // check if endpoint swap is required: texel 0's index is stored in only
      // 3 bits, so it must end up <= 7
      float fixupTexelPos = f32tof16( dot( texels[ 0 ], blockDir ) );
      uint fixupIndex = ComputeIndex4( fixupTexelPos, endPoint0Pos, endPoint1Pos );
      if ( fixupIndex > 7 )
      {
          Swap( endPoint0Pos, endPoint1Pos );
          Swap( endpoint0, endpoint1 );
      }

      // compute indices
      uint indices[ 16 ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
      for ( i = 0; i < 16; ++i )
      {
          float texelPos = f32tof16( dot( texels[ i ], blockDir ) );
          indices[ i ] = ComputeIndex4( texelPos, endPoint0Pos, endPoint1Pos );
      }

      // compute compression error (MSLE)
      float3 endpoint0Unq = Unquantize10( endpoint0 );
      float3 endpoint1Unq = Unquantize10( endpoint1 );
      float msle = 0.0;
      for ( i = 0; i < 16; ++i )
      {
          // 4-bit index -> interpolation weight on a 0..64 scale
          float weight = floor( ( indices[ i ] * 64.0 ) / 15.0 + 0.5 );
          float3 texelUnc = FinishUnquantize( endpoint0Unq, endpoint1Unq, weight );
          msle += CalcMSLE( texels[ i ], texelUnc );
      }

      // encode block for mode 11
      // Each component is assigned (not OR-ed) on its first write, so stale
      // bits in the incoming block cannot leak into the encoded result.
      blockMSLE = msle;
      block.x = 0x03;

      // endpoints (10 bits per channel; values straddle component boundaries)
      block.x |= (uint) endpoint0.x << 5;
      block.x |= (uint) endpoint0.y << 15;
      block.x |= (uint) endpoint0.z << 25;
      block.y = (uint) endpoint0.z >> 7;
      block.y |= (uint) endpoint1.x << 3;
      block.y |= (uint) endpoint1.y << 13;
      block.y |= (uint) endpoint1.z << 23;
      block.z = (uint) endpoint1.z >> 9;

      // indices (texel 0 uses 3 bits, the rest 4 bits each)
      block.z |= indices[ 0 ] << 1;
      block.z |= indices[ 1 ] << 4;
      block.z |= indices[ 2 ] << 8;
      block.z |= indices[ 3 ] << 12;
      block.z |= indices[ 4 ] << 16;
      block.z |= indices[ 5 ] << 20;
      block.z |= indices[ 6 ] << 24;
      block.z |= indices[ 7 ] << 28;
      block.w = indices[ 8 ] << 0;
      block.w |= indices[ 9 ] << 4;
      block.w |= indices[ 10 ] << 8;
      block.w |= indices[ 11 ] << 12;
      block.w |= indices[ 12 ] << 16;
      block.w |= indices[ 13 ] << 20;
      block.w |= indices[ 14 ] << 24;
      block.w |= indices[ 15 ] << 28;
  }