Нема описа
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

StreamCompressionModel.cs 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. using System;
  2. using System.Diagnostics;
  3. using Unity.Burst;
  4. using Unity.Mathematics;
  5. namespace Unity.Collections
  6. {
  7. /// <summary>
  8. /// A type that uses Huffman encoding to encode values in a lossless manner.
  9. /// </summary>
  10. /// <remarks>
  11. /// This type puts values into a manageable number of power-of-two-sized buckets.
  12. /// It codes the bucket index with Huffman, and uses several raw bits that correspond
  13. /// to the size of the bucket to code the position in the bucket.
  14. ///
  15. /// For example, if you want to send a 32-bit integer over the network, it's
  16. /// impractical to create a Huffman tree that encompasses every value the integer
  17. /// can take because it requires a tree with 2^32 leaves. This type manages that situation.
  18. ///
  19. /// The buckets are small, around 0, and become progressively larger as the data moves away from zero.
  20. /// Because most data is deltas against predictions, most values are small and most of the redundancy
  21. /// is in the error's size and not in the values of that size we end up hitting.
  22. ///
  23. /// The context is as a sub-model that has its own statistics and uses its own Huffman tree.
  24. /// When using the context to read and write a specific value, the context must always be the same.
  25. /// The benefit of using multiple contexts is that it allows you to separate the statistics of things that have
  26. /// different expected distributions, which leads to more precise statistics, which again yields better compression.
  27. /// More contexts does, however, result in a marginal cost of a slightly larger model.
  28. /// </remarks>
  29. [GenerateTestsForBurstCompatibility]
  30. public unsafe struct StreamCompressionModel
  31. {
  32. internal static readonly byte[] k_BucketSizes =
  33. {
  34. 0, 0, 1, 2, 3, 4, 6, 8, 10, 12, 15, 18, 21, 24, 27, 32
  35. };
  36. internal static readonly uint[] k_BucketOffsets =
  37. {
  38. 0, 1, 2, 4, 8, 16, 32, 96, 352, 1376, 5472, 38240, 300384, 2397536, 19174752, 153392480
  39. };
  40. internal static readonly int[] k_FirstBucketCandidate =
  41. {
  42. // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
  43. 15, 15, 15, 15, 14, 14, 14, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 8, 7, 7, 6, 5, 4, 3, 2, 1, 1, 0
  44. };
  45. internal static readonly byte[] k_DefaultModelData = { 16, // 16 symbols
  46. 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6,
  47. 0, 0 }; // no contexts
  48. internal const int k_AlphabetSize = 16;
  49. internal const int k_MaxHuffmanSymbolLength = 6;
  50. internal const int k_MaxContexts = 1;
  51. byte m_Initialized;
  52. static class SharedStaticCompressionModel
  53. {
  54. internal static readonly SharedStatic<StreamCompressionModel> Default = SharedStatic<StreamCompressionModel>.GetOrCreate<StreamCompressionModel>();
  55. }
  56. /// <summary>
  57. /// A shared singleton instance of <see cref="StreamCompressionModel"/>, this instance is initialized using
  58. /// hardcoded bucket parameters and model.
  59. /// </summary>
  60. public static StreamCompressionModel Default {
  61. get
  62. {
  63. if (SharedStaticCompressionModel.Default.Data.m_Initialized == 1)
  64. {
  65. return SharedStaticCompressionModel.Default.Data;
  66. }
  67. Initialize();
  68. SharedStaticCompressionModel.Default.Data.m_Initialized = 1;
  69. return SharedStaticCompressionModel.Default.Data;
  70. }
  71. }
  72. static void Initialize()
  73. {
  74. for (int i = 0; i < k_AlphabetSize; ++i)
  75. {
  76. SharedStaticCompressionModel.Default.Data.bucketSizes[i] = k_BucketSizes[i];
  77. SharedStaticCompressionModel.Default.Data.bucketOffsets[i] = k_BucketOffsets[i];
  78. }
  79. var modelData = new NativeArray<byte>(k_DefaultModelData.Length, Allocator.Temp);
  80. for (var index = 0; index < k_DefaultModelData.Length; index++)
  81. {
  82. modelData[index] = k_DefaultModelData[index];
  83. }
  84. //int numContexts = NetworkConfig.maxContexts;
  85. int numContexts = 1;
  86. var symbolLengths = new NativeArray<byte>(numContexts * k_AlphabetSize, Allocator.Temp);
  87. int readOffset = 0;
  88. {
  89. // default model
  90. int defaultModelAlphabetSize = modelData[readOffset++];
  91. CheckAlphabetSize(defaultModelAlphabetSize);
  92. for (int i = 0; i < k_AlphabetSize; i++)
  93. {
  94. byte length = modelData[readOffset++];
  95. for (int context = 0; context < numContexts; context++)
  96. {
  97. symbolLengths[numContexts * context + i] = length;
  98. }
  99. }
  100. // other models
  101. int numModels = modelData[readOffset] | (modelData[readOffset + 1] << 8);
  102. readOffset += 2;
  103. for (int model = 0; model < numModels; model++)
  104. {
  105. int context = modelData[readOffset] | (modelData[readOffset + 1] << 8);
  106. readOffset += 2;
  107. int modelAlphabetSize = modelData[readOffset++];
  108. CheckAlphabetSize(modelAlphabetSize);
  109. for (int i = 0; i < k_AlphabetSize; i++)
  110. {
  111. byte length = modelData[readOffset++];
  112. symbolLengths[numContexts * context + i] = length;
  113. }
  114. }
  115. }
  116. // generate tables
  117. var tmpSymbolLengths = new NativeArray<byte>(k_AlphabetSize, Allocator.Temp);
  118. var tmpSymbolDecodeTable = new NativeArray<ushort>(1 << k_MaxHuffmanSymbolLength, Allocator.Temp);
  119. var symbolCodes = new NativeArray<byte>(k_AlphabetSize, Allocator.Temp);
  120. for (int context = 0; context < numContexts; context++)
  121. {
  122. for (int i = 0; i < k_AlphabetSize; i++)
  123. tmpSymbolLengths[i] = symbolLengths[numContexts * context + i];
  124. GenerateHuffmanCodes(symbolCodes, 0, tmpSymbolLengths, 0, k_AlphabetSize, k_MaxHuffmanSymbolLength);
  125. GenerateHuffmanDecodeTable(tmpSymbolDecodeTable, 0, tmpSymbolLengths, symbolCodes, k_AlphabetSize, k_MaxHuffmanSymbolLength);
  126. for (int i = 0; i < k_AlphabetSize; i++)
  127. {
  128. SharedStaticCompressionModel.Default.Data.encodeTable[context * k_AlphabetSize + i] = (ushort)((symbolCodes[i] << 8) | symbolLengths[numContexts * context + i]);
  129. }
  130. for (int i = 0; i < (1 << k_MaxHuffmanSymbolLength); i++)
  131. {
  132. SharedStaticCompressionModel.Default.Data.decodeTable[context * (1 << k_MaxHuffmanSymbolLength) + i] = tmpSymbolDecodeTable[i];
  133. }
  134. }
  135. }
  136. static void GenerateHuffmanCodes(NativeArray<byte> symbolCodes, int symbolCodesOffset, NativeArray<byte> symbolLengths, int symbolLengthsOffset, int alphabetSize, int maxCodeLength)
  137. {
  138. CheckAlphabetAndMaxCodeLength(alphabetSize, maxCodeLength);
  139. var lengthCounts = new NativeArray<byte>(maxCodeLength + 1, Allocator.Temp);
  140. var symbolList = new NativeArray<byte>((maxCodeLength + 1) * alphabetSize, Allocator.Temp);
  141. //byte[] symbol_list[(MAX_HUFFMAN_CODE_LENGTH + 1u) * MAX_NUM_HUFFMAN_SYMBOLS];
  142. for (int symbol = 0; symbol < alphabetSize; symbol++)
  143. {
  144. int symbolLength = symbolLengths[symbol + symbolLengthsOffset];
  145. CheckExceedMaxCodeLength(symbolLength, maxCodeLength);
  146. symbolList[(maxCodeLength + 1) * symbolLength + lengthCounts[symbolLength]++] = (byte)symbol;
  147. }
  148. uint nextCodeWord = 0;
  149. for (int length = 1; length <= maxCodeLength; length++)
  150. {
  151. int length_count = lengthCounts[length];
  152. for (int i = 0; i < length_count; i++)
  153. {
  154. int symbol = symbolList[(maxCodeLength + 1) * length + i];
  155. CheckSymbolLength(symbolLengths, symbolLengthsOffset, symbol, length);
  156. symbolCodes[symbol + symbolCodesOffset] = (byte)ReverseBits(nextCodeWord++, length);
  157. }
  158. nextCodeWord <<= 1;
  159. }
  160. }
  161. static uint ReverseBits(uint value, int num_bits)
  162. {
  163. value = ((value & 0x55555555u) << 1) | ((value & 0xAAAAAAAAu) >> 1);
  164. value = ((value & 0x33333333u) << 2) | ((value & 0xCCCCCCCCu) >> 2);
  165. value = ((value & 0x0F0F0F0Fu) << 4) | ((value & 0xF0F0F0F0u) >> 4);
  166. value = ((value & 0x00FF00FFu) << 8) | ((value & 0xFF00FF00u) >> 8);
  167. value = (value << 16) | (value >> 16);
  168. return value >> (32 - num_bits);
  169. }
  170. // decode table entries: (symbol << 8) | length
  171. static void GenerateHuffmanDecodeTable(NativeArray<ushort> decodeTable, int decodeTableOffset, NativeArray<byte> symbolLengths, NativeArray<byte> symbolCodes, int alphabetSize, int maxCodeLength)
  172. {
  173. CheckAlphabetAndMaxCodeLength(alphabetSize, maxCodeLength);
  174. uint maxCode = 1u << maxCodeLength;
  175. for (int symbol = 0; symbol < alphabetSize; symbol++)
  176. {
  177. int length = symbolLengths[symbol];
  178. CheckExceedMaxCodeLength(length, maxCodeLength);
  179. if (length > 0)
  180. {
  181. uint code = symbolCodes[symbol];
  182. uint step = 1u << length;
  183. do
  184. {
  185. decodeTable[(int)(decodeTableOffset + code)] = (ushort)(symbol << 8 | length);
  186. code += step;
  187. }
  188. while (code < maxCode);
  189. }
  190. }
  191. }
  192. /// <summary>
  193. /// Bucket n starts at bucketOffsets[n] and ends at bucketOffsets[n] + (1 &lt;&lt; bucketSizes[n]).
  194. /// (code &lt;&lt; 8) | length
  195. /// </summary>
  196. internal fixed ushort encodeTable[k_MaxContexts * k_AlphabetSize];
  197. /// <summary>
  198. /// Bucket n starts at bucketOffsets[n] and ends at bucketOffsets[n] + (1 &lt;&lt; bucketSizes[n]).
  199. /// (symbol &lt;&lt; 8) | length
  200. /// </summary>
  201. internal fixed ushort decodeTable[k_MaxContexts * (1 << k_MaxHuffmanSymbolLength)];
  202. /// <summary>
  203. /// Specifies the sizes of the buckets in bits, so a bucket of n bits has 2^n values.
  204. /// </summary>
  205. internal fixed byte bucketSizes[k_AlphabetSize];
  206. /// <summary>
  207. /// Specifies the starting positions of the bucket.
  208. /// </summary>
  209. internal fixed uint bucketOffsets[k_AlphabetSize];
  210. /// <summary>
  211. /// Calculates the bucket index into the <see cref="encodeTable"/> where the specified value should be written.
  212. /// </summary>
  213. /// <param name="value">A 4-byte unsigned integer value to find a bucket for.</param>
  214. /// <returns>The bucket index where to put the value.</returns>
  215. readonly public int CalculateBucket(uint value)
  216. {
  217. int bucketIndex = k_FirstBucketCandidate[math.lzcnt(value)];
  218. if (bucketIndex + 1 < k_AlphabetSize && value >= bucketOffsets[bucketIndex + 1])
  219. bucketIndex++;
  220. return bucketIndex;
  221. }
  222. /// <summary>
  223. /// The compressed size in bits of the given unsigned integer value
  224. /// </summary>
  225. /// <param name="value">the unsigned int value you want to compress</param>
  226. /// <returns></returns>
  227. public readonly int GetCompressedSizeInBits(uint value)
  228. {
  229. int bucket = CalculateBucket(value);
  230. int bits = bucketSizes[bucket];
  231. ushort encodeEntry = encodeTable[bucket];
  232. return (encodeEntry & 0xff) + bits;
  233. }
  234. [Conditional("ENABLE_UNITY_COLLECTIONS_CHECKS"), Conditional("UNITY_DOTS_DEBUG")]
  235. static void CheckAlphabetSize(int alphabetSize)
  236. {
  237. if (alphabetSize != k_AlphabetSize)
  238. {
  239. throw new InvalidOperationException("The alphabet size of compression models must be " + k_AlphabetSize);
  240. }
  241. }
  242. [Conditional("ENABLE_UNITY_COLLECTIONS_CHECKS"), Conditional("UNITY_DOTS_DEBUG")]
  243. static void CheckSymbolLength(NativeArray<byte> symbolLengths, int symbolLengthsOffset, int symbol, int length)
  244. {
  245. if (symbolLengths[symbol + symbolLengthsOffset] != length)
  246. throw new InvalidOperationException("Incorrect symbol length");
  247. }
  248. [Conditional("ENABLE_UNITY_COLLECTIONS_CHECKS"), Conditional("UNITY_DOTS_DEBUG")]
  249. static void CheckAlphabetAndMaxCodeLength(int alphabetSize, int maxCodeLength)
  250. {
  251. if (alphabetSize > 256 || maxCodeLength > 8)
  252. throw new InvalidOperationException("Can only generate huffman codes up to alphabet size 256 and maximum code length 8");
  253. }
  254. [Conditional("ENABLE_UNITY_COLLECTIONS_CHECKS"), Conditional("UNITY_DOTS_DEBUG")]
  255. static void CheckExceedMaxCodeLength(int length, int maxCodeLength)
  256. {
  257. if (length > maxCodeLength)
  258. throw new InvalidOperationException("Maximum code length exceeded");
  259. }
  260. }
  261. }