Nenhuma descrição
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

UTF8ArrayUnsafeUtility.cs 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. using Unity.Collections.LowLevel.Unsafe;
  2. namespace Unity.Collections
  3. {
  4. /// <summary>
  5. /// Provides methods for copying and encoding Unicode text.
  6. /// </summary>
  7. [GenerateTestsForBurstCompatibility]
  8. public static unsafe class UTF8ArrayUnsafeUtility
  9. {
  /// <summary>
  /// Converts a buffer of UCS-2/UTF-16 chars to UTF-8 and writes the result to a destination buffer.
  /// </summary>
  /// <remarks>No validation is done here; the source is handed directly to <c>Unicode.Utf16ToUtf8</c> and is assumed to be well-formed.</remarks>
  /// <param name="dest">The buffer that receives the UTF-8 bytes.</param>
  /// <param name="destLength">Receives the byte count reported by the conversion.</param>
  /// <param name="destUTF8MaxLengthInBytes">Upper limit, in bytes, passed to the conversion for the destination buffer.</param>
  /// <param name="src">The buffer of chars to read.</param>
  /// <param name="srcLength">How many chars to read from <paramref name="src"/>.</param>
  /// <returns><see cref="CopyError.None"/> when the conversion reports no error; <see cref="CopyError.Truncation"/> for every other conversion result.</returns>
  public static CopyError Copy(byte *dest, out int destLength, int destUTF8MaxLengthInBytes, char *src, int srcLength)
  {
      var status = Unicode.Utf16ToUtf8(src, srcLength, dest, out destLength, destUTF8MaxLengthInBytes);
      // Any conversion failure is surfaced to the caller as a truncation.
      return status == ConversionError.None ? CopyError.None : CopyError.Truncation;
  }
  /// <summary>
  /// Converts a buffer of UCS-2/UTF-16 chars to UTF-8, reporting the written length as a <c>ushort</c>.
  /// </summary>
  /// <remarks>No validation is done here; the source is handed directly to <c>Unicode.Utf16ToUtf8</c> and is assumed to be well-formed.</remarks>
  /// <param name="dest">The buffer that receives the UTF-8 bytes.</param>
  /// <param name="destLength">Receives the byte count reported by the conversion, narrowed to <c>ushort</c>.</param>
  /// <param name="destUTF8MaxLengthInBytes">Upper limit, in bytes, passed to the conversion for the destination buffer.</param>
  /// <param name="src">The buffer of chars to read.</param>
  /// <param name="srcLength">How many chars to read from <paramref name="src"/>.</param>
  /// <returns><see cref="CopyError.None"/> when the conversion reports no error; <see cref="CopyError.Truncation"/> for every other conversion result.</returns>
  public static CopyError Copy(byte *dest, out ushort destLength, ushort destUTF8MaxLengthInBytes, char *src, int srcLength)
  {
      var status = Unicode.Utf16ToUtf8(src, srcLength, dest, out var bytesWritten, destUTF8MaxLengthInBytes);
      destLength = (ushort)bytesWritten;
      if (status != ConversionError.None)
          return CopyError.Truncation;
      return CopyError.None;
  }
  /// <summary>
  /// Copies a buffer of UTF-8 text into another UTF-8 buffer.
  /// </summary>
  /// <remarks>No validation is done here; the source is handed directly to <c>Unicode.Utf8ToUtf8</c> and is assumed to be valid UTF-8.</remarks>
  /// <param name="dest">The buffer that receives the copy.</param>
  /// <param name="destLength">Receives the byte count reported by the copy.</param>
  /// <param name="destUTF8MaxLengthInBytes">Upper limit, in bytes, passed to the copy for the destination buffer.</param>
  /// <param name="src">The buffer of UTF-8 bytes to read.</param>
  /// <param name="srcLength">How many bytes to read from <paramref name="src"/>.</param>
  /// <returns><see cref="CopyError.None"/> when the copy reports no error; <see cref="CopyError.Truncation"/> for every other result.</returns>
  public static CopyError Copy(byte *dest, out int destLength, int destUTF8MaxLengthInBytes, byte *src, int srcLength)
  {
      var status = Unicode.Utf8ToUtf8(src, srcLength, dest, out var bytesWritten, destUTF8MaxLengthInBytes);
      destLength = bytesWritten;
      return status == ConversionError.None ? CopyError.None : CopyError.Truncation;
  }
  /// <summary>
  /// Copies a buffer of UTF-8 text into another UTF-8 buffer, using <c>ushort</c> lengths.
  /// </summary>
  /// <remarks>No validation is done here; the source is handed directly to <c>Unicode.Utf8ToUtf8</c> and is assumed to be valid UTF-8.</remarks>
  /// <param name="dest">The buffer that receives the copy.</param>
  /// <param name="destLength">Receives the byte count reported by the copy, narrowed to <c>ushort</c>.</param>
  /// <param name="destUTF8MaxLengthInBytes">Upper limit, in bytes, passed to the copy for the destination buffer.</param>
  /// <param name="src">The buffer of UTF-8 bytes to read.</param>
  /// <param name="srcLength">How many bytes to read from <paramref name="src"/>.</param>
  /// <returns><see cref="CopyError.None"/> when the copy reports no error; <see cref="CopyError.Truncation"/> for every other result.</returns>
  public static CopyError Copy(byte *dest, out ushort destLength, ushort destUTF8MaxLengthInBytes, byte *src, ushort srcLength)
  {
      var status = Unicode.Utf8ToUtf8(src, srcLength, dest, out var bytesWritten, destUTF8MaxLengthInBytes);
      destLength = (ushort)bytesWritten;
      if (status != ConversionError.None)
          return CopyError.Truncation;
      return CopyError.None;
  }
  /// <summary>
  /// Converts a buffer of UTF-8 text to UCS-2/UTF-16 chars and writes the result to a destination buffer.
  /// </summary>
  /// <remarks>No validation is done here; the source is handed directly to <c>Unicode.Utf8ToUtf16</c> and is assumed to be valid UTF-8.</remarks>
  /// <param name="dest">The buffer that receives the chars.</param>
  /// <param name="destLength">Receives the char count reported by the conversion.</param>
  /// <param name="destUCS2MaxLengthInChars">Upper limit, in chars, passed to the conversion for the destination buffer.</param>
  /// <param name="src">The buffer of UTF-8 bytes to read.</param>
  /// <param name="srcLength">How many bytes to read from <paramref name="src"/>.</param>
  /// <returns><see cref="CopyError.None"/> when the conversion reports no error; <see cref="CopyError.Truncation"/> for every other conversion result.</returns>
  public static CopyError Copy(char *dest, out int destLength, int destUCS2MaxLengthInChars, byte *src, int srcLength)
  {
      var status = Unicode.Utf8ToUtf16(src, srcLength, dest, out destLength, destUCS2MaxLengthInChars);
      if (status != ConversionError.None)
          return CopyError.Truncation;
      return CopyError.None;
  }
  /// <summary>
  /// Converts a buffer of UTF-8 text to UCS-2/UTF-16 chars, using <c>ushort</c> lengths.
  /// </summary>
  /// <remarks>No validation is done here; the source is handed directly to <c>Unicode.Utf8ToUtf16</c> and is assumed to be valid UTF-8.</remarks>
  /// <param name="dest">The buffer that receives the chars.</param>
  /// <param name="destLength">Receives the char count reported by the conversion, narrowed to <c>ushort</c>.</param>
  /// <param name="destUCS2MaxLengthInChars">Upper limit, in chars, passed to the conversion for the destination buffer.</param>
  /// <param name="src">The buffer of UTF-8 bytes to read.</param>
  /// <param name="srcLength">How many bytes to read from <paramref name="src"/>.</param>
  /// <returns><see cref="CopyError.None"/> when the conversion reports no error; <see cref="CopyError.Truncation"/> for every other conversion result.</returns>
  public static CopyError Copy(char *dest, out ushort destLength, ushort destUCS2MaxLengthInChars, byte *src, ushort srcLength)
  {
      var status = Unicode.Utf8ToUtf16(src, srcLength, dest, out var charsWritten, destUCS2MaxLengthInChars);
      destLength = (ushort)charsWritten;
      return status == ConversionError.None ? CopyError.None : CopyError.Truncation;
  }
  /// <summary>
  /// Appends UTF-8 bytes to the end of a buffer with a plain memory copy.
  /// </summary>
  /// <remarks>Assumes the source data is valid UTF-8; the bytes are copied verbatim without inspection.
  ///
  /// No data will be copied if the destination has insufficient capacity for the full append, *i.e.* if `srcLength > (destCapacity - destLength)`.
  /// </remarks>
  /// <param name="src">The source buffer.</param>
  /// <param name="srcLength">The number of bytes to read from the source.</param>
  /// <param name="dest">The destination buffer.</param>
  /// <param name="destLength">Reference to the destination buffer's length in bytes *before* the append. Will be assigned the new length *after* the append. Left unchanged when the append is rejected.</param>
  /// <param name="destCapacity">The destination buffer capacity in bytes.</param>
  /// <returns><see cref="FormatError.None"/> if the append fully completes. Otherwise, returns <see cref="FormatError.Overflow"/>.</returns>
  public static FormatError AppendUTF8Bytes(byte* dest, ref int destLength, int destCapacity, byte* src, int srcLength)
  {
      // Add in 64-bit: a 32-bit sum of two large lengths wraps to a negative value,
      // which would pass the capacity check and let MemCpy write past the buffer.
      if ((long)destLength + srcLength > destCapacity)
          return FormatError.Overflow;
      UnsafeUtility.MemCpy(dest + destLength, src, srcLength);
      destLength += srcLength;
      return FormatError.None;
  }
  /// <summary>
  /// Appends UTF-8 text to a UTF-8 buffer, writing after the bytes already present.
  /// </summary>
  /// <remarks>No validation is done here; the source is handed directly to <c>Unicode.Utf8ToUtf8</c> and is assumed to be valid UTF-8.</remarks>
  /// <param name="dest">The destination buffer. Writing starts at offset <paramref name="destLength"/>.</param>
  /// <param name="destLength">On entry, the number of bytes already in the destination. On return, it has been increased by the byte count reported by the copy.</param>
  /// <param name="destUTF8MaxLengthInBytes">The destination buffer's total length in bytes. Only the space remaining after <paramref name="destLength"/> is offered to the copy.</param>
  /// <param name="src">The buffer of UTF-8 bytes to read.</param>
  /// <param name="srcLength">How many bytes to read from <paramref name="src"/>.</param>
  /// <returns><see cref="CopyError.None"/> when the copy reports no error; <see cref="CopyError.Truncation"/> for every other result.</returns>
  public static CopyError Append(byte *dest, ref ushort destLength, ushort destUTF8MaxLengthInBytes, byte *src, ushort srcLength)
  {
      int remaining = destUTF8MaxLengthInBytes - destLength;
      var status = Unicode.Utf8ToUtf8(src, srcLength, dest + destLength, out var appended, remaining);
      destLength += (ushort)appended;
      return status == ConversionError.None ? CopyError.None : CopyError.Truncation;
  }
  /// <summary>
  /// Appends UCS-2/UTF-16 chars to a UTF-8 buffer, encoding them as UTF-8 after the bytes already present.
  /// </summary>
  /// <remarks>No validation is done here; the source is handed directly to <c>Unicode.Utf16ToUtf8</c> and is assumed to be well-formed.</remarks>
  /// <param name="dest">The destination buffer. Writing starts at offset <paramref name="destLength"/>.</param>
  /// <param name="destLength">On entry, the number of bytes already in the destination. On return, it has been increased by the byte count reported by the conversion.</param>
  /// <param name="destUTF8MaxLengthInBytes">The destination buffer's total length in bytes. Only the space remaining after <paramref name="destLength"/> is offered to the conversion.</param>
  /// <param name="src">The buffer of chars to read.</param>
  /// <param name="srcLength">How many chars to read from <paramref name="src"/>.</param>
  /// <returns><see cref="CopyError.None"/> when the conversion reports no error; <see cref="CopyError.Truncation"/> for every other conversion result.</returns>
  public static CopyError Append(byte *dest, ref ushort destLength, ushort destUTF8MaxLengthInBytes, char *src, int srcLength)
  {
      int remaining = destUTF8MaxLengthInBytes - destLength;
      var status = Unicode.Utf16ToUtf8(src, srcLength, dest + destLength, out var appended, remaining);
      destLength += (ushort)appended;
      if (status != ConversionError.None)
          return CopyError.Truncation;
      return CopyError.None;
  }
  /// <summary>
  /// Appends UTF-8 text to a UCS-2/UTF-16 buffer, converting it to chars after the chars already present.
  /// </summary>
  /// <remarks>No validation is done here; the source is handed directly to <c>Unicode.Utf8ToUtf16</c> and is assumed to be valid UTF-8.</remarks>
  /// <param name="dest">The destination buffer. Writing starts at char offset <paramref name="destLength"/>.</param>
  /// <param name="destLength">On entry, the number of chars already in the destination. On return, it has been increased by the char count reported by the conversion.</param>
  /// <param name="destUCS2MaxLengthInChars">The destination buffer's total length in chars. Only the space remaining after <paramref name="destLength"/> is offered to the conversion.</param>
  /// <param name="src">The buffer of UTF-8 bytes to read.</param>
  /// <param name="srcLength">How many bytes to read from <paramref name="src"/>.</param>
  /// <returns><see cref="CopyError.None"/> when the conversion reports no error; <see cref="CopyError.Truncation"/> for every other conversion result.</returns>
  public static CopyError Append(char *dest, ref ushort destLength, ushort destUCS2MaxLengthInChars, byte *src, ushort srcLength)
  {
      int remaining = destUCS2MaxLengthInChars - destLength;
      var status = Unicode.Utf8ToUtf16(src, srcLength, dest + destLength, out var appended, remaining);
      destLength += (ushort)appended;
      return status == ConversionError.None ? CopyError.None : CopyError.Truncation;
  }
  /// <summary>
  /// Result of comparing one decoded code point from each of two strings.
  /// Used by the <c>StrCmp</c> overloads to decide whether to keep scanning.
  /// </summary>
  internal struct Comparison
  {
      /// <summary>True when the comparison loop should stop: the code points differ, or both are 0.</summary>
      public bool terminates;
      /// <summary>Difference of the two code point values (A minus B); 0 when they are equal.</summary>
      public int result;

      /// <summary>
      /// Compares one code point from each side. A side whose decode reported an error is treated as code point 0.
      /// </summary>
      /// <param name="runeA">Code point decoded from the first string.</param>
      /// <param name="errorA">Decode status for <paramref name="runeA"/>.</param>
      /// <param name="runeB">Code point decoded from the second string.</param>
      /// <param name="errorB">Decode status for <paramref name="runeB"/>.</param>
      public Comparison(Unicode.Rune runeA, ConversionError errorA, Unicode.Rune runeB, ConversionError errorB)
      {
          // A failed decode counts as 0, the same value that ends a comparison.
          int valueA = errorA == ConversionError.None ? runeA.value : 0;
          int valueB = errorB == ConversionError.None ? runeB.value : 0;

          result = valueA - valueB;
          // Stop on the first difference, or when both sides have reached 0.
          terminates = valueA != valueB || valueA == 0;
      }
  }
  /// <summary>Compares two UTF-8 buffers code point by code point.</summary>
  /// <remarks>
  /// Scanning stops at the first position where the code points differ, or where both sides yield
  /// code point 0. A decode error on either side is treated as code point 0.
  /// </remarks>
  /// <param name="utf8BufferA">The first buffer of UTF-8 text.</param>
  /// <param name="utf8LengthInBytesA">The length in bytes of the first UTF-8 buffer.</param>
  /// <param name="utf8BufferB">The second buffer of UTF-8 text.</param>
  /// <param name="utf8LengthInBytesB">The length in bytes of the second UTF-8 buffer.</param>
  /// <returns>
  /// Less than zero if the first differing code point is smaller in the first buffer.
  /// Zero if no difference was found.
  /// More than zero if the first differing code point is smaller in the second buffer.
  /// </returns>
  public static int StrCmp(byte* utf8BufferA, int utf8LengthInBytesA, byte* utf8BufferB, int utf8LengthInBytesB)
  {
      int offsetA = 0;
      int offsetB = 0;
      Comparison step;
      do
      {
          var errorA = Unicode.Utf8ToUcs(out var runeA, utf8BufferA, ref offsetA, utf8LengthInBytesA);
          var errorB = Unicode.Utf8ToUcs(out var runeB, utf8BufferB, ref offsetB, utf8LengthInBytesB);
          step = new Comparison(runeA, errorA, runeB, errorB);
      }
      while (!step.terminates);
      return step.result;
  }
  /// <summary>Compares a UTF-8 buffer against a buffer of already-decoded runes, code point by code point.</summary>
  /// <remarks>
  /// Scanning stops at the first position where the code points differ, or where both sides yield
  /// code point 0. A read error on either side is treated as code point 0.
  /// </remarks>
  /// <param name="utf8BufferA">The buffer of UTF-8 text.</param>
  /// <param name="utf8LengthInBytesA">The length in bytes of the UTF-8 buffer.</param>
  /// <param name="runeBufferB">The buffer of runes.</param>
  /// <param name="lengthInRunesB">The length in runes of the rune buffer.</param>
  /// <returns>
  /// Less than zero if the first differing code point is smaller in the UTF-8 buffer.
  /// Zero if no difference was found.
  /// More than zero if the first differing code point is smaller in the rune buffer.
  /// </returns>
  internal static int StrCmp(byte* utf8BufferA, int utf8LengthInBytesA, Unicode.Rune* runeBufferB, int lengthInRunesB)
  {
      int byteOffsetA = 0;
      int runeOffsetB = 0;
      for (;;)
      {
          var errorA = Unicode.Utf8ToUcs(out var runeA, utf8BufferA, ref byteOffsetA, utf8LengthInBytesA);
          var errorB = Unicode.UcsToUcs(out var runeB, runeBufferB, ref runeOffsetB, lengthInRunesB);
          var step = new Comparison(runeA, errorA, runeB, errorB);
          if (!step.terminates)
              continue;
          return step.result;
      }
  }
  /// <summary>Compares two UTF-16 buffers code point by code point.</summary>
  /// <remarks>
  /// Scanning stops at the first position where the code points differ, or where both sides yield
  /// code point 0. A decode error on either side is treated as code point 0.
  /// </remarks>
  /// <param name="utf16BufferA">The first buffer of UTF-16 text.</param>
  /// <param name="utf16LengthInCharsA">The length in chars of the first UTF-16 buffer.</param>
  /// <param name="utf16BufferB">The second buffer of UTF-16 text.</param>
  /// <param name="utf16LengthInCharsB">The length in chars of the second UTF-16 buffer.</param>
  /// <returns>
  /// Less than zero if the first differing code point is smaller in the first buffer.
  /// Zero if no difference was found.
  /// More than zero if the first differing code point is smaller in the second buffer.
  /// </returns>
  public static int StrCmp(char* utf16BufferA, int utf16LengthInCharsA, char* utf16BufferB, int utf16LengthInCharsB)
  {
      int offsetA = 0;
      int offsetB = 0;
      Comparison step;
      do
      {
          var errorA = Unicode.Utf16ToUcs(out var runeA, utf16BufferA, ref offsetA, utf16LengthInCharsA);
          var errorB = Unicode.Utf16ToUcs(out var runeB, utf16BufferB, ref offsetB, utf16LengthInCharsB);
          step = new Comparison(runeA, errorA, runeB, errorB);
      }
      while (!step.terminates);
      return step.result;
  }
  /// <summary>Returns true if two UTF-8 buffers have the same length and content.</summary>
  /// <remarks>
  /// The buffers are compared byte for byte. A code-point comparison via <c>StrCmp</c> is deliberately
  /// not used: it stops at the first position where both sides yield code point 0 or fail to decode,
  /// so equal-length buffers that differ after that point would wrongly be reported as equal.
  /// </remarks>
  /// <param name="aBytes">The first buffer of UTF-8 text.</param>
  /// <param name="aLength">The length in bytes of the first buffer.</param>
  /// <param name="bBytes">The second buffer of UTF-8 text.</param>
  /// <param name="bLength">The length in bytes of the second buffer.</param>
  /// <returns>True if both buffers have the same length and identical bytes.</returns>
  public static bool EqualsUTF8Bytes(byte* aBytes, int aLength, byte* bBytes, int bLength)
  {
      if (aLength != bLength)
          return false;
      // Nothing to compare; this also keeps a non-positive size away from MemCmp.
      if (aLength <= 0)
          return true;
      return UnsafeUtility.MemCmp(aBytes, bBytes, aLength) == 0;
  }
  /// <summary>Compares a UTF-8 buffer and a UTF-16 buffer code point by code point.</summary>
  /// <remarks>
  /// Scanning stops at the first position where the code points differ, or where both sides yield
  /// code point 0. A decode error on either side is treated as code point 0.
  /// </remarks>
  /// <param name="utf8Buffer">The buffer of UTF-8 text.</param>
  /// <param name="utf8LengthInBytes">The length in bytes of the UTF-8 buffer.</param>
  /// <param name="utf16Buffer">The buffer of UTF-16 text.</param>
  /// <param name="utf16LengthInChars">The length in chars of the UTF-16 buffer.</param>
  /// <returns>
  /// Less than zero if the first differing code point is smaller in the UTF-8 buffer.
  /// Zero if no difference was found.
  /// More than zero if the first differing code point is smaller in the UTF-16 buffer.
  /// </returns>
  public static int StrCmp(byte* utf8Buffer, int utf8LengthInBytes, char* utf16Buffer, int utf16LengthInChars)
  {
      int byteOffset = 0;
      int charOffset = 0;
      for (;;)
      {
          var errorA = Unicode.Utf8ToUcs(out var runeA, utf8Buffer, ref byteOffset, utf8LengthInBytes);
          var errorB = Unicode.Utf16ToUcs(out var runeB, utf16Buffer, ref charOffset, utf16LengthInChars);
          var step = new Comparison(runeA, errorA, runeB, errorB);
          if (!step.terminates)
              continue;
          return step.result;
      }
  }
  /// <summary>Compares a UTF-16 buffer and a UTF-8 buffer code point by code point.</summary>
  /// <remarks>Implemented by calling the (UTF-8, UTF-16) overload with the operands swapped and negating its result.</remarks>
  /// <param name="utf16Buffer">The buffer of UTF-16 text.</param>
  /// <param name="utf16LengthInChars">The length in chars of the UTF-16 buffer.</param>
  /// <param name="utf8Buffer">The buffer of UTF-8 text.</param>
  /// <param name="utf8LengthInBytes">The length in bytes of the UTF-8 buffer.</param>
  /// <returns>
  /// Less than zero if the first differing code point is smaller in the UTF-16 buffer.
  /// Zero if no difference was found.
  /// More than zero if the first differing code point is smaller in the UTF-8 buffer.
  /// </returns>
  public static int StrCmp(char* utf16Buffer, int utf16LengthInChars, byte* utf8Buffer, int utf8LengthInBytes)
  {
      // Swapping the operands reverses the ordering, so flip the sign back.
      int swapped = StrCmp(utf8Buffer, utf8LengthInBytes, utf16Buffer, utf16LengthInChars);
      return -swapped;
  }
  318. }
  319. }