123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707 |
- using System;
- using Unity.Collections.LowLevel.Unsafe;
-
- namespace Unity.Collections
- {
- /// <summary>
- /// Kinds of format errors.
- /// </summary>
- public enum FormatError
- {
- /// <summary>
- /// No error.
- /// </summary>
- None,
-
- /// <summary>
- /// The target storage does not have sufficient capacity.
- /// Note that the format's write failed. It did not truncate.
- /// </summary>
- Overflow,
-
- /// <summary>
- /// The source format specifier is not itself correctly formatted, or
- /// a format specifier tokens were found outside of accepted usage.
- /// Note that the format's write failed.
- /// </summary>
- BadFormatSpecifier,
- }
-
- /// <summary>
- /// Kinds of parse errors.
- /// </summary>
- public enum ParseError
- {
- /// <summary>
- /// No parse error.
- /// </summary>
- None,
-
- /// <summary>
- /// The text parsed does not form a number.
- /// </summary>
- Syntax,
-
- /// <summary>
- /// The number exceeds the range of the target type.
- /// The number was either truncated, or failed to write entirely.
- /// </summary>
- Overflow,
-
- /// <summary>
- /// The number exceeds the precision of the target type.
- /// </summary>
- Underflow,
- }
-
- /// <summary>
- /// Kinds of copy errors.
- /// </summary>
- public enum CopyError
- {
- /// <summary>
- /// No copy error.
- /// </summary>
- None,
-
- /// <summary>
- /// The target storage does not have sufficient capacity.
- /// Unless stated in the API comment, assume that the write operation was partially applied.
- /// </summary>
- Truncation,
- }
-
- /// <summary>
- /// Kinds of conversion errors.
- /// </summary>
- public enum ConversionError
- {
- /// <summary>
- /// No conversion error.
- /// </summary>
- None,
-
- /// <summary>
- /// The target storage does not have sufficient capacity.
- /// For copy operations; the value was either truncated into the target storage, or failed to write entirely.
- /// </summary>
- Overflow,
-
- /// <summary>
- /// The bytes do not form a valid character.
- /// </summary>
- Encoding,
-
- /// <summary>
- /// The rune is not a valid code point.
- /// </summary>
- CodePoint,
- }
-
- /// <summary>
- /// Provides utility methods for UTF-8, UTF-16, UCS-4 (a.k.a. UTF-32), and WTF-8.
- /// </summary>
- [GenerateTestsForBurstCompatibility]
- public unsafe struct Unicode
- {
- /// <summary>
- /// Representation of a Unicode character as a code point.
- /// </summary>
- [GenerateTestsForBurstCompatibility]
- public struct Rune
- {
- /// <summary>
- /// The code point.
- /// </summary>
- /// <value>The code point.</value>
- public int value;
-
- /// <summary>
- /// Initializes and returns an instance of Rune.
- /// </summary>
- /// <remarks>You are responsible for the code point being valid.</remarks>
- /// <param name="codepoint">The code point.</param>
- public Rune(int codepoint)
- {
- value = codepoint;
- }
-
- /// <summary>
- /// Returns a rune.
- /// </summary>
- /// <remarks>Because a char is 16-bit, it can only represent the first 2^16 code points, not all 1.1 million.</remarks>
- /// <param name="codepoint">A code point.</param>
- /// <returns>A rune.</returns>
- public static implicit operator Rune(char codepoint) => new Rune { value = codepoint };
-
- /// <summary>
- /// Evaluates if one is equal to the other.
- /// </summary>
- /// <param name="lhs">The left-hand side</param>
- /// <param name="rhs">The right-hand side</param>
- /// <returns>True if the left-hand side's is equal to the right-hand side's.</returns>
- public static bool operator ==(Rune lhs, Rune rhs)
- {
- return lhs.value == rhs.value;
- }
-
- /// <summary>
- /// Returns true if the value stored in this Rune is equal to an object.
- /// </summary>
- /// <remarks>Can only be equal if the object is itself a Rune.</remarks>
- /// <param name="obj">An object to compare with.</param>
- /// <returns>True if the value stored in this Rune is equal to the object.</returns>
- [ExcludeFromBurstCompatTesting("Takes managed object")]
- public override bool Equals(object obj)
- {
- if (obj is Rune)
- {
- return value == ((Rune)obj).value;
- }
-
- return false;
- }
-
- /// <summary>
- /// A hash used for comparisons.
- /// </summary>
- /// <returns>A unique hash code.</returns>
- public override int GetHashCode()
- {
- return value;
- }
-
- /// <summary>
- /// Evaluates if one is not equal to the other.
- /// </summary>
- /// <param name="lhs">The left-hand side</param>
- /// <param name="rhs">The right-hand side</param>
- /// <returns>True if the left-hand side's is not equal to the right-hand side's.</returns>
- public static bool operator !=(Rune lhs, Rune rhs)
- {
- return lhs.value != rhs.value;
- }
-
- /// <summary>
- /// Returns true if a rune is a numerical digit character.
- /// </summary>
- /// <param name="r">The rune.</param>
- /// <returns>True if the rune is a numerical digit character.</returns>
- public static bool IsDigit(Rune r)
- {
- return r.IsDigit();
- }
-
- internal bool IsAscii()
- {
- return value < 0x80;
- }
-
- internal bool IsLatin1()
- {
- return value < 0x100;
- }
-
- internal bool IsDigit()
- {
- return value >= '0' && value <= '9';
- }
-
- internal bool IsWhiteSpace()
- {
- // https://en.wikipedia.org/wiki/Whitespace_character#Unicode
-
- if (IsLatin1())
- {
- return value == ' '
- || (value >= 0x9 && value <= 0xD) // CHARACTER TABULATION (U+0009), LINE FEED (U+000A), LINE TABULATION (U+000B), FORM FEED (U+000C), CARRIAGE RETURN (U+000D)
- || value == 0xA0 // NO-BREAK SPACE
- || value == 0x85 // NEXT LINE
- ;
- }
-
- return value == 0x1680 // OGHAM SPACE MARK
- || (value >= 0x2000 && value <= 0x200A) // EN QUAD(U+2000)
- // EM QUAD(U+2001)
- // EN SPACE(U+2002)
- // EM SPACE(U+2003)
- // THREE - PER - EM SPACE(U + 2004)
- // FOUR - PER - EM SPACE(U + 2005)
- // SIX - PER - EM SPACE(U + 2006)
- // FIGURE SPACE(U+2007)
- // PUNCTUATION SPACE(U+2008)
- // THIN SPACE(U+2009)
- // HAIR SPACE(U+200A)
- || value == 0x2028 // LINE SEPARATOR
- || value == 0x2029 // PARAGRAPH SEPARATOR
- || value == 0x202F // NARROW NO-BREAK SPACE
- || value == 0x205F // MEDIUM MATHEMATICAL SPACE
- || value == 0x3000 // IDEOGRAPHIC SPACE
- ;
- }
-
- internal Rune ToLowerAscii()
- {
- return new Rune(value + (((uint)(value - 'A') <= ('Z' - 'A')) ? 0x20 : 0));
- }
-
- internal Rune ToUpperAscii()
- {
- return new Rune(value - (((uint)(value - 'a') <= ('z' - 'a')) ? 0x20 : 0));
- }
-
- /// <summary>
- /// Returns the number of bytes required to encode this rune as UTF-8.
- /// </summary>
- /// <returns>The number of bytes required to encode this rune as UTF-8. If the rune's codepoint
- /// is invalid, returns 4 (the maximum possible encoding length).</returns>
- public int LengthInUtf8Bytes()
- {
- if (value < 0)
- return 4; // invalid codepoint
- if (value <= 0x7F)
- return 1;
- if (value <= 0x7FF)
- return 2;
- if (value <= 0xFFFF)
- return 3;
- if (value <= 0x1FFFFF)
- return 4;
- // invalid codepoint, max size.
- return 4;
- }
- }
-
- /// <summary>The maximum value of a valid UNICODE code point</summary>
- public const int kMaximumValidCodePoint = 0x10FFFF;
-
- /// <summary>
- /// Returns true if a code point is valid.
- /// </summary>
- /// <param name="codepoint">A code point.</param>
- /// <returns>True if a code point is valid.</returns>
- public static bool IsValidCodePoint(int codepoint)
- {
- if (codepoint > kMaximumValidCodePoint) // maximum valid code point
- return false;
- // if (codepoint >= 0xD800 && codepoint <= 0xDFFF) // surrogate pair
- // return false;
- if (codepoint < 0) // negative?
- return false;
- return true;
- }
-
- /// <summary>
- /// Returns true if the byte is not the last byte of a UTF-8 character.
- /// </summary>
- /// <param name="b">The byte.</param>
- /// <returns>True if the byte is not the last byte of a UTF-8 character.</returns>
- public static bool NotTrailer(byte b)
- {
- return (b & 0xC0) != 0x80;
- }
-
- /// <summary>
- /// The Unicode character �.
- /// </summary>
- /// <remarks>This character is used to stand-in for characters that can't be rendered.</remarks>
- /// <value>The Unicode character �.</value>
- public static Rune ReplacementCharacter => new Rune { value = 0xFFFD };
-
- /// <summary>
- /// The null rune value.
- /// </summary>
- /// <remarks>In this package, the "bad rune" is used as a null character. It represents no valid code point.</remarks>
- /// <value>The null rune value.</value>
- public static Rune BadRune => new Rune { value = 0 };
-
- /// <summary>
- /// Reads a UTF-8 encoded character from a buffer.
- /// </summary>
- /// <param name="rune">Outputs the character read. If the read fails, outputs <see cref="ReplacementCharacter"/>.</param>
- /// <param name="buffer">The buffer of bytes to read.</param>
- /// <param name="index">Reference to a byte index into the buffer. If the read succeeds, index is incremented by the
- /// size in bytes of the character read. If the read fails, index is incremented by 1.</param>
- /// <param name="capacity">The size in bytes of the buffer. Used to check that the read is in bounds.</param>
- /// <returns><see cref="ConversionError.None"/> if the read succeeds. Otherwise, returns <see cref="ConversionError.Overflow"/> or <see cref="ConversionError.Encoding"/>.</returns>
- public static ConversionError Utf8ToUcs(out Rune rune, byte* buffer, ref int index, int capacity)
- {
- int code = 0;
- rune = ReplacementCharacter;
- if (index + 1 > capacity)
- {
- return ConversionError.Overflow;
- }
-
- if ((buffer[index] & 0b10000000) == 0b00000000) // if high bit is 0, 1 byte
- {
- rune.value = buffer[index + 0];
- index += 1;
- return ConversionError.None;
- }
-
- if ((buffer[index] & 0b11100000) == 0b11000000) // if high 3 bits are 110, 2 bytes
- {
- if (index + 2 > capacity)
- {
- index += 1;
- return ConversionError.Overflow;
- }
- code = (buffer[index + 0] & 0b00011111);
- code = (code << 6) | (buffer[index + 1] & 0b00111111);
- if (code < (1 << 7) || NotTrailer(buffer[index + 1]))
- {
- index += 1;
- return ConversionError.Encoding;
- }
- rune.value = code;
- index += 2;
- return ConversionError.None;
- }
-
- if ((buffer[index] & 0b11110000) == 0b11100000) // if high 4 bits are 1110, 3 bytes
- {
- if (index + 3 > capacity)
- {
- index += 1;
- return ConversionError.Overflow;
- }
- code = (buffer[index + 0] & 0b00001111);
- code = (code << 6) | (buffer[index + 1] & 0b00111111);
- code = (code << 6) | (buffer[index + 2] & 0b00111111);
- if (code < (1 << 11) || !IsValidCodePoint(code) || NotTrailer(buffer[index + 1]) || NotTrailer(buffer[index + 2]))
- {
- index += 1;
- return ConversionError.Encoding;
- }
- rune.value = code;
- index += 3;
- return ConversionError.None;
- }
-
- if ((buffer[index] & 0b11111000) == 0b11110000) // if high 5 bits are 11110, 4 bytes
- {
- if (index + 4 > capacity)
- {
- index += 1;
- return ConversionError.Overflow;
- }
- code = (buffer[index + 0] & 0b00000111);
- code = (code << 6) | (buffer[index + 1] & 0b00111111);
- code = (code << 6) | (buffer[index + 2] & 0b00111111);
- code = (code << 6) | (buffer[index + 3] & 0b00111111);
- if (code < (1 << 16) || !IsValidCodePoint(code) || NotTrailer(buffer[index + 1]) || NotTrailer(buffer[index + 2]) || NotTrailer(buffer[index + 3]))
- {
- index += 1;
- return ConversionError.Encoding;
- }
- rune.value = code;
- index += 4;
- return ConversionError.None;
- }
-
- index += 1;
- return ConversionError.Encoding;
- }
-
- static int FindUtf8CharStartInReverse(byte* ptr, ref int index)
- {
- do
- {
- if (index <= 0)
- {
- return 0;
- }
-
- --index;
-
- } while ((ptr[index] & 0xC0) == 0x80);
-
- return index;
- }
-
- internal static ConversionError Utf8ToUcsReverse(out Rune rune, byte* buffer, ref int index, int capacity)
- {
- var prev = index;
- --index;
-
- index = FindUtf8CharStartInReverse(buffer, ref index);
-
- if (index == prev)
- {
- rune = ReplacementCharacter;
- return ConversionError.Overflow;
- }
-
- var ignore = index;
- return Utf8ToUcs(out rune, buffer, ref ignore, capacity);
- }
-
- /// <summary>
- /// Returns true if a char is a Unicode leading surrogate.
- /// </summary>
- /// <param name="c">The char.</param>
- /// <returns>True if the char is a Unicode leading surrogate.</returns>
- static bool IsLeadingSurrogate(char c)
- {
- return c >= 0xD800 && c <= 0xDBFF;
- }
-
- /// <summary>
- /// Returns true if a char is a Unicode trailing surrogate.
- /// </summary>
- /// <param name="c">The char.</param>
- /// <returns>True if the char is a Unicode trailing surrogate.</returns>
- static bool IsTrailingSurrogate(char c)
- {
- return c >= 0xDC00 && c <= 0xDFFF;
- }
-
- /// <summary>
- /// Reads a UTF-16 encoded character from a buffer.
- /// </summary>
- /// <param name="rune">Outputs the character read. If the read fails, rune is not set.</param>
- /// <param name="buffer">The buffer of chars to read.</param>
- /// <param name="index">Reference to a char index into the buffer. If the read succeeds, index is incremented by the
- /// size in chars of the character read. If the read fails, index is not incremented.</param>
- /// <param name="capacity">The size in chars of the buffer. Used to check that the read is in bounds.</param>
- /// <returns><see cref="ConversionError.None"/> if the read succeeds. Otherwise, returns <see cref="ConversionError.Overflow"/>.</returns>
- public static ConversionError Utf16ToUcs(out Rune rune, char* buffer, ref int index, int capacity)
- {
- int code = 0;
- rune = ReplacementCharacter;
- if (index + 1 > capacity)
- return ConversionError.Overflow;
- if (!IsLeadingSurrogate(buffer[index]) || (index + 2 > capacity))
- {
- rune.value = buffer[index];
- index += 1;
- return ConversionError.None;
- }
- code = (buffer[index + 0] & 0x03FF);
- char next = buffer[index + 1];
- if (!IsTrailingSurrogate(next))
- {
- rune.value = buffer[index];
- index += 1;
- return ConversionError.None;
- }
- code = (code << 10) | (buffer[index + 1] & 0x03FF);
- code += 0x10000;
- rune.value = code;
- index += 2;
- return ConversionError.None;
- }
-
- internal static ConversionError UcsToUcs(out Rune rune, Rune* buffer, ref int index, int capacity)
- {
- rune = ReplacementCharacter;
- if (index + 1 > capacity)
- return ConversionError.Overflow;
- rune = buffer[index];
- index += 1;
- return ConversionError.None;
- }
-
- /// <summary>
- /// Writes a rune to a buffer as a UTF-8 encoded character.
- /// </summary>
- /// <param name="rune">The rune to encode.</param>
- /// <param name="buffer">The buffer to write to.</param>
- /// <param name="index">Reference to a byte index into the buffer. If the write succeeds, index is incremented by the
- /// size in bytes of the character written. If the write fails, index is not incremented.</param>
- /// <param name="capacity">The size in bytes of the buffer. Used to check that the write is in bounds.</param>
- /// <returns><see cref="ConversionError.None"/> if the write succeeds. Otherwise, returns <see cref="ConversionError.CodePoint"/>, <see cref="ConversionError.Overflow"/>, or <see cref="ConversionError.Encoding"/>.</returns>
- public static ConversionError UcsToUtf8(byte* buffer, ref int index, int capacity, Rune rune)
- {
- if (!IsValidCodePoint(rune.value))
- {
- return ConversionError.CodePoint;
- }
-
- if (index + 1 > capacity)
- {
- return ConversionError.Overflow;
- }
-
- if (rune.value <= 0x7F)
- {
- buffer[index++] = (byte)rune.value;
- return ConversionError.None;
- }
-
- if (rune.value <= 0x7FF)
- {
- if (index + 2 > capacity)
- {
- return ConversionError.Overflow;
- }
-
- buffer[index++] = (byte)(0xC0 | (rune.value >> 6));
- buffer[index++] = (byte)(0x80 | ((rune.value >> 0) & 0x3F));
- return ConversionError.None;
- }
-
- if (rune.value <= 0xFFFF)
- {
- if (index + 3 > capacity)
- {
- return ConversionError.Overflow;
- }
-
- buffer[index++] = (byte)(0xE0 | (rune.value >> 12));
- buffer[index++] = (byte)(0x80 | ((rune.value >> 6) & 0x3F));
- buffer[index++] = (byte)(0x80 | ((rune.value >> 0) & 0x3F));
- return ConversionError.None;
- }
-
- if (rune.value <= 0x1FFFFF)
- {
- if (index + 4 > capacity)
- {
- return ConversionError.Overflow;
- }
-
- buffer[index++] = (byte)(0xF0 | (rune.value >> 18));
- buffer[index++] = (byte)(0x80 | ((rune.value >> 12) & 0x3F));
- buffer[index++] = (byte)(0x80 | ((rune.value >> 6) & 0x3F));
- buffer[index++] = (byte)(0x80 | ((rune.value >> 0) & 0x3F));
- return ConversionError.None;
- }
-
- return ConversionError.Encoding;
- }
-
- /// <summary>
- /// Writes a rune to a buffer as a UTF-16 encoded character.
- /// </summary>
- /// <param name="rune">The rune to encode.</param>
- /// <param name="buffer">The buffer of chars to write to.</param>
- /// <param name="index">Reference to a char index into the buffer. If the write succeeds, index is incremented by the
- /// size in chars of the character written. If the write fails, index is not incremented.</param>
- /// <param name="capacity">The size in chars of the buffer. Used to check that the write is in bounds.</param>
- /// <returns><see cref="ConversionError.None"/> if the write succeeds. Otherwise, returns <see cref="ConversionError.CodePoint"/>, <see cref="ConversionError.Overflow"/>, or <see cref="ConversionError.Encoding"/>.</returns>
- public static ConversionError UcsToUtf16(char* buffer, ref int index, int capacity, Rune rune)
- {
- if (!IsValidCodePoint(rune.value))
- {
- return ConversionError.CodePoint;
- }
-
- if (index + 1 > capacity)
- {
- return ConversionError.Overflow;
- }
-
- if (rune.value >= 0x10000)
- {
- if (index + 2 > capacity)
- {
- return ConversionError.Overflow;
- }
-
- int code = rune.value - 0x10000;
- if (code >= (1 << 20))
- {
- return ConversionError.Encoding;
- }
-
- buffer[index++] = (char)(0xD800 | (code >> 10));
- buffer[index++] = (char)(0xDC00 | (code & 0x3FF));
- return ConversionError.None;
- }
-
- buffer[index++] = (char)rune.value;
- return ConversionError.None;
- }
-
- /// <summary>
- /// Copies UTF-16 characters from one buffer to another buffer as UTF-8.
- /// </summary>
- /// <remarks>Assumes the source data is valid UTF-16.</remarks>
- /// <param name="utf16Buffer">The source buffer.</param>
- /// <param name="utf16Length">The number of chars to read from the source.</param>
- /// <param name="utf8Buffer">The destination buffer.</param>
- /// <param name="utf8Length">Outputs the number of bytes written to the destination.</param>
- /// <param name="utf8Capacity">The size in bytes of the destination buffer.</param>
- /// <returns><see cref="ConversionError.None"/> if the copy fully completes. Otherwise, returns <see cref="ConversionError.Overflow"/>.</returns>
- public static ConversionError Utf16ToUtf8(char* utf16Buffer, int utf16Length, byte* utf8Buffer, out int utf8Length, int utf8Capacity)
- {
- utf8Length = 0;
- for (var utf16Offset = 0; utf16Offset < utf16Length;)
- {
- Utf16ToUcs(out var ucs, utf16Buffer, ref utf16Offset, utf16Length);
- if (UcsToUtf8(utf8Buffer, ref utf8Length, utf8Capacity, ucs) == ConversionError.Overflow)
- return ConversionError.Overflow;
- }
- return ConversionError.None;
- }
-
- /// <summary>
- /// Copies UTF-8 characters from one buffer to another.
- /// </summary>
- /// <remarks>Assumes the source data is valid UTF-8.</remarks>
- /// <param name="srcBuffer">The source buffer.</param>
- /// <param name="srcLength">The number of bytes to read from the source.</param>
- /// <param name="destBuffer">The destination buffer.</param>
- /// <param name="destLength">Outputs the number of bytes written to the destination.</param>
- /// <param name="destCapacity">The size in bytes of the destination buffer.</param>
- /// <returns><see cref="ConversionError.None"/> if the copy fully completes. Otherwise, returns <see cref="ConversionError.Overflow"/>.</returns>
- public static ConversionError Utf8ToUtf8(byte* srcBuffer, int srcLength, byte* destBuffer, out int destLength, int destCapacity)
- {
- if (destCapacity >= srcLength)
- {
- UnsafeUtility.MemCpy(destBuffer, srcBuffer, srcLength);
- destLength = srcLength;
- return ConversionError.None;
- }
- // TODO even in this case, it's possible to MemCpy all but the last 3 bytes that fit, and then by looking at only
- // TODO the high bits of the last 3 bytes that fit, decide how many of the 3 to append. but that requires a
- // TODO little UNICODE presence of mind that nobody has today.
- destLength = 0;
- for (var srcOffset = 0; srcOffset < srcLength;)
- {
- Utf8ToUcs(out var ucs, srcBuffer, ref srcOffset, srcLength);
- if (UcsToUtf8(destBuffer, ref destLength, destCapacity, ucs) == ConversionError.Overflow)
- return ConversionError.Overflow;
- }
- return ConversionError.None;
- }
-
- /// <summary>
- /// Copies UTF-8 characters from one buffer to another as UTF-16.
- /// </summary>
- /// <remarks>Assumes the source data is valid UTF-8.</remarks>
- /// <param name="utf8Buffer">The source buffer.</param>
- /// <param name="utf8Length">The number of bytes to read from the source.</param>
- /// <param name="utf16Buffer">The destination buffer.</param>
- /// <param name="utf16Length">Outputs the number of chars written to the destination.</param>
- /// <param name="utf16Capacity">The size in chars of the destination buffer.</param>
- /// <returns><see cref="ConversionError.None"/> if the copy fully completes. Otherwise, <see cref="ConversionError.Overflow"/>.</returns>
- public static ConversionError Utf8ToUtf16(byte* utf8Buffer, int utf8Length, char* utf16Buffer, out int utf16Length, int utf16Capacity)
- {
- utf16Length = 0;
- for (var utf8Offset
- = 0; utf8Offset < utf8Length;)
- {
- Utf8ToUcs(out var ucs, utf8Buffer, ref utf8Offset, utf8Length);
- if (UcsToUtf16(utf16Buffer, ref utf16Length, utf16Capacity, ucs) == ConversionError.Overflow)
- return ConversionError.Overflow;
- }
- return ConversionError.None;
- }
-
- static int CountRunes(byte* utf8Buffer, int utf8Length, int maxRunes = int.MaxValue)
- {
- var numRunes = 0;
-
- for (var i = 0; numRunes < maxRunes && i < utf8Length; ++i)
- {
- if ((utf8Buffer[i] & 0xC0) != 0x80)
- numRunes++;
- }
-
- return numRunes;
- }
- }
- }
|