123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583 |
- #if UNITY_EDITOR || BURST_INTERNAL
- using System;
- using System.Collections.Generic;
- using System.Diagnostics;
- using System.Runtime.CompilerServices;
-
- namespace Unity.Burst.Editor
- {
- internal partial class BurstDisassembler
- {
- /// <summary>
- /// Base class for providing extended information of an identifier
- /// </summary>
- internal abstract class AsmTokenKindProvider
- {
- // Internally using string slice instead of string
- // to support faster lookup from AsmToken
- private readonly Dictionary<StringSlice, AsmTokenKind> _tokenKinds;
- private int _maximumLength;
-
- protected AsmTokenKindProvider(int capacity)
- {
- _tokenKinds = new Dictionary<StringSlice, AsmTokenKind>(capacity);
- }
-
- protected void AddTokenKind(string text, AsmTokenKind kind)
- {
- _tokenKinds.Add(new StringSlice(text), kind);
- if (text.Length > _maximumLength) _maximumLength = text.Length;
- }
-
- public virtual AsmTokenKind FindTokenKind(StringSlice slice)
- {
- return slice.Length <= _maximumLength && _tokenKinds.TryGetValue(slice, out var tokenKind)
- ? tokenKind
- : AsmTokenKind.Identifier;
- }
-
- public virtual bool AcceptsCharAsIdentifierOrRegisterEnd(char c)
- {
- return false;
- }
-
- public virtual bool IsInstructionOrRegisterOrIdentifier(char c)
- {
- // we include . because we have instructions like `b.le` or `f32.const`
- return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '_' ||
- c == '@' || c == '.';
- }
-
- /// <summary>
- /// Checks whether regA == regB. This function assumes the given strings are proper registers.
- /// </summary>
- public virtual bool RegisterEqual(string regA, string regB) => regA == regB;
-
- public abstract SIMDkind SimdKind(StringSlice instruction);
- }
-
- /// <summary>
- /// The ASM tokenizer
- /// </summary>
- private struct AsmTokenizer
- {
- private readonly string _text;
- private readonly AsmKind _asmKind;
- private readonly AsmTokenKindProvider _tokenKindProvider;
- private int _position;
- private int _nextPosition;
- private int _alignedPosition;
- private int _nextAlignedPosition;
- private char _c;
- private readonly char _commentStartChar;
- private bool _doPad;
- private int _padding;
-
- public AsmTokenizer(string text, AsmKind asmKind, AsmTokenKindProvider tokenKindProvider, char commentStart)
- {
- _text = text;
- _asmKind = asmKind;
- _tokenKindProvider = tokenKindProvider;
- _position = 0;
- _nextPosition = 0;
- _alignedPosition = 0;
- _nextAlignedPosition = 0;
- _commentStartChar = commentStart;
- _doPad = false;
- _padding = 0;
- _c = (char)0;
- NextChar();
- }
-
- public bool TryGetNextToken(out AsmToken token)
- {
- token = new AsmToken();
- while (true)
- {
- var startPosition = _position;
- var startAlignedPosition = _alignedPosition;
-
- if (_c == 0)
- {
- return false;
- }
-
- if (_c == '.')
- {
- token = ParseDirective(startPosition, startAlignedPosition);
- return true;
- }
-
- // Like everywhere else in this file, we are inlining the matching characters instead
- // of using helper functions, as Mono might not be enough good at inlining by itself
- if (_c >= 'a' && _c <= 'z' || _c >= 'A' && _c <= 'Z' || _c == '_' || _c == '@')
- {
- token = ParseInstructionOrIdentifierOrRegister(startPosition, startAlignedPosition);
- PrepareAlignment(token);
- return true;
- }
-
- if (_c >= '0' && _c <= '9' || _c == '-')
- {
- token = ParseNumber(startPosition, startAlignedPosition);
- return true;
- }
-
- if (_c == '"')
- {
- token = ParseString(startPosition, startAlignedPosition);
- return true;
- }
-
- if (_c == _commentStartChar)
- {
- token = ParseComment(startPosition, startAlignedPosition);
- return true;
- }
-
- if (_c == '\r')
- {
- if (PreviewChar() == '\n')
- {
- NextChar(); // skip \r
- }
- token = ParseNewLine(startPosition, startAlignedPosition);
- return true;
- }
-
- if (_c == '\n')
- {
- token = ParseNewLine(startPosition, startAlignedPosition);
- return true;
- }
-
- if (_doPad)
- {
- _nextAlignedPosition += _padding;
- _doPad = false;
- }
- token = ParseMisc(startPosition, startAlignedPosition);
- return true;
- }
- }
-
- private void PrepareAlignment(AsmToken token)
- {
- var kind = token.Kind;
- _padding = InstructionAlignment - token.Length;
- _doPad = _asmKind == AsmKind.Intel
- && (kind == AsmTokenKind.Instruction
- || kind == AsmTokenKind.BranchInstruction
- || kind == AsmTokenKind.CallInstruction
- || kind == AsmTokenKind.JumpInstruction
- || kind == AsmTokenKind.ReturnInstruction
- || kind == AsmTokenKind.InstructionSIMD)
- && _c != '\n' && _c != '\r' // If there is no registers behind instruction don't align.
- && _padding > 0;
- }
-
- private AsmToken ParseNewLine(int startPosition, int startAlignedPosition)
- {
- var endPosition = _position;
- NextChar(); // Skip newline
- return new AsmToken(AsmTokenKind.NewLine, startPosition, startAlignedPosition, endPosition - startPosition + 1);
- }
-
- private AsmToken ParseMisc(int startPosition, int startAlignedPosition)
- {
- var endPosition = _position;
- // Parse anything that is not a directive, instruction, number, string or comment
- while (!((_c == (char)0) || (_c == '\r') || (_c == '\n') || (_c == '.') || (_c >= 'a' && _c <= 'z' || _c >= 'A' && _c <= 'Z' || _c == '_' || _c == '@') || (_c >= '0' && _c <= '9' || _c == '-') || (_c == '"') || (_c == _commentStartChar)))
- {
- endPosition = _position;
- NextChar();
- }
- return new AsmToken(AsmTokenKind.Misc, startPosition, startAlignedPosition, endPosition - startPosition + 1);
- }
-
- private static readonly string[] DataDirectiveStrings = new[]
- {
- AssertDataDirectiveLength(".long"),
- AssertDataDirectiveLength(".byte"),
- AssertDataDirectiveLength(".short"),
- AssertDataDirectiveLength(".ascii"),
- AssertDataDirectiveLength(".asciz"),
- };
-
- private static string AssertDataDirectiveLength(string text)
- {
- var length = text.Length;
- Debug.Assert(length == 5 || length == 6, $"Invalid length {length} for string {text}. Expecting 5 or 6");
- return text;
- }
-
- private AsmToken ParseDirective(int startPosition, int startAlignedPosition)
- {
- var endPosition = _position;
- NextChar(); // skip .
- bool isLabel = _c == 'L'; // A label starts with a capital `L` like .Lthis_is_a_jump_label
- while (_c >= 'a' && _c <= 'z' || _c >= 'A' && _c <= 'Z' || _c >= '0' && _c <= '9' || _c == '.' || _c == '_' || _c == '@')
- {
- endPosition = _position;
- NextChar();
- }
-
- // Refine the kind of directive:
- //
- // .Lfunc_begin => FunctionBegin
- // .Lfunc_end => FunctionEnd
- // .L????????? => Label
- // data directive (.byte, .long, .short...) => DataDirective
- // anything else => Directive
- const string MatchFunc = ".Lfunc_";
- const int MatchFuncLength = 7;
- Debug.Assert(MatchFunc.Length == MatchFuncLength);
- var kind = isLabel ? AsmTokenKind.Label : AsmTokenKind.Directive;
- // Fast early check
- if (isLabel && string.CompareOrdinal(_text, startPosition, MatchFunc, 0, MatchFuncLength) == 0)
- {
- if (string.CompareOrdinal(_text, startPosition, ".Lfunc_begin", 0, ".Lfunc_begin".Length) == 0)
- {
- kind = AsmTokenKind.FunctionBegin;
- }
- else if (string.CompareOrdinal(_text, startPosition, ".Lfunc_end", 0, ".Lfunc_end".Length) == 0)
- {
- kind = AsmTokenKind.FunctionEnd;
- }
- }
-
- // Adjust directive to mark data directives, source location directives...etc.
- int length = endPosition - startPosition + 1;
-
- // Use length to early exit
- if (!isLabel && length >= 4 && length <= 8)
- {
- if ((length == 5 || length == 6))
- {
- foreach (var dataDirectiveStr in DataDirectiveStrings)
- {
- if (string.CompareOrdinal(_text, startPosition, dataDirectiveStr, 0, dataDirectiveStr.Length) == 0)
- {
- kind = AsmTokenKind.DataDirective;
- break;
- }
- }
-
- // .file => SourceFile
- if (kind == AsmTokenKind.Directive && string.CompareOrdinal(_text, startPosition, ".file", 0, 5) == 0)
- {
- kind = AsmTokenKind.SourceFile;
- }
- }
- // .loc => SourceLocation
- // .cv_loc => SourceLocation
- else if ((length == 4 && string.CompareOrdinal(_text, startPosition, ".loc", 0, 4) == 0) ||
- (length == 7 && string.CompareOrdinal(_text, startPosition, ".cv_loc", 0, 7) == 0))
- {
- kind = AsmTokenKind.SourceLocation;
- }
- // .file .cv_file => SourceFile
- else if (length == 8 && string.CompareOrdinal(_text, startPosition, ".cv_file", 0, 8) == 0)
- {
- kind = AsmTokenKind.SourceFile;
- }
- }
-
- return new AsmToken(kind, startPosition, startAlignedPosition, length);
- }
-
- private AsmToken ParseInstructionOrIdentifierOrRegister(int startPosition, int startAlignedPosition)
- {
- var endPosition = _position;
- while (_tokenKindProvider.IsInstructionOrRegisterOrIdentifier(_c))
- {
- endPosition = _position;
- NextChar();
- }
-
- if (_tokenKindProvider.AcceptsCharAsIdentifierOrRegisterEnd(_c))
- {
- endPosition = _position;
- NextChar();
- }
-
- // Resolve token kind for identifier
- int length = endPosition - startPosition + 1;
- var tokenKind = _tokenKindProvider.FindTokenKind(new StringSlice(_text, startPosition, length));
-
- if (tokenKind == AsmTokenKind.Identifier)
- {
- // If we have `:` right after an identifier, change from identifier to label declaration to help the semantic pass later
- if (_c == ':')
- {
- tokenKind = AsmTokenKind.Label;
- }
- }
-
- return new AsmToken(tokenKind, startPosition, startAlignedPosition, endPosition - startPosition + 1);
- }
-
- private AsmToken ParseNumber(int startPosition, int startAlignedPostion)
- {
- var endPosition = _position;
- if (_c == '-')
- {
- NextChar();
- }
- while (_c >= '0' && _c <= '9' || _c >= 'a' && _c <= 'f' || _c >= 'A' && _c <= 'F' || _c == 'x' || _c == '.')
- {
- endPosition = _position;
- NextChar();
- }
-
- // If we have `:` right after a number, change from number to label declaration to help the semantic pass later
- var numberKind = _c == ':' ? AsmTokenKind.Label : AsmTokenKind.Number;
- return new AsmToken(numberKind, startPosition, startAlignedPostion, endPosition - startPosition + 1);
- }
- private AsmToken ParseString(int startPosition, int startAlignedPostion)
- {
- var endPosition = _position;
- // Skip first "
- NextChar();
- while (_c != (char)0 && _c != '"')
- {
- // Skip escape \"
- if (_c == '\\' && PreviewChar() == '"')
- {
- NextChar();
- }
- endPosition = _position;
- NextChar();
- }
-
- endPosition = _position;
- NextChar(); // Skip trailing 0
-
- // If we have `:` right after a string, change from string to label declaration to help the semantic pass later
- var stringKind = _c == ':' ? AsmTokenKind.Label : AsmTokenKind.String;
- return new AsmToken(stringKind, startPosition, startAlignedPostion, endPosition - startPosition + 1);
- }
-
- private AsmToken ParseComment(int startPosition, int startAlignedPosition)
- {
- var endPosition = _position;
- while (_c != (char)0 && (_c != '\n' && _c != '\r'))
- {
- endPosition = _position;
- NextChar();
- }
-
- return new AsmToken(AsmTokenKind.Comment, startPosition, startAlignedPosition, endPosition - startPosition + 1);
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private void NextChar()
- {
- if (_nextPosition < _text.Length)
- {
- _position = _nextPosition;
- _c = _text[_position];
- _nextPosition = _position + 1;
-
- _alignedPosition = _nextAlignedPosition;
- _nextAlignedPosition = _alignedPosition + 1;
- }
- else
- {
- _c = (char)0;
- }
- }
-
- private char PreviewChar()
- {
- return _nextPosition < _text.Length ? _text[_nextPosition] : (char)0;
- }
-
- }
-
- public enum SIMDkind
- {
- Packed,
- Scalar,
- Infrastructure,
- }
-
-
- /// <summary>
- /// An ASM token. The token doesn't contain the string of the token, but provides method <see cref="Slice"/> and <see cref="ToString"/> to extract it.
- /// </summary>
- internal readonly struct AsmToken
- {
- // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- // CAUTION: It is important to not put *any managed objects*
- // into this struct for GC efficiency
- // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
- public AsmToken(AsmTokenKind kind, int position, int alignedPosition, int length)
- {
- Kind = kind;
- Position = position;
- AlignedPosition = alignedPosition;
- Length = length;
- }
-
- public readonly AsmTokenKind Kind;
-
- public readonly int Position;
-
- public readonly int AlignedPosition;
-
- public readonly int Length;
-
- public StringSlice Slice(string text) => new StringSlice(text, Position, Length);
-
- public string ToString(string text) => text.Substring(Position, Length);
-
- public string ToFriendlyText(string text)
- {
- return $"{text.Substring(Position, Length)} : {Kind}";
- }
- }
-
- /// <summary>
- /// Kind of an ASM token.
- /// </summary>
- internal enum AsmTokenKind
- {
- Eof,
- Directive,
- DataDirective,
- SourceFile,
- SourceLocation,
- Label,
- FunctionBegin,
- FunctionEnd,
- Identifier,
- Qualifier,
- Instruction,
- CallInstruction,
- BranchInstruction,
- JumpInstruction,
- ReturnInstruction,
- InstructionSIMD,
- Register,
- Number,
- String,
- Comment,
- NewLine,
- Misc
- }
- }
- /// <summary>
- /// A slice of a string from an original string.
- /// </summary>
- internal readonly struct StringSlice : IEquatable<StringSlice>
- {
- private readonly string _text;
-
- public readonly int Position;
-
- public readonly int Length;
-
- public StringSlice(string text)
- {
- _text = text ?? throw new ArgumentNullException(nameof(text));
- Position = 0;
- Length = text.Length;
- }
-
- public StringSlice(string text, int position, int length)
- {
- _text = text ?? throw new ArgumentNullException(nameof(text));
- Position = position;
- Length = length;
- }
-
- public char this[int index] => _text[Position + index];
-
- public bool Equals(StringSlice other)
- {
- if (Length != other.Length) return false;
-
- for (int i = 0; i < Length; i++)
- {
- if (this[i] != other[i])
- {
- return false;
- }
- }
- return true;
- }
-
- public override bool Equals(object obj)
- {
- return obj is StringSlice other && Equals(other);
- }
-
- public override int GetHashCode()
- {
- unchecked
- {
- var hashCode = Length;
- for (int i = 0; i < Length; i++)
- {
- hashCode = (hashCode * 397) ^ this[i];
- }
- return hashCode;
- }
- }
-
- public static bool operator ==(StringSlice left, StringSlice right)
- {
- return left.Equals(right);
- }
-
- public static bool operator !=(StringSlice left, StringSlice right)
- {
- return !left.Equals(right);
- }
-
- public override string ToString()
- {
- return _text.Substring(Position, Length);
- }
-
- public bool StartsWith(string text)
- {
- if (text == null) throw new ArgumentNullException(nameof(text));
- if (Length < text.Length) return false;
- for (var i = 0; i < text.Length; i++)
- {
- var c = text[i];
- if (_text[Position + i] != c) return false;
- }
- return true;
- }
-
- public bool Contains(char c)
- {
- int start = Position;
- int end = Math.Min(Position + Length, _text.Length);
- for (int i = start; i < end; i++)
- {
- if (_text[i] == c) { return true; }
- }
- return false;
- }
-
- public int IndexOf(char c)
- {
- for (var i = 0; i < Length; i++)
- {
- if (_text[Position + i] == c)
- {
- return i;
- }
- }
-
- return -1;
- }
- }
- }
-
- #endif
|