No Description
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

BurstDisassembler.Core.cs 21KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583
  1. #if UNITY_EDITOR || BURST_INTERNAL
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Diagnostics;
  5. using System.Runtime.CompilerServices;
  6. namespace Unity.Burst.Editor
  7. {
  8. internal partial class BurstDisassembler
  9. {
  /// <summary>
  /// Base class that classifies identifiers (instructions, registers, ...) found by the tokenizer.
  /// Derived classes register their known texts through <see cref="AddTokenKind"/>.
  /// </summary>
  internal abstract class AsmTokenKindProvider
  {
      // Keyed by StringSlice rather than string so that a token can be looked up
      // straight from its slice of the source text.
      private readonly Dictionary<StringSlice, AsmTokenKind> _tokenKinds;

      // Length of the longest registered text; a longer slice cannot be in the table.
      private int _maximumLength;

      /// <summary>
      /// Creates the provider with a lookup table pre-sized for <paramref name="capacity"/> entries.
      /// </summary>
      protected AsmTokenKindProvider(int capacity)
      {
          _tokenKinds = new Dictionary<StringSlice, AsmTokenKind>(capacity);
      }

      /// <summary>
      /// Registers <paramref name="text"/> as being of the given <paramref name="kind"/>.
      /// </summary>
      protected void AddTokenKind(string text, AsmTokenKind kind)
      {
          _tokenKinds.Add(new StringSlice(text), kind);
          _maximumLength = Math.Max(_maximumLength, text.Length);
      }

      /// <summary>
      /// Returns the kind registered for <paramref name="slice"/>,
      /// or <see cref="AsmTokenKind.Identifier"/> when the text is not registered.
      /// </summary>
      public virtual AsmTokenKind FindTokenKind(StringSlice slice)
      {
          // Cheap rejection before hashing the slice.
          if (slice.Length > _maximumLength)
          {
              return AsmTokenKind.Identifier;
          }

          AsmTokenKind registeredKind;
          if (_tokenKinds.TryGetValue(slice, out registeredKind))
          {
              return registeredKind;
          }

          return AsmTokenKind.Identifier;
      }

      /// <summary>
      /// Tells the tokenizer whether <paramref name="c"/>, found right after the last regular
      /// identifier character, should still be appended to the token. The default accepts nothing.
      /// </summary>
      public virtual bool AcceptsCharAsIdentifierOrRegisterEnd(char c)
      {
          return false;
      }

      /// <summary>
      /// Tells whether <paramref name="c"/> may appear inside an instruction, register or identifier.
      /// </summary>
      public virtual bool IsInstructionOrRegisterOrIdentifier(char c)
      {
          if (c >= 'a' && c <= 'z')
          {
              return true;
          }

          if (c >= 'A' && c <= 'Z')
          {
              return true;
          }

          if (c >= '0' && c <= '9')
          {
              return true;
          }

          // '.' is part of the set because of instructions like `b.le` or `f32.const`
          return c == '_' || c == '@' || c == '.';
      }

      /// <summary>
      /// Checks whether regA == regB. This function assumes the given strings are proper registers.
      /// </summary>
      public virtual bool RegisterEqual(string regA, string regB)
      {
          return string.Equals(regA, regB);
      }

      /// <summary>
      /// Returns the <see cref="SIMDkind"/> of the given instruction.
      /// </summary>
      public abstract SIMDkind SimdKind(StringSlice instruction);
  }
  50. /// <summary>
  51. /// The ASM tokenizer
  52. /// </summary>
  53. private struct AsmTokenizer
  54. {
  // The complete text being tokenized.
  private readonly string _text;
  // Assembly flavor; PrepareAlignment only pads when this is AsmKind.Intel.
  private readonly AsmKind _asmKind;
  // Classifies identifier-like tokens (instruction, register, ...).
  private readonly AsmTokenKindProvider _tokenKindProvider;
  // Index in _text of the current character _c.
  private int _position;
  // Index in _text of the character the next call to NextChar will read.
  private int _nextPosition;
  // Position of the current character once alignment padding has been accounted for.
  private int _alignedPosition;
  // Aligned position that the next character read by NextChar will get.
  private int _nextAlignedPosition;
  // Current character, or (char)0 once the end of _text has been reached.
  private char _c;
  // Character that starts a comment running to the end of the line.
  private readonly char _commentStartChar;
  // Set by PrepareAlignment; when true, the next Misc token adds _padding to _nextAlignedPosition.
  private bool _doPad;
  // InstructionAlignment minus the length of the last instruction/identifier/register token.
  private int _padding;
  /// <summary>
  /// Creates a tokenizer over <paramref name="text"/> and loads its first character.
  /// </summary>
  /// <param name="text">The assembly text to tokenize.</param>
  /// <param name="asmKind">The assembly flavor of <paramref name="text"/>.</param>
  /// <param name="tokenKindProvider">Provider used to classify identifier-like tokens.</param>
  /// <param name="commentStart">Character that starts a comment.</param>
  public AsmTokenizer(string text, AsmKind asmKind, AsmTokenKindProvider tokenKindProvider, char commentStart)
  {
      // Immutable configuration
      _text = text;
      _asmKind = asmKind;
      _tokenKindProvider = tokenKindProvider;
      _commentStartChar = commentStart;

      // Cursor state
      _c = (char)0;
      _position = 0;
      _nextPosition = 0;
      _alignedPosition = 0;
      _nextAlignedPosition = 0;

      // Alignment state
      _padding = 0;
      _doPad = false;

      // Every field is assigned at this point, so it is legal to call an instance method:
      // prime _c with the first character (it stays 0 for an empty text).
      NextChar();
  }
  /// <summary>
  /// Reads the next token, dispatching on the current character.
  /// </summary>
  /// <param name="token">The token read, or a default token when the end of the text was reached.</param>
  /// <returns><c>true</c> when a token was produced; <c>false</c> at the end of the text.</returns>
  public bool TryGetNextToken(out AsmToken token)
  {
      var tokenStart = _position;
      var alignedTokenStart = _alignedPosition;

      if (_c == (char)0)
      {
          token = new AsmToken();
          return false;
      }

      // Like everywhere else in this file, the character classes are matched inline instead
      // of through helper functions, as Mono might not be good enough at inlining by itself.
      // The order of the tests matters: it decides which parser wins for a given character.
      if (_c == '.')
      {
          token = ParseDirective(tokenStart, alignedTokenStart);
      }
      else if (_c >= 'a' && _c <= 'z' || _c >= 'A' && _c <= 'Z' || _c == '_' || _c == '@')
      {
          token = ParseInstructionOrIdentifierOrRegister(tokenStart, alignedTokenStart);
          PrepareAlignment(token);
      }
      else if (_c >= '0' && _c <= '9' || _c == '-')
      {
          token = ParseNumber(tokenStart, alignedTokenStart);
      }
      else if (_c == '"')
      {
          token = ParseString(tokenStart, alignedTokenStart);
      }
      else if (_c == _commentStartChar)
      {
          token = ParseComment(tokenStart, alignedTokenStart);
      }
      else if (_c == '\r' || _c == '\n')
      {
          // For a \r\n pair, step over the \r so that both characters end up in one NewLine token
          if (_c == '\r' && PreviewChar() == '\n')
          {
              NextChar();
          }

          token = ParseNewLine(tokenStart, alignedTokenStart);
      }
      else
      {
          // Apply the padding requested by PrepareAlignment: everything after the first
          // character of this Misc token gets its aligned position shifted by _padding.
          if (_doPad)
          {
              _nextAlignedPosition += _padding;
              _doPad = false;
          }

          token = ParseMisc(tokenStart, alignedTokenStart);
      }

      return true;
  }
  /// <summary>
  /// Computes the padding for <paramref name="token"/> and decides whether the next Misc token
  /// should apply it. Padding is only requested for Intel assembly, after an instruction
  /// that is shorter than InstructionAlignment and is not the last thing on its line.
  /// </summary>
  private void PrepareAlignment(AsmToken token)
  {
      _padding = InstructionAlignment - token.Length;

      bool isInstruction;
      switch (token.Kind)
      {
          case AsmTokenKind.Instruction:
          case AsmTokenKind.BranchInstruction:
          case AsmTokenKind.CallInstruction:
          case AsmTokenKind.JumpInstruction:
          case AsmTokenKind.ReturnInstruction:
          case AsmTokenKind.InstructionSIMD:
              isInstruction = true;
              break;
          default:
              isInstruction = false;
              break;
      }

      // If there is nothing behind the instruction on this line, don't align.
      var endsLine = _c == '\n' || _c == '\r';

      _doPad = _asmKind == AsmKind.Intel && isInstruction && !endsLine && _padding > 0;
  }
  /// <summary>
  /// Produces a NewLine token ending at the current character and moves past it.
  /// </summary>
  private AsmToken ParseNewLine(int startPosition, int startAlignedPosition)
  {
      // Measure before advancing: the current character is the last one of the line break
      // (the caller has already stepped over the \r of a \r\n pair).
      var length = _position - startPosition + 1;
      NextChar();
      return new AsmToken(AsmTokenKind.NewLine, startPosition, startAlignedPosition, length);
  }
  /// <summary>
  /// Parses a run of characters that cannot start any other token
  /// (directive, instruction/identifier, number, string, comment or new line).
  /// </summary>
  private AsmToken ParseMisc(int startPosition, int startAlignedPosition)
  {
      var lastPosition = _position;
      for (;;)
      {
          // End of text or end of line
          if (_c == (char)0 || _c == '\r' || _c == '\n')
          {
              break;
          }

          // Start of a directive, instruction/identifier/register or number
          if (_c == '.'
              || _c >= 'a' && _c <= 'z' || _c >= 'A' && _c <= 'Z' || _c == '_' || _c == '@'
              || _c >= '0' && _c <= '9' || _c == '-')
          {
              break;
          }

          // Start of a string or a comment
          if (_c == '"' || _c == _commentStartChar)
          {
              break;
          }

          lastPosition = _position;
          NextChar();
      }

      var length = lastPosition - startPosition + 1;
      return new AsmToken(AsmTokenKind.Misc, startPosition, startAlignedPosition, length);
  }
  /// <summary>
  /// Directives reported as <c>DataDirective</c> by <c>ParseDirective</c>.
  /// Each entry goes through <see cref="AssertDataDirectiveLength"/> because the lookup
  /// in <c>ParseDirective</c> only runs for tokens of 5 or 6 characters.
  /// </summary>
  private static readonly string[] DataDirectiveStrings =
  {
      AssertDataDirectiveLength(".long"),
      AssertDataDirectiveLength(".byte"),
      AssertDataDirectiveLength(".short"),
      AssertDataDirectiveLength(".ascii"),
      AssertDataDirectiveLength(".asciz"),
  };

  /// <summary>
  /// Debug-asserts that <paramref name="text"/> is 5 or 6 characters long and returns it unchanged.
  /// </summary>
  private static string AssertDataDirectiveLength(string text)
  {
      Debug.Assert(text.Length == 5 || text.Length == 6, $"Invalid length {text.Length} for string {text}. Expecting 5 or 6");
      return text;
  }
  /// <summary>
  /// Parses a token starting with <c>.</c> and refines its kind:
  /// <list type="bullet">
  /// <item><description><c>.Lfunc_begin...</c> => FunctionBegin</description></item>
  /// <item><description><c>.Lfunc_end...</c> => FunctionEnd</description></item>
  /// <item><description><c>.L...</c> => Label</description></item>
  /// <item><description><c>.long</c>, <c>.byte</c>, <c>.short</c>, <c>.ascii</c>, <c>.asciz</c> => DataDirective</description></item>
  /// <item><description><c>.file</c>, <c>.cv_file</c> => SourceFile</description></item>
  /// <item><description><c>.loc</c>, <c>.cv_loc</c> => SourceLocation</description></item>
  /// <item><description>anything else => Directive</description></item>
  /// </list>
  /// </summary>
  /// <param name="startPosition">Position in the text of the leading <c>.</c>.</param>
  /// <param name="startAlignedPosition">Aligned position of the leading <c>.</c>.</param>
  /// <returns>The directive or label token.</returns>
  private AsmToken ParseDirective(int startPosition, int startAlignedPosition)
  {
      var endPosition = _position;
      NextChar(); // skip .
      bool isLabel = _c == 'L'; // A label starts with a capital `L` like .Lthis_is_a_jump_label
      while (_c >= 'a' && _c <= 'z' || _c >= 'A' && _c <= 'Z' || _c >= '0' && _c <= '9' || _c == '.' || _c == '_' || _c == '@')
      {
          endPosition = _position;
          NextChar();
      }

      int length = endPosition - startPosition + 1;
      var kind = isLabel ? AsmTokenKind.Label : AsmTokenKind.Directive;

      const string MatchFunc = ".Lfunc_";
      const int MatchFuncLength = 7;
      Debug.Assert(MatchFunc.Length == MatchFuncLength);

      if (isLabel)
      {
          // Fast early check on the common prefix. These are deliberately prefix matches:
          // the token can carry a suffix after `begin` / `end`.
          if (string.CompareOrdinal(_text, startPosition, MatchFunc, 0, MatchFuncLength) == 0)
          {
              if (string.CompareOrdinal(_text, startPosition, ".Lfunc_begin", 0, ".Lfunc_begin".Length) == 0)
              {
                  kind = AsmTokenKind.FunctionBegin;
              }
              else if (string.CompareOrdinal(_text, startPosition, ".Lfunc_end", 0, ".Lfunc_end".Length) == 0)
              {
                  kind = AsmTokenKind.FunctionEnd;
              }
          }
      }
      // Use length to early exit: every special directive is between 4 and 8 characters long
      else if (length >= 4 && length <= 8)
      {
          if (length == 5 || length == 6)
          {
              foreach (var dataDirectiveStr in DataDirectiveStrings)
              {
                  // Require the same length so that the comparison is an exact match:
                  // a 6-character directive that merely starts with `.long` or `.byte`
                  // is not a data directive.
                  if (dataDirectiveStr.Length == length
                      && string.CompareOrdinal(_text, startPosition, dataDirectiveStr, 0, length) == 0)
                  {
                      kind = AsmTokenKind.DataDirective;
                      break;
                  }
              }

              // .file => SourceFile (exact match, for the same reason as above)
              if (kind == AsmTokenKind.Directive && length == 5 && string.CompareOrdinal(_text, startPosition, ".file", 0, 5) == 0)
              {
                  kind = AsmTokenKind.SourceFile;
              }
          }
          // .loc => SourceLocation
          // .cv_loc => SourceLocation
          else if ((length == 4 && string.CompareOrdinal(_text, startPosition, ".loc", 0, 4) == 0) ||
                   (length == 7 && string.CompareOrdinal(_text, startPosition, ".cv_loc", 0, 7) == 0))
          {
              kind = AsmTokenKind.SourceLocation;
          }
          // .cv_file => SourceFile
          else if (length == 8 && string.CompareOrdinal(_text, startPosition, ".cv_file", 0, 8) == 0)
          {
              kind = AsmTokenKind.SourceFile;
          }
      }

      return new AsmToken(kind, startPosition, startAlignedPosition, length);
  }
  /// <summary>
  /// Parses an identifier-like token and asks the token kind provider to classify it
  /// (instruction, register, ...). A plain identifier immediately followed by <c>:</c>
  /// is reported as a label.
  /// </summary>
  private AsmToken ParseInstructionOrIdentifierOrRegister(int startPosition, int startAlignedPosition)
  {
      var provider = _tokenKindProvider;
      var lastPosition = _position;

      // Main run of identifier characters
      while (provider.IsInstructionOrRegisterOrIdentifier(_c))
      {
          lastPosition = _position;
          NextChar();
      }

      // Optional single trailing character accepted by the provider
      if (provider.AcceptsCharAsIdentifierOrRegisterEnd(_c))
      {
          lastPosition = _position;
          NextChar();
      }

      var length = lastPosition - startPosition + 1;
      var kind = provider.FindTokenKind(new StringSlice(_text, startPosition, length));

      // `identifier:` is a label declaration; flag it here to help the semantic pass later
      if (kind == AsmTokenKind.Identifier && _c == ':')
      {
          kind = AsmTokenKind.Label;
      }

      return new AsmToken(kind, startPosition, startAlignedPosition, length);
  }
  /// <summary>
  /// Parses a number: an optional leading <c>-</c> followed by any run of
  /// decimal digits, hexadecimal digits, <c>x</c> and <c>.</c>.
  /// A number immediately followed by <c>:</c> is reported as a label.
  /// </summary>
  private AsmToken ParseNumber(int startPosition, int startAlignedPostion)
  {
      var lastPosition = _position;

      if (_c == '-')
      {
          NextChar();
      }

      for (;;)
      {
          var isNumberChar = _c >= '0' && _c <= '9'
                             || _c >= 'a' && _c <= 'f'
                             || _c >= 'A' && _c <= 'F'
                             || _c == 'x'
                             || _c == '.';
          if (!isNumberChar)
          {
              break;
          }

          lastPosition = _position;
          NextChar();
      }

      // `number:` is a label declaration; flag it here to help the semantic pass later
      var kind = AsmTokenKind.Number;
      if (_c == ':')
      {
          kind = AsmTokenKind.Label;
      }

      return new AsmToken(kind, startPosition, startAlignedPostion, lastPosition - startPosition + 1);
  }
  /// <summary>
  /// Parses a double-quoted string; the current character is the opening quote.
  /// </summary>
  /// <remarks>
  /// A backslash escapes the character that follows it, whatever that character is. This keeps
  /// an escaped quote from ending the string, and keeps the second backslash of an escaped
  /// backslash from escaping the closing quote (as in <c>"C:\\"</c>).
  /// An unterminated string extends to the end of the text.
  /// A string immediately followed by <c>:</c> is reported as a label.
  /// </remarks>
  /// <param name="startPosition">Position in the text of the opening quote.</param>
  /// <param name="startAlignedPostion">Aligned position of the opening quote.</param>
  /// <returns>The string (or label) token, quotes included.</returns>
  private AsmToken ParseString(int startPosition, int startAlignedPostion)
  {
      // Skip the opening "
      NextChar();
      while (_c != (char)0 && _c != '"')
      {
          // Step over the backslash so that the escaped character is consumed below
          // without being interpreted (neither as a closing quote nor as another escape).
          if (_c == '\\' && PreviewChar() != (char)0)
          {
              NextChar();
          }
          NextChar();
      }

      // Either on the closing quote, or on the last character of the text if unterminated
      var endPosition = _position;
      NextChar(); // Skip the closing " (no effect at the end of the text)

      // If we have `:` right after a string, change from string to label declaration to help the semantic pass later
      var stringKind = _c == ':' ? AsmTokenKind.Label : AsmTokenKind.String;
      return new AsmToken(stringKind, startPosition, startAlignedPostion, endPosition - startPosition + 1);
  }
  /// <summary>
  /// Parses a comment running from the current character up to, but not including,
  /// the next line break (or the end of the text).
  /// </summary>
  private AsmToken ParseComment(int startPosition, int startAlignedPosition)
  {
      var lastPosition = _position;
      while (!(_c == (char)0 || _c == '\n' || _c == '\r'))
      {
          lastPosition = _position;
          NextChar();
      }

      var length = lastPosition - startPosition + 1;
      return new AsmToken(AsmTokenKind.Comment, startPosition, startAlignedPosition, length);
  }
  /// <summary>
  /// Moves to the next character of the text. At the end of the text, the current character
  /// becomes (char)0 and the positions are left untouched.
  /// </summary>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  private void NextChar()
  {
      if (_nextPosition >= _text.Length)
      {
          _c = (char)0;
          return;
      }

      _position = _nextPosition;
      _c = _text[_position];
      _nextPosition++;

      _alignedPosition = _nextAlignedPosition;
      _nextAlignedPosition++;
  }
  /// <summary>
  /// Returns the character following the current one without consuming it,
  /// or (char)0 when there is none.
  /// </summary>
  private char PreviewChar()
  {
      if (_nextPosition >= _text.Length)
      {
          return (char)0;
      }

      return _text[_nextPosition];
  }
  350. }
  /// <summary>
  /// Kind of a SIMD instruction, as reported by <c>AsmTokenKindProvider.SimdKind</c>.
  /// </summary>
  public enum SIMDkind
  {
      // NOTE(review): presumably an instruction operating on all lanes of a vector - confirm with the providers
      Packed = 0,

      // NOTE(review): presumably an instruction operating on a single lane - confirm with the providers
      Scalar = 1,

      // NOTE(review): meaning is not visible from this file - confirm with the providers
      Infrastructure = 2,
  }
  /// <summary>
  /// An ASM token. The token only stores where it is located in the text, not the text itself:
  /// use <see cref="Slice"/> or <see cref="ToString"/> with the original text to extract it.
  /// </summary>
  internal readonly struct AsmToken
  {
      // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      // CAUTION: It is important to not put *any managed objects*
      // into this struct for GC efficiency
      // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

      /// <summary>Kind of the token.</summary>
      public readonly AsmTokenKind Kind;

      /// <summary>Index of the first character of the token in the text.</summary>
      public readonly int Position;

      /// <summary>Position of the token once alignment padding has been accounted for.</summary>
      public readonly int AlignedPosition;

      /// <summary>Number of characters of the token in the text.</summary>
      public readonly int Length;

      public AsmToken(AsmTokenKind kind, int position, int alignedPosition, int length)
      {
          Kind = kind;
          Position = position;
          AlignedPosition = alignedPosition;
          Length = length;
      }

      /// <summary>
      /// Returns the slice of <paramref name="text"/> covered by this token.
      /// </summary>
      public StringSlice Slice(string text)
      {
          return new StringSlice(text, Position, Length);
      }

      /// <summary>
      /// Extracts the text of this token from <paramref name="text"/> as a new string.
      /// </summary>
      public string ToString(string text)
      {
          return text.Substring(Position, Length);
      }

      /// <summary>
      /// Returns the text of this token followed by its kind, for display purposes.
      /// </summary>
      public string ToFriendlyText(string text)
      {
          return ToString(text) + " : " + Kind;
      }
  }
  /// <summary>
  /// Kind of an ASM token.
  /// </summary>
  internal enum AsmTokenKind
  {
      // Default value; it is the kind of the default token returned at the end of the text.
      Eof = 0,

      // A token starting with `.` that is not one of the more specific kinds below.
      Directive = 1,

      // One of .long, .byte, .short, .ascii, .asciz
      DataDirective = 2,

      // .file or .cv_file
      SourceFile = 3,

      // .loc or .cv_loc
      SourceLocation = 4,

      // A token starting with `.L`, or an identifier, number or string immediately followed by `:`
      Label = 5,

      // A token starting with `.Lfunc_begin`
      FunctionBegin = 6,

      // A token starting with `.Lfunc_end`
      FunctionEnd = 7,

      // An identifier-like token that the token kind provider does not know about.
      Identifier = 8,

      // The kinds from Qualifier to Register are never assigned by the tokenizer itself;
      // presumably they come from the AsmTokenKindProvider lookup table.
      Qualifier = 9,
      Instruction = 10,
      CallInstruction = 11,
      BranchInstruction = 12,
      JumpInstruction = 13,
      ReturnInstruction = 14,
      InstructionSIMD = 15,
      Register = 16,

      // A token starting with a digit or `-`
      Number = 17,

      // A double-quoted string
      String = 18,

      // From the comment start character to the end of the line
      Comment = 19,

      // \n, \r or \r\n
      NewLine = 20,

      // Anything else (whitespace, punctuation...)
      Misc = 21
  }
  412. }
  /// <summary>
  /// A slice of a string from an original string. Slices compare by content, so two slices
  /// taken from different strings are equal when they cover the same characters.
  /// </summary>
  /// <remarks>
  /// <c>default(StringSlice)</c> has no backing string and a length of 0; it behaves as an empty
  /// slice. The position and length given to the constructor are not validated.
  /// </remarks>
  internal readonly struct StringSlice : IEquatable<StringSlice>
  {
      // Backing string; null only for default(StringSlice)
      private readonly string _text;

      /// <summary>Index in the original string of the first character of the slice.</summary>
      public readonly int Position;

      /// <summary>Number of characters in the slice.</summary>
      public readonly int Length;

      /// <summary>
      /// Creates a slice covering the whole of <paramref name="text"/>.
      /// </summary>
      /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
      public StringSlice(string text)
      {
          _text = text ?? throw new ArgumentNullException(nameof(text));
          Position = 0;
          Length = text.Length;
      }

      /// <summary>
      /// Creates a slice of <paramref name="length"/> characters of <paramref name="text"/>
      /// starting at <paramref name="position"/>.
      /// </summary>
      /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
      public StringSlice(string text, int position, int length)
      {
          _text = text ?? throw new ArgumentNullException(nameof(text));
          Position = position;
          Length = length;
      }

      /// <summary>Gets the character at <paramref name="index"/>, relative to the start of the slice.</summary>
      public char this[int index] => _text[Position + index];

      /// <summary>
      /// Compares the characters of the two slices (ordinal comparison).
      /// </summary>
      public bool Equals(StringSlice other)
      {
          if (Length != other.Length) return false;
          for (int i = 0; i < Length; i++)
          {
              if (this[i] != other[i])
              {
                  return false;
              }
          }
          return true;
      }

      public override bool Equals(object obj)
      {
          return obj is StringSlice other && Equals(other);
      }

      /// <summary>
      /// Computes a hash from the length and the characters of the slice, consistent with <see cref="Equals(StringSlice)"/>.
      /// </summary>
      public override int GetHashCode()
      {
          unchecked
          {
              var hashCode = Length;
              for (int i = 0; i < Length; i++)
              {
                  hashCode = (hashCode * 397) ^ this[i];
              }
              return hashCode;
          }
      }

      public static bool operator ==(StringSlice left, StringSlice right)
      {
          return left.Equals(right);
      }

      public static bool operator !=(StringSlice left, StringSlice right)
      {
          return !left.Equals(right);
      }

      /// <summary>
      /// Returns the characters of the slice as a new string (empty for a default slice).
      /// </summary>
      public override string ToString()
      {
          // default(StringSlice) has no backing string: report it as empty instead of throwing
          return _text == null ? string.Empty : _text.Substring(Position, Length);
      }

      /// <summary>
      /// Checks whether the slice starts with <paramref name="text"/> (ordinal comparison).
      /// </summary>
      /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
      public bool StartsWith(string text)
      {
          if (text == null) throw new ArgumentNullException(nameof(text));
          if (Length < text.Length) return false;
          for (var i = 0; i < text.Length; i++)
          {
              var c = text[i];
              if (_text[Position + i] != c) return false;
          }
          return true;
      }

      /// <summary>
      /// Checks whether the slice contains the character <paramref name="c"/>.
      /// The search never goes past the end of the backing string.
      /// </summary>
      public bool Contains(char c)
      {
          // default(StringSlice) has no backing string and contains nothing
          if (_text == null) return false;

          int start = Position;
          int end = Math.Min(Position + Length, _text.Length);
          for (int i = start; i < end; i++)
          {
              if (_text[i] == c) { return true; }
          }
          return false;
      }

      /// <summary>
      /// Returns the index, relative to the start of the slice, of the first occurrence
      /// of <paramref name="c"/>, or -1 when the slice does not contain it.
      /// </summary>
      public int IndexOf(char c)
      {
          for (var i = 0; i < Length; i++)
          {
              if (_text[Position + i] == c)
              {
                  return i;
              }
          }
          return -1;
      }
  }
  507. }
  508. #endif