tablegen.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. """
  2. pygments.lexers.tablegen
  3. ~~~~~~~~~~~~~~~~~~~~~~~~
  4. Lexer for LLVM's TableGen DSL.
  5. :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. from pygments.lexer import RegexLexer, include, words, using
  9. from pygments.lexers.c_cpp import CppLexer
  10. from pygments.token import Comment, Keyword, Name, Number, Operator, \
  11. Punctuation, String, Text, Whitespace, Error
# Public API of this module: only the lexer class is exported.
__all__ = ['TableGenLexer']
  13. KEYWORDS = (
  14. 'assert',
  15. 'class',
  16. 'code',
  17. 'def',
  18. 'dump',
  19. 'else',
  20. 'foreach',
  21. 'defm',
  22. 'defset',
  23. 'defvar',
  24. 'field',
  25. 'if',
  26. 'in',
  27. 'include',
  28. 'let',
  29. 'multiclass',
  30. 'then',
  31. )
  32. KEYWORDS_CONST = (
  33. 'false',
  34. 'true',
  35. )
  36. KEYWORDS_TYPE = (
  37. 'bit',
  38. 'bits',
  39. 'dag',
  40. 'int',
  41. 'list',
  42. 'string',
  43. )
  44. BANG_OPERATORS = (
  45. 'add',
  46. 'and',
  47. 'cast',
  48. 'con',
  49. 'cond',
  50. 'dag',
  51. 'div',
  52. 'empty',
  53. 'eq',
  54. 'exists',
  55. 'filter',
  56. 'find',
  57. 'foldl',
  58. 'foreach',
  59. 'ge',
  60. 'getdagarg',
  61. 'getdagname',
  62. 'getdagop',
  63. 'gt',
  64. 'head',
  65. 'if',
  66. 'interleave',
  67. 'isa',
  68. 'le',
  69. 'listconcat',
  70. 'listremove',
  71. 'listsplat',
  72. 'logtwo',
  73. 'lt',
  74. 'mul',
  75. 'ne',
  76. 'not',
  77. 'or',
  78. 'range',
  79. 'repr',
  80. 'setdagarg',
  81. 'setdagname',
  82. 'setdagop',
  83. 'shl',
  84. 'size',
  85. 'sra',
  86. 'srl',
  87. 'strconcat',
  88. 'sub',
  89. 'subst',
  90. 'substr',
  91. 'tail',
  92. 'tolower',
  93. 'toupper',
  94. 'xor',
  95. )
  96. class TableGenLexer(RegexLexer):
  97. """
  98. Lexer for TableGen
  99. """
  100. name = 'TableGen'
  101. url = 'https://llvm.org/docs/TableGen/ProgRef.html'
  102. aliases = ['tablegen', 'td']
  103. filenames = ['*.td']
  104. version_added = '2.19'
  105. tokens = {
  106. 'root': [
  107. (r'\s+', Whitespace),
  108. (r'/\*', Comment.Multiline, 'comment'),
  109. (r'//.*?$', Comment.SingleLine),
  110. (r'#(define|ifdef|ifndef|else|endif)', Comment.Preproc),
  111. # Binary/hex numbers. Note that these take priority over names,
  112. # which may begin with numbers.
  113. (r'0b[10]+', Number.Bin),
  114. (r'0x[0-9a-fA-F]+', Number.Hex),
  115. # Keywords
  116. (words(KEYWORDS, suffix=r'\b'), Keyword),
  117. (words(KEYWORDS_CONST, suffix=r'\b'), Keyword.Constant),
  118. (words(KEYWORDS_TYPE, suffix=r'\b'), Keyword.Type),
  119. # Bang operators
  120. (words(BANG_OPERATORS, prefix=r'\!', suffix=r'\b'), Operator),
  121. # Unknown bang operators are an error
  122. (r'![a-zA-Z]+', Error),
  123. # Names and identifiers
  124. (r'[0-9]*[a-zA-Z_][a-zA-Z_0-9]*', Name),
  125. (r'\$[a-zA-Z_][a-zA-Z_0-9]*', Name.Variable),
  126. # Place numbers after keywords. Names/identifiers may begin with
  127. # numbers, and we want to parse 1X as one name token as opposed to
  128. # a number and a name.
  129. (r'[-\+]?[0-9]+', Number.Integer),
  130. # String literals
  131. (r'"', String, 'dqs'),
  132. (r'\[\{', Text, 'codeblock'),
  133. # Misc. punctuation
  134. (r'[-+\[\]{}()<>\.,;:=?#]+', Punctuation),
  135. ],
  136. 'comment': [
  137. (r'[^*/]+', Comment.Multiline),
  138. (r'/\*', Comment.Multiline, '#push'),
  139. (r'\*/', Comment.Multiline, '#pop'),
  140. (r'[*/]', Comment.Multiline)
  141. ],
  142. 'strings': [
  143. (r'\\[\\\'"tn]', String.Escape),
  144. (r'[^\\"]+', String),
  145. ],
  146. # Double-quoted string, a la C
  147. 'dqs': [
  148. (r'"', String, '#pop'),
  149. include('strings'),
  150. ],
  151. # No escaping inside a code block - everything is literal
  152. # Assume that the code inside a code block is C++. This isn't always
  153. # true in TableGen, but is the far most common scenario.
  154. 'codeblock': [
  155. (r'\}\]', Text, '#pop'),
  156. (r'([^}]+|\}[^]])*', using(CppLexer)),
  157. ],
  158. }