123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177 |
- """
- pygments.lexers.tablegen
- ~~~~~~~~~~~~~~~~~~~~~~~~
- Lexer for LLVM's TableGen DSL.
- :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- from pygments.lexer import RegexLexer, include, words, using
- from pygments.lexers.c_cpp import CppLexer
- from pygments.token import Comment, Keyword, Name, Number, Operator, \
- Punctuation, String, Text, Whitespace, Error
- __all__ = ['TableGenLexer']
- KEYWORDS = (
- 'assert',
- 'class',
- 'code',
- 'def',
- 'dump',
- 'else',
- 'foreach',
- 'defm',
- 'defset',
- 'defvar',
- 'field',
- 'if',
- 'in',
- 'include',
- 'let',
- 'multiclass',
- 'then',
- )
- KEYWORDS_CONST = (
- 'false',
- 'true',
- )
- KEYWORDS_TYPE = (
- 'bit',
- 'bits',
- 'dag',
- 'int',
- 'list',
- 'string',
- )
- BANG_OPERATORS = (
- 'add',
- 'and',
- 'cast',
- 'con',
- 'cond',
- 'dag',
- 'div',
- 'empty',
- 'eq',
- 'exists',
- 'filter',
- 'find',
- 'foldl',
- 'foreach',
- 'ge',
- 'getdagarg',
- 'getdagname',
- 'getdagop',
- 'gt',
- 'head',
- 'if',
- 'interleave',
- 'isa',
- 'le',
- 'listconcat',
- 'listremove',
- 'listsplat',
- 'logtwo',
- 'lt',
- 'mul',
- 'ne',
- 'not',
- 'or',
- 'range',
- 'repr',
- 'setdagarg',
- 'setdagname',
- 'setdagop',
- 'shl',
- 'size',
- 'sra',
- 'srl',
- 'strconcat',
- 'sub',
- 'subst',
- 'substr',
- 'tail',
- 'tolower',
- 'toupper',
- 'xor',
- )
- class TableGenLexer(RegexLexer):
- """
- Lexer for TableGen
- """
- name = 'TableGen'
- url = 'https://llvm.org/docs/TableGen/ProgRef.html'
- aliases = ['tablegen', 'td']
- filenames = ['*.td']
- version_added = '2.19'
- tokens = {
- 'root': [
- (r'\s+', Whitespace),
- (r'/\*', Comment.Multiline, 'comment'),
- (r'//.*?$', Comment.SingleLine),
- (r'#(define|ifdef|ifndef|else|endif)', Comment.Preproc),
- # Binary/hex numbers. Note that these take priority over names,
- # which may begin with numbers.
- (r'0b[10]+', Number.Bin),
- (r'0x[0-9a-fA-F]+', Number.Hex),
- # Keywords
- (words(KEYWORDS, suffix=r'\b'), Keyword),
- (words(KEYWORDS_CONST, suffix=r'\b'), Keyword.Constant),
- (words(KEYWORDS_TYPE, suffix=r'\b'), Keyword.Type),
- # Bang operators
- (words(BANG_OPERATORS, prefix=r'\!', suffix=r'\b'), Operator),
- # Unknown bang operators are an error
- (r'![a-zA-Z]+', Error),
- # Names and identifiers
- (r'[0-9]*[a-zA-Z_][a-zA-Z_0-9]*', Name),
- (r'\$[a-zA-Z_][a-zA-Z_0-9]*', Name.Variable),
- # Place numbers after keywords. Names/identifiers may begin with
- # numbers, and we want to parse 1X as one name token as opposed to
- # a number and a name.
- (r'[-\+]?[0-9]+', Number.Integer),
- # String literals
- (r'"', String, 'dqs'),
- (r'\[\{', Text, 'codeblock'),
- # Misc. punctuation
- (r'[-+\[\]{}()<>\.,;:=?#]+', Punctuation),
- ],
- 'comment': [
- (r'[^*/]+', Comment.Multiline),
- (r'/\*', Comment.Multiline, '#push'),
- (r'\*/', Comment.Multiline, '#pop'),
- (r'[*/]', Comment.Multiline)
- ],
- 'strings': [
- (r'\\[\\\'"tn]', String.Escape),
- (r'[^\\"]+', String),
- ],
- # Double-quoted string, a la C
- 'dqs': [
- (r'"', String, '#pop'),
- include('strings'),
- ],
- # No escaping inside a code block - everything is literal
- # Assume that the code inside a code block is C++. This isn't always
- # true in TableGen, but is the far most common scenario.
- 'codeblock': [
- (r'\}\]', Text, '#pop'),
- (r'([^}]+|\}[^]])*', using(CppLexer)),
- ],
- }
|