123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160 |
- """
- pygments.lexers.typst
- ~~~~~~~~~~~~~~~~~~~~~
- Lexers for Typst language.
- :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- from pygments.lexer import RegexLexer, words, bygroups, include
- from pygments.token import Comment, Keyword, Name, String, Punctuation, \
- Whitespace, Generic, Operator, Number, Text
- from pygments.util import get_choice_opt
- __all__ = ['TypstLexer']
- class TypstLexer(RegexLexer):
- """
- For Typst code.
- Additional options accepted:
- `start`
- Specifies the starting state of the lexer (one of 'markup', 'math',
- 'code'). The default is 'markup'.
- """
- name = 'Typst'
- aliases = ['typst']
- filenames = ['*.typ']
- mimetypes = ['text/x-typst']
- url = 'https://typst.app'
- version_added = '2.18'
- MATH_SHORTHANDS = (
- '[|', '|]', '||', '*', ':=', '::=', '...', '\'', '-', '=:', '!=', '>>',
- '>=', '>>>', '<<', '<=', '<<<', '->', '|->', '=>', '|=>', '==>',
- '-->', '~~>', '~>', '>->', '->>', '<-', '<==', '<--', '<~~', '<~',
- '<-<','<<-','<->','<=>','<==>','<-->', '>', '<', '~', ':', '|'
- )
- tokens = {
- 'root': [
- include('markup'),
- ],
- # common cases going from math/markup into code mode
- 'into_code': [
- (words(('#let', '#set', '#show'), suffix=r'\b'), Keyword.Declaration, 'inline_code'),
- (words(('#import', '#include'), suffix=r'\b'), Keyword.Namespace, 'inline_code'),
- (words(('#if', '#for', '#while', '#export'), suffix=r'\b'), Keyword.Reserved, 'inline_code'),
- (r'#\{', Punctuation, 'code'),
- (r'#\(', Punctuation, 'code'),
- (r'(#[a-zA-Z_][a-zA-Z0-9_-]*)(\[)', bygroups(Name.Function, Punctuation), 'markup'),
- (r'(#[a-zA-Z_][a-zA-Z0-9_-]*)(\()', bygroups(Name.Function, Punctuation), 'code'),
- (words(('#true', '#false', '#none', '#auto'), suffix=r'\b'), Keyword.Constant),
- (r'#[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable),
- (r'#0x[0-9a-fA-F]+', Number.Hex),
- (r'#0b[01]+', Number.Bin),
- (r'#0o[0-7]+', Number.Oct),
- (r'#[0-9]+[\.e][0-9]+', Number.Float),
- (r'#[0-9]+', Number.Integer),
- ],
- 'markup': [
- include('comment'),
- (r'^\s*=+.*$', Generic.Heading),
- (r'[*][^*]*[*]', Generic.Strong),
- (r'_[^_]*_', Generic.Emph),
- (r'\$', Punctuation, 'math'),
- (r'`[^`]*`', String.Backtick), # inline code
- (r'^(\s*)(-)(\s+)', bygroups(Whitespace, Punctuation, Whitespace)), # unnumbered list
- (r'^(\s*)(\+)(\s+)', bygroups(Whitespace, Punctuation, Whitespace)), # numbered list
- (r'^(\s*)([0-9]+\.)', bygroups(Whitespace, Punctuation)), # numbered list variant
- (r'^(\s*)(/)(\s+)([^:]+)(:)', bygroups(Whitespace, Punctuation, Whitespace, Name.Variable, Punctuation)), # definitions
- (r'<[a-zA-Z_][a-zA-Z0-9_-]*>', Name.Label), # label
- (r'@[a-zA-Z_][a-zA-Z0-9_-]*', Name.Label), # reference
- (r'\\#', Text), # escaped
- include('into_code'),
- (r'```(?:.|\n)*?```', String.Backtick), # code block
- (r'https?://[0-9a-zA-Z~/%#&=\',;.+?]*', Generic.Emph), # links
- (words(('---', '\\', '~', '--', '...'), suffix=r'\B'), Punctuation), # special chars shorthand
- (r'\\\[', Punctuation), # escaped
- (r'\\\]', Punctuation), # escaped
- (r'\[', Punctuation, '#push'),
- (r'\]', Punctuation, '#pop'),
- (r'[ \t]+\n?|\n', Whitespace),
- (r'((?![*_$`<@\\#\] ]|https?://).)+', Text),
- ],
- 'math': [
- include('comment'),
- (words(('\\_', '\\^', '\\&')), Text), # escapes
- (words(('_', '^', '&', ';')), Punctuation),
- (words(('+', '/', '=') + MATH_SHORTHANDS), Operator),
- (r'\\', Punctuation), # line break
- (r'\\\$', Punctuation), # escaped
- (r'\$', Punctuation, '#pop'), # end of math mode
- include('into_code'),
- (r'([a-zA-Z][a-zA-Z0-9-]*)(\s*)(\()', bygroups(Name.Function, Whitespace, Punctuation)),
- (r'([a-zA-Z][a-zA-Z0-9-]*)(:)', bygroups(Name.Variable, Punctuation)), # named arguments in math functions
- (r'([a-zA-Z][a-zA-Z0-9-]*)', Name.Variable), # both variables and symbols (_ isn't supported for variables)
- (r'[0-9]+(\.[0-9]+)?', Number),
- (r'\.{1,3}|\(|\)|,|\{|\}', Punctuation),
- (r'"[^"]*"', String.Double),
- (r'[ \t\n]+', Whitespace),
- ],
- 'comment': [
- (r'//.*$', Comment.Single),
- (r'/[*](.|\n)*?[*]/', Comment.Multiline),
- ],
- 'code': [
- include('comment'),
- (r'\[', Punctuation, 'markup'),
- (r'\(|\{', Punctuation, 'code'),
- (r'\)|\}', Punctuation, '#pop'),
- (r'"[^"]*"', String.Double),
- (r',|\.{1,2}', Punctuation),
- (r'=', Operator),
- (words(('and', 'or', 'not'), suffix=r'\b'), Operator.Word),
- (r'=>|<=|==|!=|>|<|-=|\+=|\*=|/=|\+|-|\\|\*', Operator), # comparisons
- (r'([a-zA-Z_][a-zA-Z0-9_-]*)(:)', bygroups(Name.Variable, Punctuation)),
- (r'([a-zA-Z_][a-zA-Z0-9_-]*)(\()', bygroups(Name.Function, Punctuation), 'code'),
- (words(('as', 'break', 'export', 'continue', 'else', 'for', 'if',
- 'in', 'return', 'while'), suffix=r'\b'),
- Keyword.Reserved),
- (words(('import', 'include'), suffix=r'\b'), Keyword.Namespace),
- (words(('auto', 'none', 'true', 'false'), suffix=r'\b'), Keyword.Constant),
- (r'([0-9.]+)(mm|pt|cm|in|em|fr|%)', bygroups(Number, Keyword.Reserved)),
- (r'0x[0-9a-fA-F]+', Number.Hex),
- (r'0b[01]+', Number.Bin),
- (r'0o[0-7]+', Number.Oct),
- (r'[0-9]+[\.e][0-9]+', Number.Float),
- (r'[0-9]+', Number.Integer),
- (words(('let', 'set', 'show'), suffix=r'\b'), Keyword.Declaration),
- # FIXME: make this work
- ## (r'(import|include)( *)(")([^"])(")',
- ## bygroups(Keyword.Reserved, Text, Punctuation, String.Double, Punctuation)),
- (r'([a-zA-Z_][a-zA-Z0-9_-]*)', Name.Variable),
- (r'[ \t\n]+', Whitespace),
- (r':', Punctuation), # from imports like "import a: b" or "show: text.with(..)"
- ],
- 'inline_code': [
- (r';\b', Punctuation, '#pop'),
- (r'\n', Whitespace, '#pop'),
- include('code'),
- ],
- }
- def __init__(self, **options):
- self.start_state = get_choice_opt(
- options, 'start', ['markup', 'code', 'math'], 'markup', True)
- RegexLexer.__init__(self, **options)
- def get_tokens_unprocessed(self, text):
- stack = ['root']
- if self.start_state != 'markup': # markup is equivalent to root
- stack.append(self.start_state)
- yield from RegexLexer.get_tokens_unprocessed(self, text, stack)
|