typst.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
"""
    pygments.lexers.typst
    ~~~~~~~~~~~~~~~~~~~~~

    Lexers for the Typst language.

    :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
  8. from pygments.lexer import RegexLexer, words, bygroups, include
  9. from pygments.token import Comment, Keyword, Name, String, Punctuation, \
  10. Whitespace, Generic, Operator, Number, Text
  11. from pygments.util import get_choice_opt
  12. __all__ = ['TypstLexer']
  13. class TypstLexer(RegexLexer):
  14. """
  15. For Typst code.
  16. Additional options accepted:
  17. `start`
  18. Specifies the starting state of the lexer (one of 'markup', 'math',
  19. 'code'). The default is 'markup'.
  20. """
  21. name = 'Typst'
  22. aliases = ['typst']
  23. filenames = ['*.typ']
  24. mimetypes = ['text/x-typst']
  25. url = 'https://typst.app'
  26. version_added = '2.18'
  27. MATH_SHORTHANDS = (
  28. '[|', '|]', '||', '*', ':=', '::=', '...', '\'', '-', '=:', '!=', '>>',
  29. '>=', '>>>', '<<', '<=', '<<<', '->', '|->', '=>', '|=>', '==>',
  30. '-->', '~~>', '~>', '>->', '->>', '<-', '<==', '<--', '<~~', '<~',
  31. '<-<','<<-','<->','<=>','<==>','<-->', '>', '<', '~', ':', '|'
  32. )
  33. tokens = {
  34. 'root': [
  35. include('markup'),
  36. ],
  37. # common cases going from math/markup into code mode
  38. 'into_code': [
  39. (words(('#let', '#set', '#show'), suffix=r'\b'), Keyword.Declaration, 'inline_code'),
  40. (words(('#import', '#include'), suffix=r'\b'), Keyword.Namespace, 'inline_code'),
  41. (words(('#if', '#for', '#while', '#export'), suffix=r'\b'), Keyword.Reserved, 'inline_code'),
  42. (r'#\{', Punctuation, 'code'),
  43. (r'#\(', Punctuation, 'code'),
  44. (r'(#[a-zA-Z_][a-zA-Z0-9_-]*)(\[)', bygroups(Name.Function, Punctuation), 'markup'),
  45. (r'(#[a-zA-Z_][a-zA-Z0-9_-]*)(\()', bygroups(Name.Function, Punctuation), 'code'),
  46. (words(('#true', '#false', '#none', '#auto'), suffix=r'\b'), Keyword.Constant),
  47. (r'#[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable),
  48. (r'#0x[0-9a-fA-F]+', Number.Hex),
  49. (r'#0b[01]+', Number.Bin),
  50. (r'#0o[0-7]+', Number.Oct),
  51. (r'#[0-9]+[\.e][0-9]+', Number.Float),
  52. (r'#[0-9]+', Number.Integer),
  53. ],
  54. 'markup': [
  55. include('comment'),
  56. (r'^\s*=+.*$', Generic.Heading),
  57. (r'[*][^*]*[*]', Generic.Strong),
  58. (r'_[^_]*_', Generic.Emph),
  59. (r'\$', Punctuation, 'math'),
  60. (r'`[^`]*`', String.Backtick), # inline code
  61. (r'^(\s*)(-)(\s+)', bygroups(Whitespace, Punctuation, Whitespace)), # unnumbered list
  62. (r'^(\s*)(\+)(\s+)', bygroups(Whitespace, Punctuation, Whitespace)), # numbered list
  63. (r'^(\s*)([0-9]+\.)', bygroups(Whitespace, Punctuation)), # numbered list variant
  64. (r'^(\s*)(/)(\s+)([^:]+)(:)', bygroups(Whitespace, Punctuation, Whitespace, Name.Variable, Punctuation)), # definitions
  65. (r'<[a-zA-Z_][a-zA-Z0-9_-]*>', Name.Label), # label
  66. (r'@[a-zA-Z_][a-zA-Z0-9_-]*', Name.Label), # reference
  67. (r'\\#', Text), # escaped
  68. include('into_code'),
  69. (r'```(?:.|\n)*?```', String.Backtick), # code block
  70. (r'https?://[0-9a-zA-Z~/%#&=\',;.+?]*', Generic.Emph), # links
  71. (words(('---', '\\', '~', '--', '...'), suffix=r'\B'), Punctuation), # special chars shorthand
  72. (r'\\\[', Punctuation), # escaped
  73. (r'\\\]', Punctuation), # escaped
  74. (r'\[', Punctuation, '#push'),
  75. (r'\]', Punctuation, '#pop'),
  76. (r'[ \t]+\n?|\n', Whitespace),
  77. (r'((?![*_$`<@\\#\] ]|https?://).)+', Text),
  78. ],
  79. 'math': [
  80. include('comment'),
  81. (words(('\\_', '\\^', '\\&')), Text), # escapes
  82. (words(('_', '^', '&', ';')), Punctuation),
  83. (words(('+', '/', '=') + MATH_SHORTHANDS), Operator),
  84. (r'\\', Punctuation), # line break
  85. (r'\\\$', Punctuation), # escaped
  86. (r'\$', Punctuation, '#pop'), # end of math mode
  87. include('into_code'),
  88. (r'([a-zA-Z][a-zA-Z0-9-]*)(\s*)(\()', bygroups(Name.Function, Whitespace, Punctuation)),
  89. (r'([a-zA-Z][a-zA-Z0-9-]*)(:)', bygroups(Name.Variable, Punctuation)), # named arguments in math functions
  90. (r'([a-zA-Z][a-zA-Z0-9-]*)', Name.Variable), # both variables and symbols (_ isn't supported for variables)
  91. (r'[0-9]+(\.[0-9]+)?', Number),
  92. (r'\.{1,3}|\(|\)|,|\{|\}', Punctuation),
  93. (r'"[^"]*"', String.Double),
  94. (r'[ \t\n]+', Whitespace),
  95. ],
  96. 'comment': [
  97. (r'//.*$', Comment.Single),
  98. (r'/[*](.|\n)*?[*]/', Comment.Multiline),
  99. ],
  100. 'code': [
  101. include('comment'),
  102. (r'\[', Punctuation, 'markup'),
  103. (r'\(|\{', Punctuation, 'code'),
  104. (r'\)|\}', Punctuation, '#pop'),
  105. (r'"[^"]*"', String.Double),
  106. (r',|\.{1,2}', Punctuation),
  107. (r'=', Operator),
  108. (words(('and', 'or', 'not'), suffix=r'\b'), Operator.Word),
  109. (r'=>|<=|==|!=|>|<|-=|\+=|\*=|/=|\+|-|\\|\*', Operator), # comparisons
  110. (r'([a-zA-Z_][a-zA-Z0-9_-]*)(:)', bygroups(Name.Variable, Punctuation)),
  111. (r'([a-zA-Z_][a-zA-Z0-9_-]*)(\()', bygroups(Name.Function, Punctuation), 'code'),
  112. (words(('as', 'break', 'export', 'continue', 'else', 'for', 'if',
  113. 'in', 'return', 'while'), suffix=r'\b'),
  114. Keyword.Reserved),
  115. (words(('import', 'include'), suffix=r'\b'), Keyword.Namespace),
  116. (words(('auto', 'none', 'true', 'false'), suffix=r'\b'), Keyword.Constant),
  117. (r'([0-9.]+)(mm|pt|cm|in|em|fr|%)', bygroups(Number, Keyword.Reserved)),
  118. (r'0x[0-9a-fA-F]+', Number.Hex),
  119. (r'0b[01]+', Number.Bin),
  120. (r'0o[0-7]+', Number.Oct),
  121. (r'[0-9]+[\.e][0-9]+', Number.Float),
  122. (r'[0-9]+', Number.Integer),
  123. (words(('let', 'set', 'show'), suffix=r'\b'), Keyword.Declaration),
  124. # FIXME: make this work
  125. ## (r'(import|include)( *)(")([^"])(")',
  126. ## bygroups(Keyword.Reserved, Text, Punctuation, String.Double, Punctuation)),
  127. (r'([a-zA-Z_][a-zA-Z0-9_-]*)', Name.Variable),
  128. (r'[ \t\n]+', Whitespace),
  129. (r':', Punctuation), # from imports like "import a: b" or "show: text.with(..)"
  130. ],
  131. 'inline_code': [
  132. (r';\b', Punctuation, '#pop'),
  133. (r'\n', Whitespace, '#pop'),
  134. include('code'),
  135. ],
  136. }
  137. def __init__(self, **options):
  138. self.start_state = get_choice_opt(
  139. options, 'start', ['markup', 'code', 'math'], 'markup', True)
  140. RegexLexer.__init__(self, **options)
  141. def get_tokens_unprocessed(self, text):
  142. stack = ['root']
  143. if self.start_state != 'markup': # markup is equivalent to root
  144. stack.append(self.start_state)
  145. yield from RegexLexer.get_tokens_unprocessed(self, text, stack)