  1. """
  2. pygments.lexers.special
  3. ~~~~~~~~~~~~~~~~~~~~~~~
  4. Special lexers.
  5. :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import ast
  9. from pygments.lexer import Lexer, line_re
  10. from pygments.token import Token, Error, Text, Generic
  11. from pygments.util import get_choice_opt
  12. __all__ = ['TextLexer', 'OutputLexer', 'RawTokenLexer']


class TextLexer(Lexer):
    """
    "Null" lexer, doesn't highlight anything.
    """
    name = 'Text only'
    aliases = ['text']
    filenames = ['*.txt']
    mimetypes = ['text/plain']
    url = ""
    version_added = ''
    priority = 0.01

    def get_tokens_unprocessed(self, text):
        yield 0, Text, text

    def analyse_text(text):
        # matches any input, but with a very low score so that more
        # specific lexers win when guessing
        return TextLexer.priority


class OutputLexer(Lexer):
    """
    Simple lexer that highlights everything as ``Token.Generic.Output``.
    """
    name = 'Text output'
    aliases = ['output']
    url = ""
    version_added = '2.10'
    _example = "output/output"

    def get_tokens_unprocessed(self, text):
        yield 0, Generic.Output, text
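

# Cache mapping token-type strings (e.g. "Token.Name.Function") to resolved
# token types, shared by all RawTokenLexer instances.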
_ttype_cache = {}


class RawTokenLexer(Lexer):
    """
    Recreate a token stream formatted with the `RawTokenFormatter`.

    Additional options accepted:

    `compress`
        If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
        the given compression algorithm before lexing (default: ``""``).
    """
    name = 'Raw token data'
    aliases = []
    filenames = []
    mimetypes = ['application/x-pygments-tokens']
    url = 'https://pygments.org/docs/formatters/#RawTokenFormatter'
    version_added = ''

    def __init__(self, **options):
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        Lexer.__init__(self, **options)

    def get_tokens(self, text):
        if self.compress:
            if isinstance(text, str):
                text = text.encode('latin1')
            try:
                if self.compress == 'gz':
                    import gzip
                    text = gzip.decompress(text)
                elif self.compress == 'bz2':
                    import bz2
                    text = bz2.decompress(text)
            except OSError:
                yield Error, text.decode('latin1')
        if isinstance(text, bytes):
            # latin-1 maps every byte to exactly one code point, so this
            # decodes the raw token data losslessly
            text = text.decode('latin1')

        # do not call Lexer.get_tokens() because stripping is not optional.
        text = text.strip('\n') + '\n'
        for i, t, v in self.get_tokens_unprocessed(text):
            yield t, v

    def get_tokens_unprocessed(self, text):
        length = 0
        # each input line has the form "<token type>\t<repr of value>"
        for match in line_re.finditer(text):
            try:
                ttypestr, val = match.group().rstrip().split('\t', 1)
                ttype = _ttype_cache.get(ttypestr)
                if not ttype:
                    ttype = Token
                    ttypes = ttypestr.split('.')[1:]
                    for ttype_ in ttypes:
                        if not ttype_ or not ttype_[0].isupper():
                            raise ValueError('malformed token name')
                        ttype = getattr(ttype, ttype_)
                    _ttype_cache[ttypestr] = ttype
                val = ast.literal_eval(val)
                if not isinstance(val, str):
                    raise ValueError('expected str')
            except (SyntaxError, ValueError):
                # malformed lines are emitted verbatim as Error tokens
                val = match.group()
                ttype = Error
            yield length, ttype, val
            length += len(val)
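

if __name__ == '__main__':
    # Hedged usage sketch, not part of the upstream module: round-trip a
    # small Python snippet through RawTokenFormatter and feed the raw token
    # dump back into RawTokenLexer.  Only standard pygments APIs are used.
    from pygments import highlight
    from pygments.formatters import RawTokenFormatter
    from pygments.lexers import PythonLexer

    raw = highlight("print('hi')\n", PythonLexer(), RawTokenFormatter())
    # RawTokenFormatter emits bytes; get_tokens() above accepts them directly.
    for ttype, value in RawTokenLexer().get_tokens(raw):
        print(ttype, repr(value))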