r.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. """
  2. pygments.lexers.r
  3. ~~~~~~~~~~~~~~~~~
  4. Lexers for the R/S languages.
  5. :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import Lexer, RegexLexer, include, do_insertions
  10. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  11. Number, Punctuation, Generic, Whitespace
  12. __all__ = ['RConsoleLexer', 'SLexer', 'RdLexer']
  13. line_re = re.compile('.*?\n')
  14. class RConsoleLexer(Lexer):
  15. """
  16. For R console transcripts or R CMD BATCH output files.
  17. """
  18. name = 'RConsole'
  19. aliases = ['rconsole', 'rout']
  20. filenames = ['*.Rout']
  21. url = 'https://www.r-project.org'
  22. version_added = ''
  23. _example = "rconsole/r-console-transcript.Rout"
  24. def get_tokens_unprocessed(self, text):
  25. slexer = SLexer(**self.options)
  26. current_code_block = ''
  27. insertions = []
  28. for match in line_re.finditer(text):
  29. line = match.group()
  30. if line.startswith('>') or line.startswith('+'):
  31. # Colorize the prompt as such,
  32. # then put rest of line into current_code_block
  33. insertions.append((len(current_code_block),
  34. [(0, Generic.Prompt, line[:2])]))
  35. current_code_block += line[2:]
  36. else:
  37. # We have reached a non-prompt line!
  38. # If we have stored prompt lines, need to process them first.
  39. if current_code_block:
  40. # Weave together the prompts and highlight code.
  41. yield from do_insertions(
  42. insertions, slexer.get_tokens_unprocessed(current_code_block))
  43. # Reset vars for next code block.
  44. current_code_block = ''
  45. insertions = []
  46. # Now process the actual line itself, this is output from R.
  47. yield match.start(), Generic.Output, line
  48. # If we happen to end on a code block with nothing after it, need to
  49. # process the last code block. This is neither elegant nor DRY so
  50. # should be changed.
  51. if current_code_block:
  52. yield from do_insertions(
  53. insertions, slexer.get_tokens_unprocessed(current_code_block))
  54. class SLexer(RegexLexer):
  55. """
  56. For S, S-plus, and R source code.
  57. """
  58. name = 'S'
  59. aliases = ['splus', 's', 'r']
  60. filenames = ['*.S', '*.R', '.Rhistory', '.Rprofile', '.Renviron']
  61. mimetypes = ['text/S-plus', 'text/S', 'text/x-r-source', 'text/x-r',
  62. 'text/x-R', 'text/x-r-history', 'text/x-r-profile']
  63. url = 'https://www.r-project.org'
  64. version_added = '0.10'
  65. valid_name = r'`[^`\\]*(?:\\.[^`\\]*)*`|(?:[a-zA-Z]|\.[A-Za-z_.])[\w.]*|\.'
  66. tokens = {
  67. 'comments': [
  68. (r'#.*$', Comment.Single),
  69. ],
  70. 'valid_name': [
  71. (valid_name, Name),
  72. ],
  73. 'function_name': [
  74. (rf'({valid_name})\s*(?=\()', Name.Function),
  75. ],
  76. 'punctuation': [
  77. (r'\[{1,2}|\]{1,2}|\(|\)|;|,', Punctuation),
  78. ],
  79. 'keywords': [
  80. (r'(if|else|for|while|repeat|in|next|break|return|switch|function)'
  81. r'(?![\w.])',
  82. Keyword.Reserved),
  83. ],
  84. 'operators': [
  85. (r'<<?-|->>?|-|==|<=|>=|\|>|<|>|&&?|!=|\|\|?|\?', Operator),
  86. (r'\*|\+|\^|/|!|%[^%]*%|=|~|\$|@|:{1,3}', Operator),
  87. ],
  88. 'builtin_symbols': [
  89. (r'(NULL|NA(_(integer|real|complex|character)_)?|'
  90. r'letters|LETTERS|Inf|TRUE|FALSE|NaN|pi|\.\.(\.|[0-9]+))'
  91. r'(?![\w.])',
  92. Keyword.Constant),
  93. (r'(T|F)\b', Name.Builtin.Pseudo),
  94. ],
  95. 'numbers': [
  96. # hex number
  97. (r'0[xX][a-fA-F0-9]+([pP][0-9]+)?[Li]?', Number.Hex),
  98. # decimal number
  99. (r'[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[Li]?',
  100. Number),
  101. ],
  102. 'statements': [
  103. include('comments'),
  104. # whitespaces
  105. (r'\s+', Whitespace),
  106. (r'\'', String, 'string_squote'),
  107. (r'\"', String, 'string_dquote'),
  108. include('builtin_symbols'),
  109. include('keywords'),
  110. include('function_name'),
  111. include('valid_name'),
  112. include('numbers'),
  113. include('punctuation'),
  114. include('operators'),
  115. ],
  116. 'root': [
  117. # calls:
  118. include('statements'),
  119. # blocks:
  120. (r'\{|\}', Punctuation),
  121. # (r'\{', Punctuation, 'block'),
  122. (r'.', Text),
  123. ],
  124. # 'block': [
  125. # include('statements'),
  126. # ('\{', Punctuation, '#push'),
  127. # ('\}', Punctuation, '#pop')
  128. # ],
  129. 'string_squote': [
  130. (r'([^\'\\]|\\.)*\'', String, '#pop'),
  131. ],
  132. 'string_dquote': [
  133. (r'([^"\\]|\\.)*"', String, '#pop'),
  134. ],
  135. }
  136. def analyse_text(text):
  137. if re.search(r'[a-z0-9_\])\s]<-(?!-)', text):
  138. return 0.11
  139. class RdLexer(RegexLexer):
  140. """
  141. Pygments Lexer for R documentation (Rd) files
  142. This is a very minimal implementation, highlighting little more
  143. than the macros. A description of Rd syntax is found in `Writing R
  144. Extensions <http://cran.r-project.org/doc/manuals/R-exts.html>`_
  145. and `Parsing Rd files <http://developer.r-project.org/parseRd.pdf>`_.
  146. """
  147. name = 'Rd'
  148. aliases = ['rd']
  149. filenames = ['*.Rd']
  150. mimetypes = ['text/x-r-doc']
  151. url = 'http://cran.r-project.org/doc/manuals/R-exts.html'
  152. version_added = '1.6'
  153. # To account for verbatim / LaTeX-like / and R-like areas
  154. # would require parsing.
  155. tokens = {
  156. 'root': [
  157. # catch escaped brackets and percent sign
  158. (r'\\[\\{}%]', String.Escape),
  159. # comments
  160. (r'%.*$', Comment),
  161. # special macros with no arguments
  162. (r'\\(?:cr|l?dots|R|tab)\b', Keyword.Constant),
  163. # macros
  164. (r'\\[a-zA-Z]+\b', Keyword),
  165. # special preprocessor macros
  166. (r'^\s*#(?:ifn?def|endif).*\b', Comment.Preproc),
  167. # non-escaped brackets
  168. (r'[{}]', Name.Builtin),
  169. # everything else
  170. (r'[^\\%\n{}]+', Text),
  171. (r'.', Text),
  172. ]
  173. }