html.py 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995
  1. """
  2. pygments.formatters.html
  3. ~~~~~~~~~~~~~~~~~~~~~~~~
  4. Formatter for HTML output.
  5. :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import functools
  9. import os
  10. import sys
  11. import os.path
  12. from io import StringIO
  13. from pygments.formatter import Formatter
  14. from pygments.token import Token, Text, STANDARD_TYPES
  15. from pygments.util import get_bool_opt, get_int_opt, get_list_opt
  16. try:
  17. import ctags
  18. except ImportError:
  19. ctags = None
  20. __all__ = ['HtmlFormatter']
  21. _escape_html_table = {
  22. ord('&'): '&',
  23. ord('<'): '&lt;',
  24. ord('>'): '&gt;',
  25. ord('"'): '&quot;',
  26. ord("'"): '&#39;',
  27. }
  28. def escape_html(text, table=_escape_html_table):
  29. """Escape &, <, > as well as single and double quotes for HTML."""
  30. return text.translate(table)
  31. def webify(color):
  32. if color.startswith('calc') or color.startswith('var'):
  33. return color
  34. else:
  35. # Check if the color can be shortened from 6 to 3 characters
  36. color = color.upper()
  37. if (len(color) == 6 and
  38. ( color[0] == color[1]
  39. and color[2] == color[3]
  40. and color[4] == color[5])):
  41. return f'#{color[0]}{color[2]}{color[4]}'
  42. else:
  43. return f'#{color}'
  44. def _get_ttype_class(ttype):
  45. fname = STANDARD_TYPES.get(ttype)
  46. if fname:
  47. return fname
  48. aname = ''
  49. while fname is None:
  50. aname = '-' + ttype[-1] + aname
  51. ttype = ttype.parent
  52. fname = STANDARD_TYPES.get(ttype)
  53. return fname + aname
# Skeleton of a generated stylesheet: a "generated by" comment followed by
# the ``%(styledefs)s`` placeholder.  ``_wrap_full`` fills it when writing an
# external CSS file, and ``DOC_HEADER`` embeds it inside its <style> element.
CSSFILE_TEMPLATE = '''\
/*
generated by Pygments <https://pygments.org/>
Copyright 2006-2025 by the Pygments team.
Licensed under the BSD license, see LICENSE for details.
*/
%(styledefs)s
'''
# Opening part of a full HTML document with the stylesheet inlined.
# %-format placeholders: ``title``, ``encoding`` and (via CSSFILE_TEMPLATE)
# ``styledefs``.  Yielded by ``_wrap_full`` when no ``cssfile`` is set.
DOC_HEADER = '''\
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<!--
generated by Pygments <https://pygments.org/>
Copyright 2006-2025 by the Pygments team.
Licensed under the BSD license, see LICENSE for details.
-->
<html>
<head>
<title>%(title)s</title>
<meta http-equiv="content-type" content="text/html; charset=%(encoding)s">
<style type="text/css">
''' + CSSFILE_TEMPLATE + '''
</style>
</head>
<body>
<h2>%(title)s</h2>
'''
# Opening part of a full HTML document that links to an external stylesheet.
# %-format placeholders: ``title``, ``encoding`` and ``cssfile``.  Yielded by
# ``_wrap_full`` when the ``cssfile`` option is set.
DOC_HEADER_EXTERNALCSS = '''\
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>%(title)s</title>
<meta http-equiv="content-type" content="text/html; charset=%(encoding)s">
<link rel="stylesheet" href="%(cssfile)s" type="text/css">
</head>
<body>
<h2>%(title)s</h2>
'''
# Closing tags matching either document header; ``_wrap_full`` yields this
# after all inner content.
DOC_FOOTER = '''\
</body>
</html>
'''
  97. class HtmlFormatter(Formatter):
  98. r"""
  99. Format tokens as HTML 4 ``<span>`` tags. By default, the content is enclosed
  100. in a ``<pre>`` tag, itself wrapped in a ``<div>`` tag (but see the `nowrap` option).
  101. The ``<div>``'s CSS class can be set by the `cssclass` option.
  102. If the `linenos` option is set to ``"table"``, the ``<pre>`` is
  103. additionally wrapped inside a ``<table>`` which has one row and two
  104. cells: one containing the line numbers and one containing the code.
  105. Example:
  106. .. sourcecode:: html
  107. <div class="highlight" >
  108. <table><tr>
  109. <td class="linenos" title="click to toggle"
  110. onclick="with (this.firstChild.style)
  111. { display = (display == '') ? 'none' : '' }">
  112. <pre>1
  113. 2</pre>
  114. </td>
  115. <td class="code">
  116. <pre><span class="Ke">def </span><span class="NaFu">foo</span>(bar):
  117. <span class="Ke">pass</span>
  118. </pre>
  119. </td>
  120. </tr></table></div>
  121. (whitespace added to improve clarity).
  122. A list of lines can be specified using the `hl_lines` option to make these
  123. lines highlighted (as of Pygments 0.11).
  124. With the `full` option, a complete HTML 4 document is output, including
  125. the style definitions inside a ``<style>`` tag, or in a separate file if
  126. the `cssfile` option is given.
  127. When `tagsfile` is set to the path of a ctags index file, it is used to
  128. generate hyperlinks from names to their definition. You must enable
  129. `lineanchors` and run ctags with the `-n` option for this to work. The
  130. `python-ctags` module from PyPI must be installed to use this feature;
  131. otherwise a `RuntimeError` will be raised.
  132. The `get_style_defs(arg='')` method of a `HtmlFormatter` returns a string
  133. containing CSS rules for the CSS classes used by the formatter. The
  134. argument `arg` can be used to specify additional CSS selectors that
  135. are prepended to the classes. A call `fmter.get_style_defs('td .code')`
  136. would result in the following CSS classes:
  137. .. sourcecode:: css
  138. td .code .kw { font-weight: bold; color: #00FF00 }
  139. td .code .cm { color: #999999 }
  140. ...
  141. If you have Pygments 0.6 or higher, you can also pass a list or tuple to the
  142. `get_style_defs()` method to request multiple prefixes for the tokens:
  143. .. sourcecode:: python
  144. formatter.get_style_defs(['div.syntax pre', 'pre.syntax'])
  145. The output would then look like this:
  146. .. sourcecode:: css
  147. div.syntax pre .kw,
  148. pre.syntax .kw { font-weight: bold; color: #00FF00 }
  149. div.syntax pre .cm,
  150. pre.syntax .cm { color: #999999 }
  151. ...
  152. Additional options accepted:
  153. `nowrap`
  154. If set to ``True``, don't add a ``<pre>`` and a ``<div>`` tag
  155. around the tokens. This disables most other options (default: ``False``).
  156. `full`
  157. Tells the formatter to output a "full" document, i.e. a complete
  158. self-contained document (default: ``False``).
  159. `title`
  160. If `full` is true, the title that should be used to caption the
  161. document (default: ``''``).
  162. `style`
  163. The style to use, can be a string or a Style subclass (default:
  164. ``'default'``). This option has no effect if the `cssfile`
  165. and `noclobber_cssfile` option are given and the file specified in
  166. `cssfile` exists.
  167. `noclasses`
  168. If set to true, token ``<span>`` tags (as well as line number elements)
  169. will not use CSS classes, but inline styles. This is not recommended
  170. for larger pieces of code since it increases output size by quite a bit
  171. (default: ``False``).
  172. `classprefix`
  173. Since the token types use relatively short class names, they may clash
  174. with some of your own class names. In this case you can use the
  175. `classprefix` option to give a string to prepend to all Pygments-generated
  176. CSS class names for token types.
  177. Note that this option also affects the output of `get_style_defs()`.
  178. `cssclass`
  179. CSS class for the wrapping ``<div>`` tag (default: ``'highlight'``).
  180. If you set this option, the default selector for `get_style_defs()`
  181. will be this class.
  182. .. versionadded:: 0.9
  183. If you select the ``'table'`` line numbers, the wrapping table will
  184. have a CSS class of this string plus ``'table'``, the default is
  185. accordingly ``'highlighttable'``.
  186. `cssstyles`
  187. Inline CSS styles for the wrapping ``<div>`` tag (default: ``''``).
  188. `prestyles`
  189. Inline CSS styles for the ``<pre>`` tag (default: ``''``).
  190. .. versionadded:: 0.11
  191. `cssfile`
  192. If the `full` option is true and this option is given, it must be the
  193. name of an external file. If the filename does not include an absolute
  194. path, the file's path will be assumed to be relative to the main output
  195. file's path, if the latter can be found. The stylesheet is then written
  196. to this file instead of the HTML file.
  197. .. versionadded:: 0.6
  198. `noclobber_cssfile`
  199. If `cssfile` is given and the specified file exists, the css file will
  200. not be overwritten. This allows the use of the `full` option in
  201. combination with a user specified css file. Default is ``False``.
  202. .. versionadded:: 1.1
  203. `linenos`
  204. If set to ``'table'``, output line numbers as a table with two cells,
  205. one containing the line numbers, the other the whole code. This is
  206. copy-and-paste-friendly, but may cause alignment problems with some
  207. browsers or fonts. If set to ``'inline'``, the line numbers will be
  208. integrated in the ``<pre>`` tag that contains the code (that setting
  209. is *new in Pygments 0.8*).
  210. For compatibility with Pygments 0.7 and earlier, every true value
  211. except ``'inline'`` means the same as ``'table'`` (in particular, that
  212. means also ``True``).
  213. The default value is ``False``, which means no line numbers at all.
  214. **Note:** with the default ("table") line number mechanism, the line
  215. numbers and code can have different line heights in Internet Explorer
  216. unless you give the enclosing ``<pre>`` tags an explicit ``line-height``
  217. CSS property (you get the default line spacing with ``line-height:
  218. 125%``).
  219. `hl_lines`
  220. Specify a list of lines to be highlighted. The line numbers are always
  221. relative to the input (i.e. the first line is line 1) and are
  222. independent of `linenostart`.
  223. .. versionadded:: 0.11
  224. `linenostart`
  225. The line number for the first line (default: ``1``).
  226. `linenostep`
  227. If set to a number n > 1, only every nth line number is printed.
  228. `linenospecial`
  229. If set to a number n > 0, every nth line number is given the CSS
  230. class ``"special"`` (default: ``0``).
  231. `nobackground`
  232. If set to ``True``, the formatter won't output the background color
  233. for the wrapping element (this automatically defaults to ``False``
  234. when there is no wrapping element [eg: no argument for the
  235. `get_style_defs` method given]) (default: ``False``).
  236. .. versionadded:: 0.6
  237. `lineseparator`
  238. This string is output between lines of code. It defaults to ``"\n"``,
  239. which is enough to break a line inside ``<pre>`` tags, but you can
  240. e.g. set it to ``"<br>"`` to get HTML line breaks.
  241. .. versionadded:: 0.7
  242. `lineanchors`
  243. If set to a nonempty string, e.g. ``foo``, the formatter will wrap each
  244. output line in an anchor tag with an ``id`` (and `name`) of ``foo-linenumber``.
  245. This allows easy linking to certain lines.
  246. .. versionadded:: 0.9
  247. `linespans`
  248. If set to a nonempty string, e.g. ``foo``, the formatter will wrap each
  249. output line in a span tag with an ``id`` of ``foo-linenumber``.
  250. This allows easy access to lines via javascript.
  251. .. versionadded:: 1.6
  252. `anchorlinenos`
  253. If set to `True`, will wrap line numbers in <a> tags. Used in
  254. combination with `linenos` and `lineanchors`.
  255. `tagsfile`
  256. If set to the path of a ctags file, wrap names in anchor tags that
  257. link to their definitions. `lineanchors` should be used, and the
  258. tags file should specify line numbers (see the `-n` option to ctags).
  259. The tags file is assumed to be encoded in UTF-8.
  260. .. versionadded:: 1.6
  261. `tagurlformat`
  262. A string formatting pattern used to generate links to ctags definitions.
  263. Available variables are `%(path)s`, `%(fname)s` and `%(fext)s`.
  264. Defaults to an empty string, resulting in just `#prefix-number` links.
  265. .. versionadded:: 1.6
  266. `filename`
  267. A string used to generate a filename when rendering ``<pre>`` blocks,
  268. for example if displaying source code. If `linenos` is set to
  269. ``'table'`` then the filename will be rendered in an initial row
  270. containing a single `<th>` which spans both columns.
  271. .. versionadded:: 2.1
  272. `wrapcode`
  273. Wrap the code inside ``<pre>`` blocks using ``<code>``, as recommended
  274. by the HTML5 specification.
  275. .. versionadded:: 2.4
  276. `debug_token_types`
  277. Add ``title`` attributes to all token ``<span>`` tags that show the
  278. name of the token.
  279. .. versionadded:: 2.10
  280. **Subclassing the HTML formatter**
  281. .. versionadded:: 0.7
  282. The HTML formatter is now built in a way that allows easy subclassing, thus
  283. customizing the output HTML code. The `format()` method calls
  284. `self._format_lines()` which returns a generator that yields tuples of ``(1,
  285. line)``, where the ``1`` indicates that the ``line`` is a line of the
  286. formatted source code.
  287. If the `nowrap` option is set, the generator is then iterated over and the
  288. resulting HTML is output.
  289. Otherwise, `format()` calls `self.wrap()`, which wraps the generator with
  290. other generators. These may add some HTML code to the one generated by
  291. `_format_lines()`, either by modifying the lines generated by the latter,
  292. then yielding them again with ``(1, line)``, and/or by yielding other HTML
  293. code before or after the lines, with ``(0, html)``. The distinction between
  294. source lines and other code makes it possible to wrap the generator multiple
  295. times.
  296. The default `wrap()` implementation adds a ``<div>`` and a ``<pre>`` tag.
  297. A custom `HtmlFormatter` subclass could look like this:
  298. .. sourcecode:: python
  299. class CodeHtmlFormatter(HtmlFormatter):
  300. def wrap(self, source, *, include_div):
  301. return self._wrap_code(source)
  302. def _wrap_code(self, source):
  303. yield 0, '<code>'
  304. for i, t in source:
  305. if i == 1:
  306. # it's a line of formatted code
  307. t += '<br>'
  308. yield i, t
  309. yield 0, '</code>'
  310. This results in wrapping the formatted lines with a ``<code>`` tag, where the
  311. source lines are broken using ``<br>`` tags.
  312. After calling `wrap()`, the `format()` method also adds the "line numbers"
  313. and/or "full document" wrappers if the respective options are set. Then, all
  314. HTML yielded by the wrapped generator is output.
  315. """
    # Class-level metadata: display name, short aliases and filename globs
    # for this formatter (presumably consumed by the formatter lookup
    # machinery outside this file -- TODO confirm).
    name = 'HTML'
    aliases = ['html']
    filenames = ['*.html', '*.htm']
  319. def __init__(self, **options):
  320. Formatter.__init__(self, **options)
  321. self.title = self._decodeifneeded(self.title)
  322. self.nowrap = get_bool_opt(options, 'nowrap', False)
  323. self.noclasses = get_bool_opt(options, 'noclasses', False)
  324. self.classprefix = options.get('classprefix', '')
  325. self.cssclass = self._decodeifneeded(options.get('cssclass', 'highlight'))
  326. self.cssstyles = self._decodeifneeded(options.get('cssstyles', ''))
  327. self.prestyles = self._decodeifneeded(options.get('prestyles', ''))
  328. self.cssfile = self._decodeifneeded(options.get('cssfile', ''))
  329. self.noclobber_cssfile = get_bool_opt(options, 'noclobber_cssfile', False)
  330. self.tagsfile = self._decodeifneeded(options.get('tagsfile', ''))
  331. self.tagurlformat = self._decodeifneeded(options.get('tagurlformat', ''))
  332. self.filename = self._decodeifneeded(options.get('filename', ''))
  333. self.wrapcode = get_bool_opt(options, 'wrapcode', False)
  334. self.span_element_openers = {}
  335. self.debug_token_types = get_bool_opt(options, 'debug_token_types', False)
  336. if self.tagsfile:
  337. if not ctags:
  338. raise RuntimeError('The "ctags" package must to be installed '
  339. 'to be able to use the "tagsfile" feature.')
  340. self._ctags = ctags.CTags(self.tagsfile)
  341. linenos = options.get('linenos', False)
  342. if linenos == 'inline':
  343. self.linenos = 2
  344. elif linenos:
  345. # compatibility with <= 0.7
  346. self.linenos = 1
  347. else:
  348. self.linenos = 0
  349. self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
  350. self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
  351. self.linenospecial = abs(get_int_opt(options, 'linenospecial', 0))
  352. self.nobackground = get_bool_opt(options, 'nobackground', False)
  353. self.lineseparator = options.get('lineseparator', '\n')
  354. self.lineanchors = options.get('lineanchors', '')
  355. self.linespans = options.get('linespans', '')
  356. self.anchorlinenos = get_bool_opt(options, 'anchorlinenos', False)
  357. self.hl_lines = set()
  358. for lineno in get_list_opt(options, 'hl_lines', []):
  359. try:
  360. self.hl_lines.add(int(lineno))
  361. except ValueError:
  362. pass
  363. self._create_stylesheet()
  364. def _get_css_class(self, ttype):
  365. """Return the css class of this token type prefixed with
  366. the classprefix option."""
  367. ttypeclass = _get_ttype_class(ttype)
  368. if ttypeclass:
  369. return self.classprefix + ttypeclass
  370. return ''
  371. def _get_css_classes(self, ttype):
  372. """Return the CSS classes of this token type prefixed with the classprefix option."""
  373. cls = self._get_css_class(ttype)
  374. while ttype not in STANDARD_TYPES:
  375. ttype = ttype.parent
  376. cls = self._get_css_class(ttype) + ' ' + cls
  377. return cls or ''
  378. def _get_css_inline_styles(self, ttype):
  379. """Return the inline CSS styles for this token type."""
  380. cclass = self.ttype2class.get(ttype)
  381. while cclass is None:
  382. ttype = ttype.parent
  383. cclass = self.ttype2class.get(ttype)
  384. return cclass or ''
  385. def _create_stylesheet(self):
  386. t2c = self.ttype2class = {Token: ''}
  387. c2s = self.class2style = {}
  388. for ttype, ndef in self.style:
  389. name = self._get_css_class(ttype)
  390. style = ''
  391. if ndef['color']:
  392. style += 'color: {}; '.format(webify(ndef['color']))
  393. if ndef['bold']:
  394. style += 'font-weight: bold; '
  395. if ndef['italic']:
  396. style += 'font-style: italic; '
  397. if ndef['underline']:
  398. style += 'text-decoration: underline; '
  399. if ndef['bgcolor']:
  400. style += 'background-color: {}; '.format(webify(ndef['bgcolor']))
  401. if ndef['border']:
  402. style += 'border: 1px solid {}; '.format(webify(ndef['border']))
  403. if style:
  404. t2c[ttype] = name
  405. # save len(ttype) to enable ordering the styles by
  406. # hierarchy (necessary for CSS cascading rules!)
  407. c2s[name] = (style[:-2], ttype, len(ttype))
  408. def get_style_defs(self, arg=None):
  409. """
  410. Return CSS style definitions for the classes produced by the current
  411. highlighting style. ``arg`` can be a string or list of selectors to
  412. insert before the token type classes.
  413. """
  414. style_lines = []
  415. style_lines.extend(self.get_linenos_style_defs())
  416. style_lines.extend(self.get_background_style_defs(arg))
  417. style_lines.extend(self.get_token_style_defs(arg))
  418. return '\n'.join(style_lines)
  419. def get_token_style_defs(self, arg=None):
  420. prefix = self.get_css_prefix(arg)
  421. styles = [
  422. (level, ttype, cls, style)
  423. for cls, (style, ttype, level) in self.class2style.items()
  424. if cls and style
  425. ]
  426. styles.sort()
  427. lines = [
  428. f'{prefix(cls)} {{ {style} }} /* {repr(ttype)[6:]} */'
  429. for (level, ttype, cls, style) in styles
  430. ]
  431. return lines
  432. def get_background_style_defs(self, arg=None):
  433. prefix = self.get_css_prefix(arg)
  434. bg_color = self.style.background_color
  435. hl_color = self.style.highlight_color
  436. lines = []
  437. if arg and not self.nobackground and bg_color is not None:
  438. text_style = ''
  439. if Text in self.ttype2class:
  440. text_style = ' ' + self.class2style[self.ttype2class[Text]][0]
  441. lines.insert(
  442. 0, '{}{{ background: {};{} }}'.format(
  443. prefix(''), bg_color, text_style
  444. )
  445. )
  446. if hl_color is not None:
  447. lines.insert(
  448. 0, '{} {{ background-color: {} }}'.format(prefix('hll'), hl_color)
  449. )
  450. return lines
  451. def get_linenos_style_defs(self):
  452. lines = [
  453. f'pre {{ {self._pre_style} }}',
  454. f'td.linenos .normal {{ {self._linenos_style} }}',
  455. f'span.linenos {{ {self._linenos_style} }}',
  456. f'td.linenos .special {{ {self._linenos_special_style} }}',
  457. f'span.linenos.special {{ {self._linenos_special_style} }}',
  458. ]
  459. return lines
  460. def get_css_prefix(self, arg):
  461. if arg is None:
  462. arg = ('cssclass' in self.options and '.'+self.cssclass or '')
  463. if isinstance(arg, str):
  464. args = [arg]
  465. else:
  466. args = list(arg)
  467. def prefix(cls):
  468. if cls:
  469. cls = '.' + cls
  470. tmp = []
  471. for arg in args:
  472. tmp.append((arg and arg + ' ' or '') + cls)
  473. return ', '.join(tmp)
  474. return prefix
  475. @property
  476. def _pre_style(self):
  477. return 'line-height: 125%;'
  478. @property
  479. def _linenos_style(self):
  480. color = self.style.line_number_color
  481. background_color = self.style.line_number_background_color
  482. return f'color: {color}; background-color: {background_color}; padding-left: 5px; padding-right: 5px;'
  483. @property
  484. def _linenos_special_style(self):
  485. color = self.style.line_number_special_color
  486. background_color = self.style.line_number_special_background_color
  487. return f'color: {color}; background-color: {background_color}; padding-left: 5px; padding-right: 5px;'
  488. def _decodeifneeded(self, value):
  489. if isinstance(value, bytes):
  490. if self.encoding:
  491. return value.decode(self.encoding)
  492. return value.decode()
  493. return value
    def _wrap_full(self, inner, outfile):
        """Wrap ``inner`` in a complete HTML document.

        Yields ``(0, header)``, then everything from ``inner``, then
        ``(0, DOC_FOOTER)``.  When ``self.cssfile`` is set, the stylesheet is
        written to that file (unless it already exists and
        ``noclobber_cssfile`` is on) and the header links to it; otherwise
        the stylesheet is embedded in the header.

        ``outfile`` is only inspected for a ``name`` attribute, which is used
        to resolve a relative ``cssfile`` path.

        Raises ``OSError`` (with a prefixed ``strerror``) if the CSS file
        cannot be written.
        """
        if self.cssfile:
            if os.path.isabs(self.cssfile):
                # it's an absolute filename
                cssfilename = self.cssfile
            else:
                try:
                    filename = outfile.name
                    if not filename or filename[0] == '<':
                        # pseudo files, e.g. name == '<fdopen>'
                        # (handled by the same fallback as a missing
                        # ``name`` attribute below)
                        raise AttributeError
                    cssfilename = os.path.join(os.path.dirname(filename),
                                               self.cssfile)
                except AttributeError:
                    print('Note: Cannot determine output file name, '
                          'using current directory as base for the CSS file name',
                          file=sys.stderr)
                    cssfilename = self.cssfile
            # write CSS file only if noclobber_cssfile isn't given as an option.
            try:
                if not os.path.exists(cssfilename) or not self.noclobber_cssfile:
                    with open(cssfilename, "w", encoding="utf-8") as cf:
                        cf.write(CSSFILE_TEMPLATE %
                                 {'styledefs': self.get_style_defs('body')})
            except OSError as err:
                # NOTE(review): this concatenation assumes err.strerror is a
                # str; it would raise TypeError if it were None -- confirm.
                err.strerror = 'Error writing CSS file: ' + err.strerror
                raise

            # The header references the option value as given, not the
            # resolved ``cssfilename``.
            yield 0, (DOC_HEADER_EXTERNALCSS %
                      dict(title=self.title,
                           cssfile=self.cssfile,
                           encoding=self.encoding))
        else:
            yield 0, (DOC_HEADER %
                      dict(title=self.title,
                           styledefs=self.get_style_defs('body'),
                           encoding=self.encoding))

        yield from inner
        yield 0, DOC_FOOTER
  532. def _wrap_tablelinenos(self, inner):
  533. dummyoutfile = StringIO()
  534. lncount = 0
  535. for t, line in inner:
  536. if t:
  537. lncount += 1
  538. dummyoutfile.write(line)
  539. fl = self.linenostart
  540. mw = len(str(lncount + fl - 1))
  541. sp = self.linenospecial
  542. st = self.linenostep
  543. anchor_name = self.lineanchors or self.linespans
  544. aln = self.anchorlinenos
  545. nocls = self.noclasses
  546. lines = []
  547. for i in range(fl, fl+lncount):
  548. print_line = i % st == 0
  549. special_line = sp and i % sp == 0
  550. if print_line:
  551. line = '%*d' % (mw, i)
  552. if aln:
  553. line = '<a href="#%s-%d">%s</a>' % (anchor_name, i, line)
  554. else:
  555. line = ' ' * mw
  556. if nocls:
  557. if special_line:
  558. style = f' style="{self._linenos_special_style}"'
  559. else:
  560. style = f' style="{self._linenos_style}"'
  561. else:
  562. if special_line:
  563. style = ' class="special"'
  564. else:
  565. style = ' class="normal"'
  566. if style:
  567. line = f'<span{style}>{line}</span>'
  568. lines.append(line)
  569. ls = '\n'.join(lines)
  570. # If a filename was specified, we can't put it into the code table as it
  571. # would misalign the line numbers. Hence we emit a separate row for it.
  572. filename_tr = ""
  573. if self.filename:
  574. filename_tr = (
  575. '<tr><th colspan="2" class="filename">'
  576. '<span class="filename">' + self.filename + '</span>'
  577. '</th></tr>')
  578. # in case you wonder about the seemingly redundant <div> here: since the
  579. # content in the other cell also is wrapped in a div, some browsers in
  580. # some configurations seem to mess up the formatting...
  581. yield 0, (f'<table class="{self.cssclass}table">' + filename_tr +
  582. '<tr><td class="linenos"><div class="linenodiv"><pre>' +
  583. ls + '</pre></div></td><td class="code">')
  584. yield 0, '<div>'
  585. yield 0, dummyoutfile.getvalue()
  586. yield 0, '</div>'
  587. yield 0, '</td></tr></table>'
  588. def _wrap_inlinelinenos(self, inner):
  589. # need a list of lines since we need the width of a single number :(
  590. inner_lines = list(inner)
  591. sp = self.linenospecial
  592. st = self.linenostep
  593. num = self.linenostart
  594. mw = len(str(len(inner_lines) + num - 1))
  595. anchor_name = self.lineanchors or self.linespans
  596. aln = self.anchorlinenos
  597. nocls = self.noclasses
  598. for _, inner_line in inner_lines:
  599. print_line = num % st == 0
  600. special_line = sp and num % sp == 0
  601. if print_line:
  602. line = '%*d' % (mw, num)
  603. else:
  604. line = ' ' * mw
  605. if nocls:
  606. if special_line:
  607. style = f' style="{self._linenos_special_style}"'
  608. else:
  609. style = f' style="{self._linenos_style}"'
  610. else:
  611. if special_line:
  612. style = ' class="linenos special"'
  613. else:
  614. style = ' class="linenos"'
  615. if style:
  616. linenos = f'<span{style}>{line}</span>'
  617. else:
  618. linenos = line
  619. if aln:
  620. yield 1, ('<a href="#%s-%d">%s</a>' % (anchor_name, num, linenos) +
  621. inner_line)
  622. else:
  623. yield 1, linenos + inner_line
  624. num += 1
  625. def _wrap_lineanchors(self, inner):
  626. s = self.lineanchors
  627. # subtract 1 since we have to increment i *before* yielding
  628. i = self.linenostart - 1
  629. for t, line in inner:
  630. if t:
  631. i += 1
  632. href = "" if self.linenos else ' href="#%s-%d"' % (s, i)
  633. yield 1, '<a id="%s-%d" name="%s-%d"%s></a>' % (s, i, s, i, href) + line
  634. else:
  635. yield 0, line
  636. def _wrap_linespans(self, inner):
  637. s = self.linespans
  638. i = self.linenostart - 1
  639. for t, line in inner:
  640. if t:
  641. i += 1
  642. yield 1, '<span id="%s-%d">%s</span>' % (s, i, line)
  643. else:
  644. yield 0, line
  645. def _wrap_div(self, inner):
  646. style = []
  647. if (self.noclasses and not self.nobackground and
  648. self.style.background_color is not None):
  649. style.append(f'background: {self.style.background_color}')
  650. if self.cssstyles:
  651. style.append(self.cssstyles)
  652. style = '; '.join(style)
  653. yield 0, ('<div' + (self.cssclass and f' class="{self.cssclass}"') +
  654. (style and (f' style="{style}"')) + '>')
  655. yield from inner
  656. yield 0, '</div>\n'
  657. def _wrap_pre(self, inner):
  658. style = []
  659. if self.prestyles:
  660. style.append(self.prestyles)
  661. if self.noclasses:
  662. style.append(self._pre_style)
  663. style = '; '.join(style)
  664. if self.filename and self.linenos != 1:
  665. yield 0, ('<span class="filename">' + self.filename + '</span>')
  666. # the empty span here is to keep leading empty lines from being
  667. # ignored by HTML parsers
  668. yield 0, ('<pre' + (style and f' style="{style}"') + '><span></span>')
  669. yield from inner
  670. yield 0, '</pre>'
  671. def _wrap_code(self, inner):
  672. yield 0, '<code>'
  673. yield from inner
  674. yield 0, '</code>'
  675. @functools.lru_cache(maxsize=100)
  676. def _translate_parts(self, value):
  677. """HTML-escape a value and split it by newlines."""
  678. return value.translate(_escape_html_table).split('\n')
    def _format_lines(self, tokensource):
        """
        Just format the tokens, without any wrapping tags.
        Yield individual lines.

        ``tokensource`` yields ``(ttype, value)`` pairs.  Each yielded item is
        ``(1, html)`` where ``html`` is one output line ending in
        ``self.lineseparator``.  Adjacent tokens that share the same opening
        ``<span>`` tag are merged into one span.
        """
        nocls = self.noclasses
        lsep = self.lineseparator
        tagsfile = self.tagsfile

        # lspan: opening tag of the span currently open in ``line`` ('' if none)
        # line:  HTML fragments of the output line being assembled
        lspan = ''
        line = []
        for ttype, value in tokensource:
            # Opening tags are computed once per token type and memoised on
            # the instance.
            try:
                cspan = self.span_element_openers[ttype]
            except KeyError:
                title = ' title="{}"'.format('.'.join(ttype)) if self.debug_token_types else ''
                if nocls:
                    css_style = self._get_css_inline_styles(ttype)
                    if css_style:
                        # css_style is a class name at this point; replace it
                        # with that class's CSS declarations
                        css_style = self.class2style[css_style][0]
                        cspan = f'<span style="{css_style}"{title}>'
                    else:
                        cspan = ''
                else:
                    css_class = self._get_css_classes(ttype)
                    if css_class:
                        cspan = f'<span class="{css_class}"{title}>'
                    else:
                        cspan = ''
                self.span_element_openers[ttype] = cspan

            parts = self._translate_parts(value)

            if tagsfile and ttype in Token.Name:
                filename, linenumber = self._lookup_ctag(value)
                if linenumber:
                    base, filename = os.path.split(filename)
                    if base:
                        base += '/'
                    filename, extension = os.path.splitext(filename)
                    url = self.tagurlformat % {'path': base, 'fname': filename,
                                               'fext': extension}
                    # NOTE(review): ``parts`` is modified in place here; if
                    # _translate_parts hands back a shared (cached) list,
                    # these assignments alter what later calls receive --
                    # confirm whether a copy is needed.
                    parts[0] = "<a href=\"%s#%s-%d\">%s" % \
                        (url, self.lineanchors, linenumber, parts[0])
                    parts[-1] = parts[-1] + "</a>"

            # for all but the last line
            for part in parts[:-1]:
                if line:
                    # Also check for part being non-empty, so we avoid creating
                    # empty <span> tags
                    if lspan != cspan and part:
                        line.extend(((lspan and '</span>'), cspan, part,
                                     (cspan and '</span>'), lsep))
                    else:  # both are the same, or the current part was empty
                        line.extend((part, (lspan and '</span>'), lsep))
                    yield 1, ''.join(line)
                    line = []
                elif part:
                    yield 1, ''.join((cspan, part, (cspan and '</span>'), lsep))
                else:
                    yield 1, lsep
            # for the last line
            if line and parts[-1]:
                if lspan != cspan:
                    line.extend(((lspan and '</span>'), cspan, parts[-1]))
                    lspan = cspan
                else:
                    line.append(parts[-1])
            elif parts[-1]:
                line = [cspan, parts[-1]]
                lspan = cspan
            # else we neither have to open a new span nor set lspan

        # flush a final line that did not end in a newline
        if line:
            line.extend(((lspan and '</span>'), lsep))
            yield 1, ''.join(line)
  def _lookup_ctag(self, token):
      """
      Look ``token`` up in the ctags index held in ``self._ctags``.

      Returns ``(file, line_number)`` taken from the matching tag entry,
      or ``(None, None)`` when ``find`` reports no match.
      """
      tag_entry = ctags.TagEntry()
      found = self._ctags.find(tag_entry, token.encode(), 0)
      if not found:
          return None, None
      return tag_entry['file'].decode(), tag_entry['lineNumber']
  def _highlight_lines(self, tokensource):
      """
      Highlight the lines specified in the `hl_lines` option by
      post-processing the token stream coming from `_format_lines`.

      ``tokensource`` yields ``(code, text)`` pairs.  Items whose code is
      ``1`` are source lines; they are numbered from 1 and those whose
      number is in ``self.hl_lines`` are wrapped in a ``<span>``: with an
      inline background colour when ``self.noclasses`` is set, otherwise
      with ``class="hll"``.  Any other item is wrapper markup and is
      forwarded exactly once, unchanged, without consuming a line number.
      """
      hls = self.hl_lines
      lineno = 0

      for t, value in tokensource:
          if t != 1:
              # Wrapper markup: pass through and do NOT fall into the code
              # path below, which would emit it a second time as a line.
              yield t, value
              continue

          lineno += 1
          if lineno not in hls:
              yield 1, value
          elif self.noclasses:
              style = ''
              if self.style.highlight_color is not None:
                  style = (f' style="background-color: {self.style.highlight_color}"')
              yield 1, f'<span{style}>{value}</span>'
          else:
              yield 1, f'<span class="hll">{value}</span>'
  def wrap(self, source):
      """
      Wrap the ``source``, which is a generator yielding
      individual lines, in custom generators. See docstring
      for `format`. Can be overridden.

      The lines are always passed through `_wrap_pre`; when
      ``self.wrapcode`` is set they go through `_wrap_code` first, so the
      ``<code>`` element ends up inside the ``<pre>`` element.
      """
      wrapped = self._wrap_code(source) if self.wrapcode else source
      return self._wrap_pre(wrapped)
  def format_unencoded(self, tokensource, outfile):
      """
      Format ``tokensource`` and write the resulting HTML to ``outfile``.

      Formatting is a chain of nested generators; the user's options
      decide which ones take part.  Each generator takes at least one
      argument, ``inner``, and wraps the pieces of text it produces.

      Every generator yields 2-tuples ``(code, text)``.  A code of 1 means
      the text belongs to the source being highlighted; 0 means it is
      wrapping markup.  This lets several wrappers that work line by line
      on the original source (e.g. line number generators) be stacked.
      """
      wrapping = not self.nowrap
      stream = self._format_lines(tokensource)

      # Inline line numbers are a special case: they are added before the
      # highlighting step so they end up inside the highlighting tag.
      if wrapping and self.linenos == 2:
          stream = self._wrap_inlinelinenos(stream)

      if self.hl_lines:
          stream = self._highlight_lines(stream)

      if wrapping:
          if self.lineanchors:
              stream = self._wrap_lineanchors(stream)
          if self.linespans:
              stream = self._wrap_linespans(stream)
          stream = self.wrap(stream)
          if self.linenos == 1:
              stream = self._wrap_tablelinenos(stream)
          stream = self._wrap_div(stream)
          if self.full:
              stream = self._wrap_full(stream, outfile)

      for _code, piece in stream:
          outfile.write(piece)