renderer.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  1. """
  2. class Renderer
  3. Generates HTML from a parsed token stream. Each instance has an independent
  4. copy of the rules, which can be rewritten with ease. You can also add new
  5. rules if you create a plugin that adds new token types.
  6. """
  7. from __future__ import annotations
  8. from collections.abc import Sequence
  9. import inspect
  10. from typing import Any, ClassVar, Protocol
  11. from .common.utils import escapeHtml, unescapeAll
  12. from .token import Token
  13. from .utils import EnvType, OptionsDict
  14. class RendererProtocol(Protocol):
  15. __output__: ClassVar[str]
  16. def render(
  17. self, tokens: Sequence[Token], options: OptionsDict, env: EnvType
  18. ) -> Any:
  19. ...
  20. class RendererHTML(RendererProtocol):
  21. """Contains render rules for tokens. Can be updated and extended.
  22. Example:
  23. Each rule is called as independent static function with fixed signature:
  24. ::
  25. class Renderer:
  26. def token_type_name(self, tokens, idx, options, env) {
  27. # ...
  28. return renderedHTML
  29. ::
  30. class CustomRenderer(RendererHTML):
  31. def strong_open(self, tokens, idx, options, env):
  32. return '<b>'
  33. def strong_close(self, tokens, idx, options, env):
  34. return '</b>'
  35. md = MarkdownIt(renderer_cls=CustomRenderer)
  36. result = md.render(...)
  37. See https://github.com/markdown-it/markdown-it/blob/master/lib/renderer.js
  38. for more details and examples.
  39. """
  40. __output__ = "html"
  41. def __init__(self, parser: Any = None):
  42. self.rules = {
  43. k: v
  44. for k, v in inspect.getmembers(self, predicate=inspect.ismethod)
  45. if not (k.startswith("render") or k.startswith("_"))
  46. }
  47. def render(
  48. self, tokens: Sequence[Token], options: OptionsDict, env: EnvType
  49. ) -> str:
  50. """Takes token stream and generates HTML.
  51. :param tokens: list on block tokens to render
  52. :param options: params of parser instance
  53. :param env: additional data from parsed input
  54. """
  55. result = ""
  56. for i, token in enumerate(tokens):
  57. if token.type == "inline":
  58. if token.children:
  59. result += self.renderInline(token.children, options, env)
  60. elif token.type in self.rules:
  61. result += self.rules[token.type](tokens, i, options, env)
  62. else:
  63. result += self.renderToken(tokens, i, options, env)
  64. return result
  65. def renderInline(
  66. self, tokens: Sequence[Token], options: OptionsDict, env: EnvType
  67. ) -> str:
  68. """The same as ``render``, but for single token of `inline` type.
  69. :param tokens: list on block tokens to render
  70. :param options: params of parser instance
  71. :param env: additional data from parsed input (references, for example)
  72. """
  73. result = ""
  74. for i, token in enumerate(tokens):
  75. if token.type in self.rules:
  76. result += self.rules[token.type](tokens, i, options, env)
  77. else:
  78. result += self.renderToken(tokens, i, options, env)
  79. return result
  80. def renderToken(
  81. self,
  82. tokens: Sequence[Token],
  83. idx: int,
  84. options: OptionsDict,
  85. env: EnvType,
  86. ) -> str:
  87. """Default token renderer.
  88. Can be overridden by custom function
  89. :param idx: token index to render
  90. :param options: params of parser instance
  91. """
  92. result = ""
  93. needLf = False
  94. token = tokens[idx]
  95. # Tight list paragraphs
  96. if token.hidden:
  97. return ""
  98. # Insert a newline between hidden paragraph and subsequent opening
  99. # block-level tag.
  100. #
  101. # For example, here we should insert a newline before blockquote:
  102. # - a
  103. # >
  104. #
  105. if token.block and token.nesting != -1 and idx and tokens[idx - 1].hidden:
  106. result += "\n"
  107. # Add token name, e.g. `<img`
  108. result += ("</" if token.nesting == -1 else "<") + token.tag
  109. # Encode attributes, e.g. `<img src="foo"`
  110. result += self.renderAttrs(token)
  111. # Add a slash for self-closing tags, e.g. `<img src="foo" /`
  112. if token.nesting == 0 and options["xhtmlOut"]:
  113. result += " /"
  114. # Check if we need to add a newline after this tag
  115. if token.block:
  116. needLf = True
  117. if token.nesting == 1 and (idx + 1 < len(tokens)):
  118. nextToken = tokens[idx + 1]
  119. if nextToken.type == "inline" or nextToken.hidden: # noqa: SIM114
  120. # Block-level tag containing an inline tag.
  121. #
  122. needLf = False
  123. elif nextToken.nesting == -1 and nextToken.tag == token.tag:
  124. # Opening tag + closing tag of the same type. E.g. `<li></li>`.
  125. #
  126. needLf = False
  127. result += ">\n" if needLf else ">"
  128. return result
  129. @staticmethod
  130. def renderAttrs(token: Token) -> str:
  131. """Render token attributes to string."""
  132. result = ""
  133. for key, value in token.attrItems():
  134. result += " " + escapeHtml(key) + '="' + escapeHtml(str(value)) + '"'
  135. return result
  136. def renderInlineAsText(
  137. self,
  138. tokens: Sequence[Token] | None,
  139. options: OptionsDict,
  140. env: EnvType,
  141. ) -> str:
  142. """Special kludge for image `alt` attributes to conform CommonMark spec.
  143. Don't try to use it! Spec requires to show `alt` content with stripped markup,
  144. instead of simple escaping.
  145. :param tokens: list on block tokens to render
  146. :param options: params of parser instance
  147. :param env: additional data from parsed input
  148. """
  149. result = ""
  150. for token in tokens or []:
  151. if token.type == "text":
  152. result += token.content
  153. elif token.type == "image":
  154. if token.children:
  155. result += self.renderInlineAsText(token.children, options, env)
  156. elif token.type == "softbreak":
  157. result += "\n"
  158. return result
  159. ###################################################
  160. def code_inline(
  161. self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
  162. ) -> str:
  163. token = tokens[idx]
  164. return (
  165. "<code"
  166. + self.renderAttrs(token)
  167. + ">"
  168. + escapeHtml(tokens[idx].content)
  169. + "</code>"
  170. )
  171. def code_block(
  172. self,
  173. tokens: Sequence[Token],
  174. idx: int,
  175. options: OptionsDict,
  176. env: EnvType,
  177. ) -> str:
  178. token = tokens[idx]
  179. return (
  180. "<pre"
  181. + self.renderAttrs(token)
  182. + "><code>"
  183. + escapeHtml(tokens[idx].content)
  184. + "</code></pre>\n"
  185. )
  186. def fence(
  187. self,
  188. tokens: Sequence[Token],
  189. idx: int,
  190. options: OptionsDict,
  191. env: EnvType,
  192. ) -> str:
  193. token = tokens[idx]
  194. info = unescapeAll(token.info).strip() if token.info else ""
  195. langName = ""
  196. langAttrs = ""
  197. if info:
  198. arr = info.split(maxsplit=1)
  199. langName = arr[0]
  200. if len(arr) == 2:
  201. langAttrs = arr[1]
  202. if options.highlight:
  203. highlighted = options.highlight(
  204. token.content, langName, langAttrs
  205. ) or escapeHtml(token.content)
  206. else:
  207. highlighted = escapeHtml(token.content)
  208. if highlighted.startswith("<pre"):
  209. return highlighted + "\n"
  210. # If language exists, inject class gently, without modifying original token.
  211. # May be, one day we will add .deepClone() for token and simplify this part, but
  212. # now we prefer to keep things local.
  213. if info:
  214. # Fake token just to render attributes
  215. tmpToken = Token(type="", tag="", nesting=0, attrs=token.attrs.copy())
  216. tmpToken.attrJoin("class", options.langPrefix + langName)
  217. return (
  218. "<pre><code"
  219. + self.renderAttrs(tmpToken)
  220. + ">"
  221. + highlighted
  222. + "</code></pre>\n"
  223. )
  224. return (
  225. "<pre><code"
  226. + self.renderAttrs(token)
  227. + ">"
  228. + highlighted
  229. + "</code></pre>\n"
  230. )
  231. def image(
  232. self,
  233. tokens: Sequence[Token],
  234. idx: int,
  235. options: OptionsDict,
  236. env: EnvType,
  237. ) -> str:
  238. token = tokens[idx]
  239. # "alt" attr MUST be set, even if empty. Because it's mandatory and
  240. # should be placed on proper position for tests.
  241. if token.children:
  242. token.attrSet("alt", self.renderInlineAsText(token.children, options, env))
  243. else:
  244. token.attrSet("alt", "")
  245. return self.renderToken(tokens, idx, options, env)
  246. def hardbreak(
  247. self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
  248. ) -> str:
  249. return "<br />\n" if options.xhtmlOut else "<br>\n"
  250. def softbreak(
  251. self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
  252. ) -> str:
  253. return (
  254. ("<br />\n" if options.xhtmlOut else "<br>\n") if options.breaks else "\n"
  255. )
  256. def text(
  257. self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
  258. ) -> str:
  259. return escapeHtml(tokens[idx].content)
  260. def html_block(
  261. self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
  262. ) -> str:
  263. return tokens[idx].content
  264. def html_inline(
  265. self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
  266. ) -> str:
  267. return tokens[idx].content