main.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. from __future__ import annotations
  2. from collections.abc import Callable, Generator, Iterable, Mapping, MutableMapping
  3. from contextlib import contextmanager
  4. from typing import Any, Literal, overload
  5. from . import helpers, presets
  6. from .common import normalize_url, utils
  7. from .parser_block import ParserBlock
  8. from .parser_core import ParserCore
  9. from .parser_inline import ParserInline
  10. from .renderer import RendererHTML, RendererProtocol
  11. from .rules_core.state_core import StateCore
  12. from .token import Token
  13. from .utils import EnvType, OptionsDict, OptionsType, PresetType
  14. try:
  15. import linkify_it
  16. except ModuleNotFoundError:
  17. linkify_it = None
  18. _PRESETS: dict[str, PresetType] = {
  19. "default": presets.default.make(),
  20. "js-default": presets.js_default.make(),
  21. "zero": presets.zero.make(),
  22. "commonmark": presets.commonmark.make(),
  23. "gfm-like": presets.gfm_like.make(),
  24. }
  25. class MarkdownIt:
  26. def __init__(
  27. self,
  28. config: str | PresetType = "commonmark",
  29. options_update: Mapping[str, Any] | None = None,
  30. *,
  31. renderer_cls: Callable[[MarkdownIt], RendererProtocol] = RendererHTML,
  32. ):
  33. """Main parser class
  34. :param config: name of configuration to load or a pre-defined dictionary
  35. :param options_update: dictionary that will be merged into ``config["options"]``
  36. :param renderer_cls: the class to load as the renderer:
  37. ``self.renderer = renderer_cls(self)
  38. """
  39. # add modules
  40. self.utils = utils
  41. self.helpers = helpers
  42. # initialise classes
  43. self.inline = ParserInline()
  44. self.block = ParserBlock()
  45. self.core = ParserCore()
  46. self.renderer = renderer_cls(self)
  47. self.linkify = linkify_it.LinkifyIt() if linkify_it else None
  48. # set the configuration
  49. if options_update and not isinstance(options_update, Mapping):
  50. # catch signature change where renderer_cls was not used as a key-word
  51. raise TypeError(
  52. f"options_update should be a mapping: {options_update}"
  53. "\n(Perhaps you intended this to be the renderer_cls?)"
  54. )
  55. self.configure(config, options_update=options_update)
  56. def __repr__(self) -> str:
  57. return f"{self.__class__.__module__}.{self.__class__.__name__}()"
  58. @overload
  59. def __getitem__(self, name: Literal["inline"]) -> ParserInline:
  60. ...
  61. @overload
  62. def __getitem__(self, name: Literal["block"]) -> ParserBlock:
  63. ...
  64. @overload
  65. def __getitem__(self, name: Literal["core"]) -> ParserCore:
  66. ...
  67. @overload
  68. def __getitem__(self, name: Literal["renderer"]) -> RendererProtocol:
  69. ...
  70. @overload
  71. def __getitem__(self, name: str) -> Any:
  72. ...
  73. def __getitem__(self, name: str) -> Any:
  74. return {
  75. "inline": self.inline,
  76. "block": self.block,
  77. "core": self.core,
  78. "renderer": self.renderer,
  79. }[name]
  80. def set(self, options: OptionsType) -> None:
  81. """Set parser options (in the same format as in constructor).
  82. Probably, you will never need it, but you can change options after constructor call.
  83. __Note:__ To achieve the best possible performance, don't modify a
  84. `markdown-it` instance options on the fly. If you need multiple configurations
  85. it's best to create multiple instances and initialize each with separate config.
  86. """
  87. self.options = OptionsDict(options)
  88. def configure(
  89. self, presets: str | PresetType, options_update: Mapping[str, Any] | None = None
  90. ) -> MarkdownIt:
  91. """Batch load of all options and component settings.
  92. This is an internal method, and you probably will not need it.
  93. But if you will - see available presets and data structure
  94. [here](https://github.com/markdown-it/markdown-it/tree/master/lib/presets)
  95. We strongly recommend to use presets instead of direct config loads.
  96. That will give better compatibility with next versions.
  97. """
  98. if isinstance(presets, str):
  99. if presets not in _PRESETS:
  100. raise KeyError(f"Wrong `markdown-it` preset '{presets}', check name")
  101. config = _PRESETS[presets]
  102. else:
  103. config = presets
  104. if not config:
  105. raise ValueError("Wrong `markdown-it` config, can't be empty")
  106. options = config.get("options", {}) or {}
  107. if options_update:
  108. options = {**options, **options_update} # type: ignore
  109. self.set(options) # type: ignore
  110. if "components" in config:
  111. for name, component in config["components"].items():
  112. rules = component.get("rules", None)
  113. if rules:
  114. self[name].ruler.enableOnly(rules)
  115. rules2 = component.get("rules2", None)
  116. if rules2:
  117. self[name].ruler2.enableOnly(rules2)
  118. return self
  119. def get_all_rules(self) -> dict[str, list[str]]:
  120. """Return the names of all active rules."""
  121. rules = {
  122. chain: self[chain].ruler.get_all_rules()
  123. for chain in ["core", "block", "inline"]
  124. }
  125. rules["inline2"] = self.inline.ruler2.get_all_rules()
  126. return rules
  127. def get_active_rules(self) -> dict[str, list[str]]:
  128. """Return the names of all active rules."""
  129. rules = {
  130. chain: self[chain].ruler.get_active_rules()
  131. for chain in ["core", "block", "inline"]
  132. }
  133. rules["inline2"] = self.inline.ruler2.get_active_rules()
  134. return rules
  135. def enable(
  136. self, names: str | Iterable[str], ignoreInvalid: bool = False
  137. ) -> MarkdownIt:
  138. """Enable list or rules. (chainable)
  139. :param names: rule name or list of rule names to enable.
  140. :param ignoreInvalid: set `true` to ignore errors when rule not found.
  141. It will automatically find appropriate components,
  142. containing rules with given names. If rule not found, and `ignoreInvalid`
  143. not set - throws exception.
  144. Example::
  145. md = MarkdownIt().enable(['sub', 'sup']).disable('smartquotes')
  146. """
  147. result = []
  148. if isinstance(names, str):
  149. names = [names]
  150. for chain in ["core", "block", "inline"]:
  151. result.extend(self[chain].ruler.enable(names, True))
  152. result.extend(self.inline.ruler2.enable(names, True))
  153. missed = [name for name in names if name not in result]
  154. if missed and not ignoreInvalid:
  155. raise ValueError(f"MarkdownIt. Failed to enable unknown rule(s): {missed}")
  156. return self
  157. def disable(
  158. self, names: str | Iterable[str], ignoreInvalid: bool = False
  159. ) -> MarkdownIt:
  160. """The same as [[MarkdownIt.enable]], but turn specified rules off. (chainable)
  161. :param names: rule name or list of rule names to disable.
  162. :param ignoreInvalid: set `true` to ignore errors when rule not found.
  163. """
  164. result = []
  165. if isinstance(names, str):
  166. names = [names]
  167. for chain in ["core", "block", "inline"]:
  168. result.extend(self[chain].ruler.disable(names, True))
  169. result.extend(self.inline.ruler2.disable(names, True))
  170. missed = [name for name in names if name not in result]
  171. if missed and not ignoreInvalid:
  172. raise ValueError(f"MarkdownIt. Failed to disable unknown rule(s): {missed}")
  173. return self
  174. @contextmanager
  175. def reset_rules(self) -> Generator[None, None, None]:
  176. """A context manager, that will reset the current enabled rules on exit."""
  177. chain_rules = self.get_active_rules()
  178. yield
  179. for chain, rules in chain_rules.items():
  180. if chain != "inline2":
  181. self[chain].ruler.enableOnly(rules)
  182. self.inline.ruler2.enableOnly(chain_rules["inline2"])
  183. def add_render_rule(
  184. self, name: str, function: Callable[..., Any], fmt: str = "html"
  185. ) -> None:
  186. """Add a rule for rendering a particular Token type.
  187. Only applied when ``renderer.__output__ == fmt``
  188. """
  189. if self.renderer.__output__ == fmt:
  190. self.renderer.rules[name] = function.__get__(self.renderer) # type: ignore
  191. def use(
  192. self, plugin: Callable[..., None], *params: Any, **options: Any
  193. ) -> MarkdownIt:
  194. """Load specified plugin with given params into current parser instance. (chainable)
  195. It's just a sugar to call `plugin(md, params)` with curring.
  196. Example::
  197. def func(tokens, idx):
  198. tokens[idx].content = tokens[idx].content.replace('foo', 'bar')
  199. md = MarkdownIt().use(plugin, 'foo_replace', 'text', func)
  200. """
  201. plugin(self, *params, **options)
  202. return self
  203. def parse(self, src: str, env: EnvType | None = None) -> list[Token]:
  204. """Parse the source string to a token stream
  205. :param src: source string
  206. :param env: environment sandbox
  207. Parse input string and return list of block tokens (special token type
  208. "inline" will contain list of inline tokens).
  209. `env` is used to pass data between "distributed" rules and return additional
  210. metadata like reference info, needed for the renderer. It also can be used to
  211. inject data in specific cases. Usually, you will be ok to pass `{}`,
  212. and then pass updated object to renderer.
  213. """
  214. env = {} if env is None else env
  215. if not isinstance(env, MutableMapping):
  216. raise TypeError(f"Input data should be a MutableMapping, not {type(env)}")
  217. if not isinstance(src, str):
  218. raise TypeError(f"Input data should be a string, not {type(src)}")
  219. state = StateCore(src, self, env)
  220. self.core.process(state)
  221. return state.tokens
  222. def render(self, src: str, env: EnvType | None = None) -> Any:
  223. """Render markdown string into html. It does all magic for you :).
  224. :param src: source string
  225. :param env: environment sandbox
  226. :returns: The output of the loaded renderer
  227. `env` can be used to inject additional metadata (`{}` by default).
  228. But you will not need it with high probability. See also comment
  229. in [[MarkdownIt.parse]].
  230. """
  231. env = {} if env is None else env
  232. return self.renderer.render(self.parse(src, env), self.options, env)
  233. def parseInline(self, src: str, env: EnvType | None = None) -> list[Token]:
  234. """The same as [[MarkdownIt.parse]] but skip all block rules.
  235. :param src: source string
  236. :param env: environment sandbox
  237. It returns the
  238. block tokens list with the single `inline` element, containing parsed inline
  239. tokens in `children` property. Also updates `env` object.
  240. """
  241. env = {} if env is None else env
  242. if not isinstance(env, MutableMapping):
  243. raise TypeError(f"Input data should be an MutableMapping, not {type(env)}")
  244. if not isinstance(src, str):
  245. raise TypeError(f"Input data should be a string, not {type(src)}")
  246. state = StateCore(src, self, env)
  247. state.inlineMode = True
  248. self.core.process(state)
  249. return state.tokens
  250. def renderInline(self, src: str, env: EnvType | None = None) -> Any:
  251. """Similar to [[MarkdownIt.render]] but for single paragraph content.
  252. :param src: source string
  253. :param env: environment sandbox
  254. Similar to [[MarkdownIt.render]] but for single paragraph content. Result
  255. will NOT be wrapped into `<p>` tags.
  256. """
  257. env = {} if env is None else env
  258. return self.renderer.render(self.parseInline(src, env), self.options, env)
  259. # link methods
  260. def validateLink(self, url: str) -> bool:
  261. """Validate if the URL link is allowed in output.
  262. This validator can prohibit more than really needed to prevent XSS.
  263. It's a tradeoff to keep code simple and to be secure by default.
  264. Note: the url should be normalized at this point, and existing entities decoded.
  265. """
  266. return normalize_url.validateLink(url)
  267. def normalizeLink(self, url: str) -> str:
  268. """Normalize destination URLs in links
  269. ::
  270. [label]: destination 'title'
  271. ^^^^^^^^^^^
  272. """
  273. return normalize_url.normalizeLink(url)
  274. def normalizeLinkText(self, link: str) -> str:
  275. """Normalize autolink content
  276. ::
  277. <destination>
  278. ~~~~~~~~~~~
  279. """
  280. return normalize_url.normalizeLinkText(link)