123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355 |
- from __future__ import annotations
- from collections.abc import Callable, Generator, Iterable, Mapping, MutableMapping
- from contextlib import contextmanager
- from typing import Any, Literal, overload
- from . import helpers, presets
- from .common import normalize_url, utils
- from .parser_block import ParserBlock
- from .parser_core import ParserCore
- from .parser_inline import ParserInline
- from .renderer import RendererHTML, RendererProtocol
- from .rules_core.state_core import StateCore
- from .token import Token
- from .utils import EnvType, OptionsDict, OptionsType, PresetType
- try:
- import linkify_it
- except ModuleNotFoundError:
- linkify_it = None
- _PRESETS: dict[str, PresetType] = {
- "default": presets.default.make(),
- "js-default": presets.js_default.make(),
- "zero": presets.zero.make(),
- "commonmark": presets.commonmark.make(),
- "gfm-like": presets.gfm_like.make(),
- }
- class MarkdownIt:
- def __init__(
- self,
- config: str | PresetType = "commonmark",
- options_update: Mapping[str, Any] | None = None,
- *,
- renderer_cls: Callable[[MarkdownIt], RendererProtocol] = RendererHTML,
- ):
- """Main parser class
- :param config: name of configuration to load or a pre-defined dictionary
- :param options_update: dictionary that will be merged into ``config["options"]``
- :param renderer_cls: the class to load as the renderer:
- ``self.renderer = renderer_cls(self)
- """
- # add modules
- self.utils = utils
- self.helpers = helpers
- # initialise classes
- self.inline = ParserInline()
- self.block = ParserBlock()
- self.core = ParserCore()
- self.renderer = renderer_cls(self)
- self.linkify = linkify_it.LinkifyIt() if linkify_it else None
- # set the configuration
- if options_update and not isinstance(options_update, Mapping):
- # catch signature change where renderer_cls was not used as a key-word
- raise TypeError(
- f"options_update should be a mapping: {options_update}"
- "\n(Perhaps you intended this to be the renderer_cls?)"
- )
- self.configure(config, options_update=options_update)
- def __repr__(self) -> str:
- return f"{self.__class__.__module__}.{self.__class__.__name__}()"
- @overload
- def __getitem__(self, name: Literal["inline"]) -> ParserInline:
- ...
- @overload
- def __getitem__(self, name: Literal["block"]) -> ParserBlock:
- ...
- @overload
- def __getitem__(self, name: Literal["core"]) -> ParserCore:
- ...
- @overload
- def __getitem__(self, name: Literal["renderer"]) -> RendererProtocol:
- ...
- @overload
- def __getitem__(self, name: str) -> Any:
- ...
- def __getitem__(self, name: str) -> Any:
- return {
- "inline": self.inline,
- "block": self.block,
- "core": self.core,
- "renderer": self.renderer,
- }[name]
- def set(self, options: OptionsType) -> None:
- """Set parser options (in the same format as in constructor).
- Probably, you will never need it, but you can change options after constructor call.
- __Note:__ To achieve the best possible performance, don't modify a
- `markdown-it` instance options on the fly. If you need multiple configurations
- it's best to create multiple instances and initialize each with separate config.
- """
- self.options = OptionsDict(options)
- def configure(
- self, presets: str | PresetType, options_update: Mapping[str, Any] | None = None
- ) -> MarkdownIt:
- """Batch load of all options and component settings.
- This is an internal method, and you probably will not need it.
- But if you will - see available presets and data structure
- [here](https://github.com/markdown-it/markdown-it/tree/master/lib/presets)
- We strongly recommend to use presets instead of direct config loads.
- That will give better compatibility with next versions.
- """
- if isinstance(presets, str):
- if presets not in _PRESETS:
- raise KeyError(f"Wrong `markdown-it` preset '{presets}', check name")
- config = _PRESETS[presets]
- else:
- config = presets
- if not config:
- raise ValueError("Wrong `markdown-it` config, can't be empty")
- options = config.get("options", {}) or {}
- if options_update:
- options = {**options, **options_update} # type: ignore
- self.set(options) # type: ignore
- if "components" in config:
- for name, component in config["components"].items():
- rules = component.get("rules", None)
- if rules:
- self[name].ruler.enableOnly(rules)
- rules2 = component.get("rules2", None)
- if rules2:
- self[name].ruler2.enableOnly(rules2)
- return self
- def get_all_rules(self) -> dict[str, list[str]]:
- """Return the names of all active rules."""
- rules = {
- chain: self[chain].ruler.get_all_rules()
- for chain in ["core", "block", "inline"]
- }
- rules["inline2"] = self.inline.ruler2.get_all_rules()
- return rules
- def get_active_rules(self) -> dict[str, list[str]]:
- """Return the names of all active rules."""
- rules = {
- chain: self[chain].ruler.get_active_rules()
- for chain in ["core", "block", "inline"]
- }
- rules["inline2"] = self.inline.ruler2.get_active_rules()
- return rules
- def enable(
- self, names: str | Iterable[str], ignoreInvalid: bool = False
- ) -> MarkdownIt:
- """Enable list or rules. (chainable)
- :param names: rule name or list of rule names to enable.
- :param ignoreInvalid: set `true` to ignore errors when rule not found.
- It will automatically find appropriate components,
- containing rules with given names. If rule not found, and `ignoreInvalid`
- not set - throws exception.
- Example::
- md = MarkdownIt().enable(['sub', 'sup']).disable('smartquotes')
- """
- result = []
- if isinstance(names, str):
- names = [names]
- for chain in ["core", "block", "inline"]:
- result.extend(self[chain].ruler.enable(names, True))
- result.extend(self.inline.ruler2.enable(names, True))
- missed = [name for name in names if name not in result]
- if missed and not ignoreInvalid:
- raise ValueError(f"MarkdownIt. Failed to enable unknown rule(s): {missed}")
- return self
- def disable(
- self, names: str | Iterable[str], ignoreInvalid: bool = False
- ) -> MarkdownIt:
- """The same as [[MarkdownIt.enable]], but turn specified rules off. (chainable)
- :param names: rule name or list of rule names to disable.
- :param ignoreInvalid: set `true` to ignore errors when rule not found.
- """
- result = []
- if isinstance(names, str):
- names = [names]
- for chain in ["core", "block", "inline"]:
- result.extend(self[chain].ruler.disable(names, True))
- result.extend(self.inline.ruler2.disable(names, True))
- missed = [name for name in names if name not in result]
- if missed and not ignoreInvalid:
- raise ValueError(f"MarkdownIt. Failed to disable unknown rule(s): {missed}")
- return self
- @contextmanager
- def reset_rules(self) -> Generator[None, None, None]:
- """A context manager, that will reset the current enabled rules on exit."""
- chain_rules = self.get_active_rules()
- yield
- for chain, rules in chain_rules.items():
- if chain != "inline2":
- self[chain].ruler.enableOnly(rules)
- self.inline.ruler2.enableOnly(chain_rules["inline2"])
- def add_render_rule(
- self, name: str, function: Callable[..., Any], fmt: str = "html"
- ) -> None:
- """Add a rule for rendering a particular Token type.
- Only applied when ``renderer.__output__ == fmt``
- """
- if self.renderer.__output__ == fmt:
- self.renderer.rules[name] = function.__get__(self.renderer) # type: ignore
- def use(
- self, plugin: Callable[..., None], *params: Any, **options: Any
- ) -> MarkdownIt:
- """Load specified plugin with given params into current parser instance. (chainable)
- It's just a sugar to call `plugin(md, params)` with curring.
- Example::
- def func(tokens, idx):
- tokens[idx].content = tokens[idx].content.replace('foo', 'bar')
- md = MarkdownIt().use(plugin, 'foo_replace', 'text', func)
- """
- plugin(self, *params, **options)
- return self
- def parse(self, src: str, env: EnvType | None = None) -> list[Token]:
- """Parse the source string to a token stream
- :param src: source string
- :param env: environment sandbox
- Parse input string and return list of block tokens (special token type
- "inline" will contain list of inline tokens).
- `env` is used to pass data between "distributed" rules and return additional
- metadata like reference info, needed for the renderer. It also can be used to
- inject data in specific cases. Usually, you will be ok to pass `{}`,
- and then pass updated object to renderer.
- """
- env = {} if env is None else env
- if not isinstance(env, MutableMapping):
- raise TypeError(f"Input data should be a MutableMapping, not {type(env)}")
- if not isinstance(src, str):
- raise TypeError(f"Input data should be a string, not {type(src)}")
- state = StateCore(src, self, env)
- self.core.process(state)
- return state.tokens
- def render(self, src: str, env: EnvType | None = None) -> Any:
- """Render markdown string into html. It does all magic for you :).
- :param src: source string
- :param env: environment sandbox
- :returns: The output of the loaded renderer
- `env` can be used to inject additional metadata (`{}` by default).
- But you will not need it with high probability. See also comment
- in [[MarkdownIt.parse]].
- """
- env = {} if env is None else env
- return self.renderer.render(self.parse(src, env), self.options, env)
- def parseInline(self, src: str, env: EnvType | None = None) -> list[Token]:
- """The same as [[MarkdownIt.parse]] but skip all block rules.
- :param src: source string
- :param env: environment sandbox
- It returns the
- block tokens list with the single `inline` element, containing parsed inline
- tokens in `children` property. Also updates `env` object.
- """
- env = {} if env is None else env
- if not isinstance(env, MutableMapping):
- raise TypeError(f"Input data should be an MutableMapping, not {type(env)}")
- if not isinstance(src, str):
- raise TypeError(f"Input data should be a string, not {type(src)}")
- state = StateCore(src, self, env)
- state.inlineMode = True
- self.core.process(state)
- return state.tokens
- def renderInline(self, src: str, env: EnvType | None = None) -> Any:
- """Similar to [[MarkdownIt.render]] but for single paragraph content.
- :param src: source string
- :param env: environment sandbox
- Similar to [[MarkdownIt.render]] but for single paragraph content. Result
- will NOT be wrapped into `<p>` tags.
- """
- env = {} if env is None else env
- return self.renderer.render(self.parseInline(src, env), self.options, env)
- # link methods
- def validateLink(self, url: str) -> bool:
- """Validate if the URL link is allowed in output.
- This validator can prohibit more than really needed to prevent XSS.
- It's a tradeoff to keep code simple and to be secure by default.
- Note: the url should be normalized at this point, and existing entities decoded.
- """
- return normalize_url.validateLink(url)
- def normalizeLink(self, url: str) -> str:
- """Normalize destination URLs in links
- ::
- [label]: destination 'title'
- ^^^^^^^^^^^
- """
- return normalize_url.normalizeLink(url)
- def normalizeLinkText(self, link: str) -> str:
- """Normalize autolink content
- ::
- <destination>
- ~~~~~~~~~~~
- """
- return normalize_url.normalizeLinkText(link)
|