- """Tokenizes paragraph content.
- """
- from __future__ import annotations
- from typing import TYPE_CHECKING, Callable
- from . import rules_inline
- from .ruler import Ruler
- from .rules_inline.state_inline import StateInline
- from .token import Token
- from .utils import EnvType
- if TYPE_CHECKING:
- from markdown_it import MarkdownIt
# Parser rules
RuleFuncInlineType = Callable[[StateInline, bool], bool]
"""(state: StateInline, silent: bool) -> matched: bool

`silent` disables token generation, useful for lookahead.
"""

_rules: list[tuple[str, RuleFuncInlineType]] = [
    ("text", rules_inline.text),
    ("linkify", rules_inline.linkify),
    ("newline", rules_inline.newline),
    ("escape", rules_inline.escape),
    ("backticks", rules_inline.backtick),
    ("strikethrough", rules_inline.strikethrough.tokenize),
    ("emphasis", rules_inline.emphasis.tokenize),
    ("link", rules_inline.link),
    ("image", rules_inline.image),
    ("autolink", rules_inline.autolink),
    ("html_inline", rules_inline.html_inline),
    ("entity", rules_inline.entity),
]

# NOTE: the `rules2` ruleset was created specifically for emphasis/strikethrough
# post-processing and may be changed in the future.
#
# Don't use it for anything except pairs (plugins working with `balance_pairs`);
# a sketch of the signature follows the list below.
#
RuleFuncInline2Type = Callable[[StateInline], None]

_rules2: list[tuple[str, RuleFuncInline2Type]] = [
    ("balance_pairs", rules_inline.link_pairs),
    ("strikethrough", rules_inline.strikethrough.postProcess),
    ("emphasis", rules_inline.emphasis.postProcess),
    # Pair rules split markers such as `**` into their own text tokens, some of
    # which may be left unused; the rule below merges unused segments back into
    # the surrounding text.
    ("fragments_join", rules_inline.fragments_join),
]
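
# Illustrative sketch only (hypothetical, not part of this module): a
# post-processing rule takes only the state, returns nothing, and mutates the
# finished token stream in place. Real `rules2` members work on delimiter
# pairs; this deliberately arbitrary pass just demonstrates the signature.
def _example_post_rule(state: StateInline) -> None:
    for token in state.tokens:
        if token.type == "text":
            token.content = token.content.rstrip()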


class ParserInline:
    def __init__(self) -> None:
        self.ruler = Ruler[RuleFuncInlineType]()
        for name, rule in _rules:
            self.ruler.push(name, rule)
        # Second ruler used for post-processing (e.g. in emphasis-like rules)
        self.ruler2 = Ruler[RuleFuncInline2Type]()
        for name, rule2 in _rules2:
            self.ruler2.push(name, rule2)
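
    # Plugins normally extend these rulers through the parent MarkdownIt
    # instance rather than here, e.g. (a sketch, assuming the usual
    # markdown-it-py plugin convention):
    #
    #     md.inline.ruler.push("my_rule", my_rule)
    #     md.inline.ruler2.push("my_pairs_rule", my_post_rule)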

    def skipToken(self, state: StateInline) -> None:
        """Skip a single token by running all rules in validation mode.

        Rules are tried in order until one reports success; if none matches,
        the position advances by one character. The resulting position is
        cached in `state.cache`, keyed by the starting position.
        """
        ok = False
        pos = state.pos
        rules = self.ruler.getRules("")
        maxNesting = state.md.options["maxNesting"]
        cache = state.cache

        if pos in cache:
            state.pos = cache[pos]
            return

        if state.level < maxNesting:
            for rule in rules:
                # Increment state.level and decrement it later to limit recursion.
                # It's harmless to do here, because no tokens are created.
                # Ideally, though, a separate private state variable would be used
                # for this purpose.
                state.level += 1
                ok = rule(state, True)
                state.level -= 1
                if ok:
                    break
        else:
            # Too much nesting: just skip to the end of the paragraph.
            #
            # NOTE: this causes links to behave incorrectly when the number of
            # `[` is exactly equal to `maxNesting + 1`, e.g.:
            #
            #   [[[[[[[[[[[[[[[[[[[[[foo]()
            #
            # TODO: remove this workaround once the CommonMark spec allows nested
            # links (it could then be replaced by preventing links from being
            # parsed in validation mode).
            #
            state.pos = state.posMax

        if not ok:
            state.pos += 1
        cache[pos] = state.pos

    def tokenize(self, state: StateInline) -> None:
        """Generate tokens for the input range."""
        ok = False
        rules = self.ruler.getRules("")
        end = state.posMax
        maxNesting = state.md.options["maxNesting"]

        while state.pos < end:
            # Try all possible rules. On success, a rule should:
            #
            # - update `state.pos`
            # - update `state.tokens`
            # - return True
            if state.level < maxNesting:
                for rule in rules:
                    ok = rule(state, False)
                    if ok:
                        break

            if ok:
                if state.pos >= end:
                    break
                continue

            # No rule matched: treat the character as plain text.
            state.pending += state.src[state.pos]
            state.pos += 1

        if state.pending:
            state.pushPending()

    def parse(
        self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
    ) -> list[Token]:
        """Process input string and push inline tokens into `tokens`"""
        state = StateInline(src, md, env, tokens)
        self.tokenize(state)
        rules2 = self.ruler2.getRules("")
        for rule in rules2:
            rule(state)
        return state.tokens
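
# Usage sketch (an illustration, assuming the public markdown-it-py API): the
# inline parser is normally driven through a MarkdownIt instance rather than
# instantiated directly. Importing MarkdownIt here would be circular, so the
# example is left as a comment:
#
#     from markdown_it import MarkdownIt
#
#     md = MarkdownIt()
#     tokens = md.parseInline("*emphasised* text")
#     # tokens[0].children holds the inline tokens produced by this parser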