parser_inline.py

  1. """Tokenizes paragraph content.
  2. """
  3. from __future__ import annotations
  4. from typing import TYPE_CHECKING, Callable
  5. from . import rules_inline
  6. from .ruler import Ruler
  7. from .rules_inline.state_inline import StateInline
  8. from .token import Token
  9. from .utils import EnvType
  10. if TYPE_CHECKING:
  11. from markdown_it import MarkdownIt
  12. # Parser rules
RuleFuncInlineType = Callable[[StateInline, bool], bool]
"""(state: StateInline, silent: bool) -> matched: bool

`silent` disables token generation, useful for lookahead.
"""
_rules: list[tuple[str, RuleFuncInlineType]] = [
    ("text", rules_inline.text),
    ("linkify", rules_inline.linkify),
    ("newline", rules_inline.newline),
    ("escape", rules_inline.escape),
    ("backticks", rules_inline.backtick),
    ("strikethrough", rules_inline.strikethrough.tokenize),
    ("emphasis", rules_inline.emphasis.tokenize),
    ("link", rules_inline.link),
    ("image", rules_inline.image),
    ("autolink", rules_inline.autolink),
    ("html_inline", rules_inline.html_inline),
    ("entity", rules_inline.entity),
]

# Note: the `_rules2` ruleset was created specifically for emphasis/strikethrough
# post-processing and may change in the future.
#
# Don't use this for anything except pairs (plugins working with `balance_pairs`).
#
RuleFuncInline2Type = Callable[[StateInline], None]
_rules2: list[tuple[str, RuleFuncInline2Type]] = [
    ("balance_pairs", rules_inline.link_pairs),
    ("strikethrough", rules_inline.strikethrough.postProcess),
    ("emphasis", rules_inline.emphasis.postProcess),
    # Pair rules split '**' into separate text tokens, which may be left unused;
    # the rule below merges unused segments back into the surrounding text.
    ("fragments_join", rules_inline.fragments_join),
]
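
# Illustrative sketch only (not part of the original module): the shape of a
# ruler2 post-processing rule. Pair rules run once after tokenization,
# inspecting the recorded delimiters rather than re-scanning `state.src`.
# A plugin would register one with, e.g.,
# `md.inline.ruler2.before("fragments_join", "my_pairs", _example_post_rule)`.
def _example_post_rule(state: StateInline) -> None:
    for _delimiter in state.delimiters:
        ...  # pair up open/close delimiters and rewrite their text tokens
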

class ParserInline:
    def __init__(self) -> None:
        self.ruler = Ruler[RuleFuncInlineType]()
        for name, rule in _rules:
            self.ruler.push(name, rule)
        # Second ruler used for post-processing (e.g. in emphasis-like rules)
        self.ruler2 = Ruler[RuleFuncInline2Type]()
        for name, rule2 in _rules2:
            self.ruler2.push(name, rule2)

    def skipToken(self, state: StateInline) -> None:
        """Skip a single token by running all rules in validation (silent) mode.

        Advances `state.pos` past the token, or by one character if no rule
        matched; the result is cached per start position.
        """
        ok = False
        pos = state.pos
        rules = self.ruler.getRules("")
        maxNesting = state.md.options["maxNesting"]
        cache = state.cache

        if pos in cache:
            state.pos = cache[pos]
            return

        if state.level < maxNesting:
            for rule in rules:
                # Increment state.level and decrement it later to limit recursion.
                # It's harmless to do here, because no tokens are created.
                # Ideally, though, this would use a separate private state variable.
                state.level += 1
                ok = rule(state, True)
                state.level -= 1
                if ok:
                    break
        else:
            # Too much nesting; just skip until the end of the paragraph.
            #
            # NOTE: this will cause links to behave incorrectly when the number
            # of `[` is exactly equal to `maxNesting + 1`:
            #
            #     [[[[[[[[[[[[[[[[[[[[[foo]()
            #
            # TODO: remove this workaround once the CommonMark spec allows
            # nested links (we could then prevent links from being parsed in
            # validation mode).
            #
            state.pos = state.posMax

        if not ok:
            state.pos += 1
        cache[pos] = state.pos
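
    # Illustrative note (not part of the original source): lookahead helpers
    # such as the link-label parser drive skipToken, roughly as:
    #
    #     while state.pos < state.posMax:
    #         ...
    #         state.md.inline.skipToken(state)
    #
    # The position cache above keeps such scans linear even when several rules
    # re-probe the same offsets.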

    def tokenize(self, state: StateInline) -> None:
        """Generate tokens for the input range."""
        rules = self.ruler.getRules("")
        end = state.posMax
        maxNesting = state.md.options["maxNesting"]

        while state.pos < end:
            # Try all possible rules. On success, a rule should:
            #
            # - update `state.pos`
            # - update `state.tokens`
            # - return `True`
            ok = False
            if state.level < maxNesting:
                for rule in rules:
                    ok = rule(state, False)
                    if ok:
                        break

            if ok:
                if state.pos >= end:
                    break
                continue

            state.pending += state.src[state.pos]
            state.pos += 1

        if state.pending:
            state.pushPending()
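
    # Illustrative note (not part of the original source): for input such as
    # "a *b*", this pass emits plain text tokens and records '*' delimiters;
    # the ruler2 pass in `parse` then turns paired delimiters into
    # em_open/em_close tokens and joins any leftover fragments.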

    def parse(
        self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
    ) -> list[Token]:
        """Process the input string and push inline tokens into `tokens`."""
        state = StateInline(src, md, env, tokens)
        self.tokenize(state)
        rules2 = self.ruler2.getRules("")
        for rule in rules2:
            rule(state)
        return state.tokens
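

# Illustrative usage sketch (not part of the original module). ParserInline is
# normally driven by the "inline" core rule, but it can be exercised directly
# through a MarkdownIt instance (run as `python -m markdown_it.parser_inline`,
# assuming markdown-it-py is installed).
if __name__ == "__main__":
    from markdown_it import MarkdownIt

    md = MarkdownIt()
    tokens: list[Token] = []
    md.inline.parse("*hello* `world`", md, {}, tokens)
    for tok in tokens:
        print(tok.type, repr(tok.content))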