# parser_block.py
"""Block-level tokenizer."""
from __future__ import annotations

import logging
from typing import TYPE_CHECKING, Callable

from . import rules_block
from .ruler import Ruler
from .rules_block.state_block import StateBlock
from .token import Token
from .utils import EnvType

if TYPE_CHECKING:
    # Imported only for type annotations to avoid a circular import at runtime.
    from markdown_it import MarkdownIt

LOGGER = logging.getLogger(__name__)

# Signature shared by every block-level rule function.
RuleFuncBlockType = Callable[[StateBlock, int, int, bool], bool]
"""(state: StateBlock, startLine: int, endLine: int, silent: bool) -> matched: bool)

`silent` disables token generation, useful for lookahead.
"""
# Default block rule set, in priority order. Each entry is
# (rule name, rule function, names of rules this one may terminate).
_rules: list[tuple[str, RuleFuncBlockType, list[str]]] = [
    # First 2 params - rule name & source. Secondary array - list of rules,
    # which can be terminated by this one.
    ("table", rules_block.table, ["paragraph", "reference"]),
    ("code", rules_block.code, []),
    ("fence", rules_block.fence, ["paragraph", "reference", "blockquote", "list"]),
    (
        "blockquote",
        rules_block.blockquote,
        ["paragraph", "reference", "blockquote", "list"],
    ),
    ("hr", rules_block.hr, ["paragraph", "reference", "blockquote", "list"]),
    ("list", rules_block.list_block, ["paragraph", "reference", "blockquote"]),
    ("reference", rules_block.reference, []),
    ("html_block", rules_block.html_block, ["paragraph", "reference", "blockquote"]),
    ("heading", rules_block.heading, ["paragraph", "reference", "blockquote"]),
    ("lheading", rules_block.lheading, []),
    ("paragraph", rules_block.paragraph, []),
]
class ParserBlock:
    """
    ParserBlock#ruler -> Ruler

    [[Ruler]] instance. Keep configuration of block rules.
    """

    def __init__(self) -> None:
        # Register the default block rules (with their terminator lists)
        # on a fresh Ruler instance.
        self.ruler = Ruler[RuleFuncBlockType]()
        for name, rule, alt in _rules:
            self.ruler.push(name, rule, {"alt": alt})

    def tokenize(self, state: StateBlock, startLine: int, endLine: int) -> None:
        """Generate tokens for input range.

        Scans lines ``startLine..endLine`` of ``state``, trying each enabled
        block rule in priority order; matched rules append to ``state.tokens``
        and advance ``state.line``.
        """
        rules = self.ruler.getRules("")
        line = startLine
        maxNesting = state.md.options.maxNesting
        hasEmptyLines = False

        while line < endLine:
            state.line = line = state.skipEmptyLines(line)
            if line >= endLine:
                break

            if state.sCount[line] < state.blkIndent:
                # Termination condition for nested calls.
                # Nested calls currently used for blockquotes & lists
                break

            if state.level >= maxNesting:
                # If nesting level exceeded - skip tail to the end.
                # That's not ordinary situation and we should not care about content.
                state.line = endLine
                break

            # Try all possible rules.
            # On success, rule should:
            # - update `state.line`
            # - update `state.tokens`
            # - return True
            for rule in rules:
                if rule(state, line, endLine, False):
                    break

            # set state.tight if we had an empty line before current tag
            # i.e. latest empty line should not count
            state.tight = not hasEmptyLines

            # Continue from wherever the matched rule left off.
            line = state.line

            # paragraph might "eat" one newline after it in nested lists
            if (line - 1) < endLine and state.isEmpty(line - 1):
                hasEmptyLines = True

            # Skip a single trailing empty line, remembering we saw one so the
            # next block is marked as "loose" (state.tight False).
            if line < endLine and state.isEmpty(line):
                hasEmptyLines = True
                line += 1
                state.line = line

    def parse(
        self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token]
    ) -> list[Token] | None:
        """Process input string and push block tokens into `outTokens`.

        Returns the token list, or ``None`` when ``src`` is empty.
        """
        if not src:
            return None
        state = StateBlock(src, md, env, outTokens)
        self.tokenize(state, state.line, state.lineMax)
        return state.tokens