index.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274
  1. from __future__ import annotations
  2. from functools import partial
  3. from typing import Any, Sequence
  4. from markdown_it import MarkdownIt
  5. from markdown_it.rules_block import StateBlock
  6. from markdown_it.rules_core import StateCore
  7. from markdown_it.rules_inline import StateInline
  8. from markdown_it.token import Token
  9. from mdit_py_plugins.utils import is_code_block
  10. from .parse import ParseError, parse
  11. def attrs_plugin(
  12. md: MarkdownIt,
  13. *,
  14. after: Sequence[str] = ("image", "code_inline", "link_close", "span_close"),
  15. spans: bool = False,
  16. span_after: str = "link",
  17. allowed: Sequence[str] | None = None,
  18. ) -> None:
  19. """Parse inline attributes that immediately follow certain inline elements::
  20. ![alt](https://image.com){#id .a b=c}
  21. This syntax is inspired by
  22. `Djot spans
  23. <https://htmlpreview.github.io/?https://github.com/jgm/djot/blob/master/doc/syntax.html#inline-attributes>`_.
  24. Inside the curly braces, the following syntax is possible:
  25. - `.foo` specifies foo as a class.
  26. Multiple classes may be given in this way; they will be combined.
  27. - `#foo` specifies foo as an identifier.
  28. An element may have only one identifier;
  29. if multiple identifiers are given, the last one is used.
  30. - `key="value"` or `key=value` specifies a key-value attribute.
  31. Quotes are not needed when the value consists entirely of
  32. ASCII alphanumeric characters or `_` or `:` or `-`.
  33. Backslash escapes may be used inside quoted values.
  34. - `%` begins a comment, which ends with the next `%` or the end of the attribute (`}`).
  35. Multiple attribute blocks are merged.
  36. :param md: The MarkdownIt instance to modify.
  37. :param after: The names of inline elements after which attributes may be specified.
  38. This plugin does not support attributes after emphasis, strikethrough or text elements,
  39. which all require post-parse processing.
  40. :param spans: If True, also parse attributes after spans of text, encapsulated by `[]`.
  41. Note Markdown link references take precedence over this syntax.
  42. :param span_after: The name of an inline rule after which spans may be specified.
  43. :param allowed: A list of allowed attribute names.
  44. If not ``None``, any attributes not in this list will be removed
  45. and placed in the token's meta under the key "insecure_attrs".
  46. """
  47. if spans:
  48. md.inline.ruler.after(span_after, "span", _span_rule)
  49. if after:
  50. md.inline.ruler.push(
  51. "attr",
  52. partial(
  53. _attr_inline_rule,
  54. after=after,
  55. allowed=None if allowed is None else set(allowed),
  56. ),
  57. )
  58. def attrs_block_plugin(md: MarkdownIt, *, allowed: Sequence[str] | None = None) -> None:
  59. """Parse block attributes.
  60. Block attributes are attributes on a single line, with no other content.
  61. They attach the specified attributes to the block below them::
  62. {.a #b c=1}
  63. A paragraph, that will be assigned the class ``a`` and the identifier ``b``.
  64. Attributes can be stacked, with classes accumulating and lower attributes overriding higher::
  65. {#a .a c=1}
  66. {#b .b c=2}
  67. A paragraph, that will be assigned the class ``a b c``, and the identifier ``b``.
  68. This syntax is inspired by Djot block attributes.
  69. :param allowed: A list of allowed attribute names.
  70. If not ``None``, any attributes not in this list will be removed
  71. and placed in the token's meta under the key "insecure_attrs".
  72. """
  73. md.block.ruler.before("fence", "attr", _attr_block_rule)
  74. md.core.ruler.after(
  75. "block",
  76. "attr",
  77. partial(
  78. _attr_resolve_block_rule, allowed=None if allowed is None else set(allowed)
  79. ),
  80. )
  81. def _find_opening(tokens: Sequence[Token], index: int) -> int | None:
  82. """Find the opening token index, if the token is closing."""
  83. if tokens[index].nesting != -1:
  84. return index
  85. level = 0
  86. while index >= 0:
  87. level += tokens[index].nesting
  88. if level == 0:
  89. return index
  90. index -= 1
  91. return None
  92. def _span_rule(state: StateInline, silent: bool) -> bool:
  93. if state.src[state.pos] != "[":
  94. return False
  95. maximum = state.posMax
  96. labelStart = state.pos + 1
  97. labelEnd = state.md.helpers.parseLinkLabel(state, state.pos, False)
  98. # parser failed to find ']', so it's not a valid span
  99. if labelEnd < 0:
  100. return False
  101. pos = labelEnd + 1
  102. # check not at end of inline
  103. if pos >= maximum:
  104. return False
  105. try:
  106. new_pos, attrs = parse(state.src[pos:])
  107. except ParseError:
  108. return False
  109. pos += new_pos + 1
  110. if not silent:
  111. state.pos = labelStart
  112. state.posMax = labelEnd
  113. token = state.push("span_open", "span", 1)
  114. token.attrs = attrs # type: ignore[assignment]
  115. state.md.inline.tokenize(state)
  116. token = state.push("span_close", "span", -1)
  117. state.pos = pos
  118. state.posMax = maximum
  119. return True
  120. def _attr_inline_rule(
  121. state: StateInline,
  122. silent: bool,
  123. after: Sequence[str],
  124. *,
  125. allowed: set[str] | None = None,
  126. ) -> bool:
  127. if state.pending or not state.tokens:
  128. return False
  129. token = state.tokens[-1]
  130. if token.type not in after:
  131. return False
  132. try:
  133. new_pos, attrs = parse(state.src[state.pos :])
  134. except ParseError:
  135. return False
  136. token_index = _find_opening(state.tokens, len(state.tokens) - 1)
  137. if token_index is None:
  138. return False
  139. state.pos += new_pos + 1
  140. if not silent:
  141. attr_token = state.tokens[token_index]
  142. if "class" in attrs and "class" in token.attrs:
  143. attrs["class"] = f"{token.attrs['class']} {attrs['class']}"
  144. _add_attrs(attr_token, attrs, allowed)
  145. return True
  146. def _attr_block_rule(
  147. state: StateBlock, startLine: int, endLine: int, silent: bool
  148. ) -> bool:
  149. """Find a block of attributes.
  150. The block must be a single line that begins with a `{`, after three or less spaces,
  151. and end with a `}` followed by any number if spaces.
  152. """
  153. if is_code_block(state, startLine):
  154. return False
  155. pos = state.bMarks[startLine] + state.tShift[startLine]
  156. maximum = state.eMarks[startLine]
  157. # if it doesn't start with a {, it's not an attribute block
  158. if state.src[pos] != "{":
  159. return False
  160. # find first non-space character from the right
  161. while maximum > pos and state.src[maximum - 1] in (" ", "\t"):
  162. maximum -= 1
  163. # if it doesn't end with a }, it's not an attribute block
  164. if maximum <= pos:
  165. return False
  166. if state.src[maximum - 1] != "}":
  167. return False
  168. try:
  169. new_pos, attrs = parse(state.src[pos:maximum])
  170. except ParseError:
  171. return False
  172. # if the block was resolved earlier than expected, it's not an attribute block
  173. # TODO this was not working in some instances, so I disabled it
  174. # if (maximum - 1) != new_pos:
  175. # return False
  176. if silent:
  177. return True
  178. token = state.push("attrs_block", "", 0)
  179. token.attrs = attrs # type: ignore[assignment]
  180. token.map = [startLine, startLine + 1]
  181. state.line = startLine + 1
  182. return True
  183. def _attr_resolve_block_rule(state: StateCore, *, allowed: set[str] | None) -> None:
  184. """Find attribute block then move its attributes to the next block."""
  185. i = 0
  186. len_tokens = len(state.tokens)
  187. while i < len_tokens:
  188. if state.tokens[i].type != "attrs_block":
  189. i += 1
  190. continue
  191. if i + 1 < len_tokens:
  192. next_token = state.tokens[i + 1]
  193. # classes are appended
  194. if "class" in state.tokens[i].attrs and "class" in next_token.attrs:
  195. state.tokens[i].attrs["class"] = (
  196. f"{state.tokens[i].attrs['class']} {next_token.attrs['class']}"
  197. )
  198. if next_token.type == "attrs_block":
  199. # subsequent attribute blocks take precedence, when merging
  200. for key, value in state.tokens[i].attrs.items():
  201. if key == "class" or key not in next_token.attrs:
  202. next_token.attrs[key] = value
  203. else:
  204. _add_attrs(next_token, state.tokens[i].attrs, allowed)
  205. state.tokens.pop(i)
  206. len_tokens -= 1
  207. def _add_attrs(
  208. token: Token,
  209. attrs: dict[str, Any],
  210. allowed: set[str] | None,
  211. ) -> None:
  212. """Add attributes to a token, skipping any disallowed attributes."""
  213. if allowed is not None and (
  214. disallowed := {k: v for k, v in attrs.items() if k not in allowed}
  215. ):
  216. token.meta["insecure_attrs"] = disallowed
  217. attrs = {k: v for k, v in attrs.items() if k in allowed}
  218. # attributes takes precedence over existing attributes
  219. token.attrs.update(attrs)