index.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525
  1. """Process footnotes"""
  2. from __future__ import annotations
  3. from functools import partial
  4. from typing import TYPE_CHECKING, Sequence, TypedDict
  5. from markdown_it import MarkdownIt
  6. from markdown_it.helpers import parseLinkLabel
  7. from markdown_it.rules_block import StateBlock
  8. from markdown_it.rules_core import StateCore
  9. from markdown_it.rules_inline import StateInline
  10. from markdown_it.token import Token
  11. from mdit_py_plugins.utils import is_code_block
  12. if TYPE_CHECKING:
  13. from markdown_it.renderer import RendererProtocol
  14. from markdown_it.utils import EnvType, OptionsDict
  15. def footnote_plugin(
  16. md: MarkdownIt,
  17. *,
  18. inline: bool = True,
  19. move_to_end: bool = True,
  20. always_match_refs: bool = False,
  21. ) -> None:
  22. """Plugin ported from
  23. `markdown-it-footnote <https://github.com/markdown-it/markdown-it-footnote>`__.
  24. It is based on the
  25. `pandoc definition <http://johnmacfarlane.net/pandoc/README.html#footnotes>`__:
  26. .. code-block:: md
  27. Normal footnote:
  28. Here is a footnote reference,[^1] and another.[^longnote]
  29. [^1]: Here is the footnote.
  30. [^longnote]: Here's one with multiple blocks.
  31. Subsequent paragraphs are indented to show that they
  32. belong to the previous footnote.
  33. :param inline: If True, also parse inline footnotes (^[...]).
  34. :param move_to_end: If True, move footnote definitions to the end of the token stream.
  35. :param always_match_refs: If True, match references, even if the footnote is not defined.
  36. """
  37. md.block.ruler.before(
  38. "reference", "footnote_def", footnote_def, {"alt": ["paragraph", "reference"]}
  39. )
  40. _footnote_ref = partial(footnote_ref, always_match=always_match_refs)
  41. if inline:
  42. md.inline.ruler.after("image", "footnote_inline", footnote_inline)
  43. md.inline.ruler.after("footnote_inline", "footnote_ref", _footnote_ref)
  44. else:
  45. md.inline.ruler.after("image", "footnote_ref", _footnote_ref)
  46. if move_to_end:
  47. md.core.ruler.after("inline", "footnote_tail", footnote_tail)
  48. md.add_render_rule("footnote_ref", render_footnote_ref)
  49. md.add_render_rule("footnote_block_open", render_footnote_block_open)
  50. md.add_render_rule("footnote_block_close", render_footnote_block_close)
  51. md.add_render_rule("footnote_open", render_footnote_open)
  52. md.add_render_rule("footnote_close", render_footnote_close)
  53. md.add_render_rule("footnote_anchor", render_footnote_anchor)
  54. # helpers (only used in other rules, no tokens are attached to those)
  55. md.add_render_rule("footnote_caption", render_footnote_caption)
  56. md.add_render_rule("footnote_anchor_name", render_footnote_anchor_name)
class _RefData(TypedDict, total=False):
    """Data recorded for a single footnote; every key is optional (``total=False``)."""

    # standard: written by ``footnote_ref`` for labelled ``[^label]`` references
    label: str
    count: int
    # inline: written by ``footnote_inline`` for ``^[...]`` footnotes
    content: str
    tokens: list[Token]
class _FootnoteData(TypedDict):
    """Shape of the footnote bookkeeping kept under ``env["footnotes"]``."""

    refs: dict[str, int]
    """A mapping of all footnote labels (prefixed with ``:``) to their ID (-1 if not yet set)."""
    list: dict[int, _RefData]
    """A mapping of all footnote IDs to their data."""
  69. def _data_from_env(env: EnvType) -> _FootnoteData:
  70. footnotes = env.setdefault("footnotes", {})
  71. footnotes.setdefault("refs", {})
  72. footnotes.setdefault("list", {})
  73. return footnotes # type: ignore[no-any-return]
  74. # ## RULES ##
def footnote_def(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Process footnote block definition

    Block rule that recognises a line starting with ``[^label]:`` and, when not
    silent, wraps the definition's content in ``footnote_reference_open`` /
    ``footnote_reference_close`` tokens.

    :param state: Block parser state. Its ``tokens``, ``level`` and ``env`` are
        updated; the line marks of ``startLine``, ``blkIndent`` and
        ``parentType`` are changed temporarily and restored before returning.
    :param startLine: Index of the line to test.
    :param endLine: Upper line bound passed on to the nested ``tokenize`` call.
    :param silent: If True, only report whether the line matches; nothing is
        emitted and ``state.env`` is not touched.
    :return: True if the line is a footnote definition, False otherwise.
    """
    if is_code_block(state, startLine):
        return False

    # Offsets into ``state.src`` where this line's content starts and ends.
    start = state.bMarks[startLine] + state.tShift[startLine]
    maximum = state.eMarks[startLine]

    # line should be at least 5 chars - "[^x]:"
    # NOTE(review): this guard only rejects lines shorter than 4 chars; a
    # 4-char line such as "[^x]" is rejected by the ':' check further down.
    if start + 4 > maximum:
        return False

    if state.src[start] != "[":
        return False
    if state.src[start + 1] != "^":
        return False

    # Scan the label up to the closing ']'; a space inside the label rejects
    # the line.
    pos = start + 2
    while pos < maximum:
        if state.src[pos] == " ":
            return False
        if state.src[pos] == "]":
            break
        pos += 1

    if pos == start + 2:  # no empty footnote labels
        return False
    # Step past ']' (or past the end of the line if no ']' was found, which
    # the bounds check below rejects) and require a ':' right after it.
    pos += 1
    if pos >= maximum or state.src[pos] != ":":
        return False
    if silent:
        return True
    pos += 1

    # ``pos`` is now just past the ':', so ``pos - 2`` is the index of ']'.
    label = state.src[start + 2 : pos - 2]
    footnote_data = _data_from_env(state.env)
    # Mark the label as defined; -1 means no ID has been assigned yet.
    footnote_data["refs"][":" + label] = -1

    open_token = Token("footnote_reference_open", "", 1)
    open_token.meta = {"label": label}
    open_token.level = state.level
    state.level += 1
    state.tokens.append(open_token)

    # Remember everything that is overwritten below so it can be restored
    # after the nested tokenize call.
    oldBMark = state.bMarks[startLine]
    oldTShift = state.tShift[startLine]
    oldSCount = state.sCount[startLine]
    oldParentType = state.parentType

    posAfterColon = pos
    # Offset of ``pos`` counted from the line's sCount; ``offset`` is advanced
    # over the whitespace below while ``initial`` keeps the starting value.
    initial = offset = (
        state.sCount[startLine]
        + pos
        - (state.bMarks[startLine] + state.tShift[startLine])
    )

    # Skip spaces and tabs after the colon; a tab advances ``offset`` to the
    # next multiple of 4, a space advances it by one.
    while pos < maximum:
        ch = state.src[pos]

        if ch == "\t":
            offset += 4 - offset % 4
        elif ch == " ":
            offset += 1
        else:
            break

        pos += 1

    # Re-point the first line's marks so that it begins right after the colon.
    state.tShift[startLine] = pos - posAfterColon
    state.sCount[startLine] = offset - initial

    state.bMarks[startLine] = posAfterColon
    # Raise the block indent by 4 for the duration of the nested tokenize call.
    state.blkIndent += 4
    state.parentType = "footnote"

    # Bump the first line's sCount if it is below the raised block indent.
    if state.sCount[startLine] < state.blkIndent:
        state.sCount[startLine] += state.blkIndent

    # Tokenize the definition's content with the adjusted state.
    state.md.block.tokenize(state, startLine, endLine)

    # Undo the temporary state changes (the inverse of the assignments above).
    state.parentType = oldParentType
    state.blkIndent -= 4
    state.tShift[startLine] = oldTShift
    state.sCount[startLine] = oldSCount
    state.bMarks[startLine] = oldBMark

    # ``state.line`` is read after the nested tokenize call to close the range.
    open_token.map = [startLine, state.line]

    token = Token("footnote_reference_close", "", -1)
    state.level -= 1
    token.level = state.level
    state.tokens.append(token)

    return True
  149. def footnote_inline(state: StateInline, silent: bool) -> bool:
  150. """Process inline footnotes (^[...])"""
  151. maximum = state.posMax
  152. start = state.pos
  153. if start + 2 >= maximum:
  154. return False
  155. if state.src[start] != "^":
  156. return False
  157. if state.src[start + 1] != "[":
  158. return False
  159. labelStart = start + 2
  160. labelEnd = parseLinkLabel(state, start + 1)
  161. # parser failed to find ']', so it's not a valid note
  162. if labelEnd < 0:
  163. return False
  164. # We found the end of the link, and know for a fact it's a valid link
  165. # so all that's left to do is to call tokenizer.
  166. #
  167. if not silent:
  168. refs = _data_from_env(state.env)["list"]
  169. footnoteId = len(refs)
  170. tokens: list[Token] = []
  171. state.md.inline.parse(
  172. state.src[labelStart:labelEnd], state.md, state.env, tokens
  173. )
  174. token = state.push("footnote_ref", "", 0)
  175. token.meta = {"id": footnoteId}
  176. refs[footnoteId] = {"content": state.src[labelStart:labelEnd], "tokens": tokens}
  177. state.pos = labelEnd + 1
  178. state.posMax = maximum
  179. return True
  180. def footnote_ref(
  181. state: StateInline, silent: bool, *, always_match: bool = False
  182. ) -> bool:
  183. """Process footnote references ([^...])"""
  184. maximum = state.posMax
  185. start = state.pos
  186. # should be at least 4 chars - "[^x]"
  187. if start + 3 > maximum:
  188. return False
  189. footnote_data = _data_from_env(state.env)
  190. if not (always_match or footnote_data["refs"]):
  191. return False
  192. if state.src[start] != "[":
  193. return False
  194. if state.src[start + 1] != "^":
  195. return False
  196. pos = start + 2
  197. while pos < maximum:
  198. if state.src[pos] in (" ", "\n"):
  199. return False
  200. if state.src[pos] == "]":
  201. break
  202. pos += 1
  203. if pos == start + 2: # no empty footnote labels
  204. return False
  205. if pos >= maximum:
  206. return False
  207. pos += 1
  208. label = state.src[start + 2 : pos - 1]
  209. if ((":" + label) not in footnote_data["refs"]) and not always_match:
  210. return False
  211. if not silent:
  212. if footnote_data["refs"].get(":" + label, -1) < 0:
  213. footnoteId = len(footnote_data["list"])
  214. footnote_data["list"][footnoteId] = {"label": label, "count": 0}
  215. footnote_data["refs"][":" + label] = footnoteId
  216. else:
  217. footnoteId = footnote_data["refs"][":" + label]
  218. footnoteSubId = footnote_data["list"][footnoteId]["count"]
  219. footnote_data["list"][footnoteId]["count"] += 1
  220. token = state.push("footnote_ref", "", 0)
  221. token.meta = {"id": footnoteId, "subId": footnoteSubId, "label": label}
  222. state.pos = pos
  223. state.posMax = maximum
  224. return True
  225. def footnote_tail(state: StateCore) -> None:
  226. """Post-processing step, to move footnote tokens to end of the token stream.
  227. Also removes un-referenced tokens.
  228. """
  229. insideRef = False
  230. refTokens = {}
  231. if "footnotes" not in state.env:
  232. return
  233. current: list[Token] = []
  234. tok_filter = []
  235. for tok in state.tokens:
  236. if tok.type == "footnote_reference_open":
  237. insideRef = True
  238. current = []
  239. currentLabel = tok.meta["label"]
  240. tok_filter.append(False)
  241. continue
  242. if tok.type == "footnote_reference_close":
  243. insideRef = False
  244. # prepend ':' to avoid conflict with Object.prototype members
  245. refTokens[":" + currentLabel] = current
  246. tok_filter.append(False)
  247. continue
  248. if insideRef:
  249. current.append(tok)
  250. tok_filter.append(not insideRef)
  251. state.tokens = [t for t, f in zip(state.tokens, tok_filter) if f]
  252. footnote_data = _data_from_env(state.env)
  253. if not footnote_data["list"]:
  254. return
  255. token = Token("footnote_block_open", "", 1)
  256. state.tokens.append(token)
  257. for i, foot_note in footnote_data["list"].items():
  258. token = Token("footnote_open", "", 1)
  259. token.meta = {"id": i, "label": foot_note.get("label", None)}
  260. # TODO propagate line positions of original foot note
  261. # (but don't store in token.map, because this is used for scroll syncing)
  262. state.tokens.append(token)
  263. if "tokens" in foot_note:
  264. tokens = []
  265. token = Token("paragraph_open", "p", 1)
  266. token.block = True
  267. tokens.append(token)
  268. token = Token("inline", "", 0)
  269. token.children = foot_note["tokens"]
  270. token.content = foot_note["content"]
  271. tokens.append(token)
  272. token = Token("paragraph_close", "p", -1)
  273. token.block = True
  274. tokens.append(token)
  275. elif "label" in foot_note:
  276. tokens = refTokens.get(":" + foot_note["label"], [])
  277. state.tokens.extend(tokens)
  278. if state.tokens[len(state.tokens) - 1].type == "paragraph_close":
  279. lastParagraph: Token | None = state.tokens.pop()
  280. else:
  281. lastParagraph = None
  282. t = (
  283. foot_note["count"]
  284. if (("count" in foot_note) and (foot_note["count"] > 0))
  285. else 1
  286. )
  287. j = 0
  288. while j < t:
  289. token = Token("footnote_anchor", "", 0)
  290. token.meta = {"id": i, "subId": j, "label": foot_note.get("label", None)}
  291. state.tokens.append(token)
  292. j += 1
  293. if lastParagraph:
  294. state.tokens.append(lastParagraph)
  295. token = Token("footnote_close", "", -1)
  296. state.tokens.append(token)
  297. token = Token("footnote_block_close", "", -1)
  298. state.tokens.append(token)
  299. ########################################
  300. # Renderer partials
  301. def render_footnote_anchor_name(
  302. self: RendererProtocol,
  303. tokens: Sequence[Token],
  304. idx: int,
  305. options: OptionsDict,
  306. env: EnvType,
  307. ) -> str:
  308. n = str(tokens[idx].meta["id"] + 1)
  309. prefix = ""
  310. doc_id = env.get("docId", None)
  311. if isinstance(doc_id, str):
  312. prefix = f"-{doc_id}-"
  313. return prefix + n
  314. def render_footnote_caption(
  315. self: RendererProtocol,
  316. tokens: Sequence[Token],
  317. idx: int,
  318. options: OptionsDict,
  319. env: EnvType,
  320. ) -> str:
  321. n = str(tokens[idx].meta["id"] + 1)
  322. if tokens[idx].meta.get("subId", -1) > 0:
  323. n += ":" + str(tokens[idx].meta["subId"])
  324. return "[" + n + "]"
  325. def render_footnote_ref(
  326. self: RendererProtocol,
  327. tokens: Sequence[Token],
  328. idx: int,
  329. options: OptionsDict,
  330. env: EnvType,
  331. ) -> str:
  332. ident: str = self.rules["footnote_anchor_name"](tokens, idx, options, env) # type: ignore[attr-defined]
  333. caption: str = self.rules["footnote_caption"](tokens, idx, options, env) # type: ignore[attr-defined]
  334. refid = ident
  335. if tokens[idx].meta.get("subId", -1) > 0:
  336. refid += ":" + str(tokens[idx].meta["subId"])
  337. return (
  338. '<sup class="footnote-ref"><a href="#fn'
  339. + ident
  340. + '" id="fnref'
  341. + refid
  342. + '">'
  343. + caption
  344. + "</a></sup>"
  345. )
  346. def render_footnote_block_open(
  347. self: RendererProtocol,
  348. tokens: Sequence[Token],
  349. idx: int,
  350. options: OptionsDict,
  351. env: EnvType,
  352. ) -> str:
  353. return (
  354. (
  355. '<hr class="footnotes-sep" />\n'
  356. if options.xhtmlOut
  357. else '<hr class="footnotes-sep">\n'
  358. )
  359. + '<section class="footnotes">\n'
  360. + '<ol class="footnotes-list">\n'
  361. )
  362. def render_footnote_block_close(
  363. self: RendererProtocol,
  364. tokens: Sequence[Token],
  365. idx: int,
  366. options: OptionsDict,
  367. env: EnvType,
  368. ) -> str:
  369. return "</ol>\n</section>\n"
  370. def render_footnote_open(
  371. self: RendererProtocol,
  372. tokens: Sequence[Token],
  373. idx: int,
  374. options: OptionsDict,
  375. env: EnvType,
  376. ) -> str:
  377. ident: str = self.rules["footnote_anchor_name"](tokens, idx, options, env) # type: ignore[attr-defined]
  378. if tokens[idx].meta.get("subId", -1) > 0:
  379. ident += ":" + tokens[idx].meta["subId"]
  380. return '<li id="fn' + ident + '" class="footnote-item">'
  381. def render_footnote_close(
  382. self: RendererProtocol,
  383. tokens: Sequence[Token],
  384. idx: int,
  385. options: OptionsDict,
  386. env: EnvType,
  387. ) -> str:
  388. return "</li>\n"
  389. def render_footnote_anchor(
  390. self: RendererProtocol,
  391. tokens: Sequence[Token],
  392. idx: int,
  393. options: OptionsDict,
  394. env: EnvType,
  395. ) -> str:
  396. ident: str = self.rules["footnote_anchor_name"](tokens, idx, options, env) # type: ignore[attr-defined]
  397. if tokens[idx].meta["subId"] > 0:
  398. ident += ":" + str(tokens[idx].meta["subId"])
  399. # ↩ with escape code to prevent display as Apple Emoji on iOS
  400. return ' <a href="#fnref' + ident + '" class="footnote-backref">\u21a9\ufe0e</a>'