# table.py
# GFM table, https://github.github.com/gfm/#tables-extension-
  2. from __future__ import annotations
  3. import re
  4. from ..common.utils import charStrAt, isStrSpace
  5. from .state_block import StateBlock
# Matches one cell of the delimiter row: an optional leading ":",
# one or more "-", and an optional trailing ":".
headerLineRe = re.compile(r"^:?-+:?$")
# Matches a "|" at the start or at the end of a string.
# NOTE(review): not referenced in the visible part of this file - confirm
# whether it is still needed.
enclosingPipesRe = re.compile(r"^\||\|$")
  8. def getLine(state: StateBlock, line: int) -> str:
  9. pos = state.bMarks[line] + state.tShift[line]
  10. maximum = state.eMarks[line]
  11. # return state.src.substr(pos, max - pos)
  12. return state.src[pos:maximum]
  13. def escapedSplit(string: str) -> list[str]:
  14. result: list[str] = []
  15. pos = 0
  16. max = len(string)
  17. isEscaped = False
  18. lastPos = 0
  19. current = ""
  20. ch = charStrAt(string, pos)
  21. while pos < max:
  22. if ch == "|":
  23. if not isEscaped:
  24. # pipe separating cells, '|'
  25. result.append(current + string[lastPos:pos])
  26. current = ""
  27. lastPos = pos + 1
  28. else:
  29. # escaped pipe, '\|'
  30. current += string[lastPos : pos - 1]
  31. lastPos = pos
  32. isEscaped = ch == "\\"
  33. pos += 1
  34. ch = charStrAt(string, pos)
  35. result.append(current + string[lastPos:])
  36. return result
  37. def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
  38. tbodyLines = None
  39. # should have at least two lines
  40. if startLine + 2 > endLine:
  41. return False
  42. nextLine = startLine + 1
  43. if state.sCount[nextLine] < state.blkIndent:
  44. return False
  45. if state.is_code_block(nextLine):
  46. return False
  47. # first character of the second line should be '|', '-', ':',
  48. # and no other characters are allowed but spaces;
  49. # basically, this is the equivalent of /^[-:|][-:|\s]*$/ regexp
  50. pos = state.bMarks[nextLine] + state.tShift[nextLine]
  51. if pos >= state.eMarks[nextLine]:
  52. return False
  53. first_ch = state.src[pos]
  54. pos += 1
  55. if first_ch not in ("|", "-", ":"):
  56. return False
  57. if pos >= state.eMarks[nextLine]:
  58. return False
  59. second_ch = state.src[pos]
  60. pos += 1
  61. if second_ch not in ("|", "-", ":") and not isStrSpace(second_ch):
  62. return False
  63. # if first character is '-', then second character must not be a space
  64. # (due to parsing ambiguity with list)
  65. if first_ch == "-" and isStrSpace(second_ch):
  66. return False
  67. while pos < state.eMarks[nextLine]:
  68. ch = state.src[pos]
  69. if ch not in ("|", "-", ":") and not isStrSpace(ch):
  70. return False
  71. pos += 1
  72. lineText = getLine(state, startLine + 1)
  73. columns = lineText.split("|")
  74. aligns = []
  75. for i in range(len(columns)):
  76. t = columns[i].strip()
  77. if not t:
  78. # allow empty columns before and after table, but not in between columns;
  79. # e.g. allow ` |---| `, disallow ` ---||--- `
  80. if i == 0 or i == len(columns) - 1:
  81. continue
  82. else:
  83. return False
  84. if not headerLineRe.search(t):
  85. return False
  86. if charStrAt(t, len(t) - 1) == ":":
  87. aligns.append("center" if charStrAt(t, 0) == ":" else "right")
  88. elif charStrAt(t, 0) == ":":
  89. aligns.append("left")
  90. else:
  91. aligns.append("")
  92. lineText = getLine(state, startLine).strip()
  93. if "|" not in lineText:
  94. return False
  95. if state.is_code_block(startLine):
  96. return False
  97. columns = escapedSplit(lineText)
  98. if columns and columns[0] == "":
  99. columns.pop(0)
  100. if columns and columns[-1] == "":
  101. columns.pop()
  102. # header row will define an amount of columns in the entire table,
  103. # and align row should be exactly the same (the rest of the rows can differ)
  104. columnCount = len(columns)
  105. if columnCount == 0 or columnCount != len(aligns):
  106. return False
  107. if silent:
  108. return True
  109. oldParentType = state.parentType
  110. state.parentType = "table"
  111. # use 'blockquote' lists for termination because it's
  112. # the most similar to tables
  113. terminatorRules = state.md.block.ruler.getRules("blockquote")
  114. token = state.push("table_open", "table", 1)
  115. token.map = tableLines = [startLine, 0]
  116. token = state.push("thead_open", "thead", 1)
  117. token.map = [startLine, startLine + 1]
  118. token = state.push("tr_open", "tr", 1)
  119. token.map = [startLine, startLine + 1]
  120. for i in range(len(columns)):
  121. token = state.push("th_open", "th", 1)
  122. if aligns[i]:
  123. token.attrs = {"style": "text-align:" + aligns[i]}
  124. token = state.push("inline", "", 0)
  125. # note in markdown-it this map was removed in v12.0.0 however, we keep it,
  126. # since it is helpful to propagate to children tokens
  127. token.map = [startLine, startLine + 1]
  128. token.content = columns[i].strip()
  129. token.children = []
  130. token = state.push("th_close", "th", -1)
  131. token = state.push("tr_close", "tr", -1)
  132. token = state.push("thead_close", "thead", -1)
  133. nextLine = startLine + 2
  134. while nextLine < endLine:
  135. if state.sCount[nextLine] < state.blkIndent:
  136. break
  137. terminate = False
  138. for i in range(len(terminatorRules)):
  139. if terminatorRules[i](state, nextLine, endLine, True):
  140. terminate = True
  141. break
  142. if terminate:
  143. break
  144. lineText = getLine(state, nextLine).strip()
  145. if not lineText:
  146. break
  147. if state.is_code_block(nextLine):
  148. break
  149. columns = escapedSplit(lineText)
  150. if columns and columns[0] == "":
  151. columns.pop(0)
  152. if columns and columns[-1] == "":
  153. columns.pop()
  154. if nextLine == startLine + 2:
  155. token = state.push("tbody_open", "tbody", 1)
  156. token.map = tbodyLines = [startLine + 2, 0]
  157. token = state.push("tr_open", "tr", 1)
  158. token.map = [nextLine, nextLine + 1]
  159. for i in range(columnCount):
  160. token = state.push("td_open", "td", 1)
  161. if aligns[i]:
  162. token.attrs = {"style": "text-align:" + aligns[i]}
  163. token = state.push("inline", "", 0)
  164. # note in markdown-it this map was removed in v12.0.0 however, we keep it,
  165. # since it is helpful to propagate to children tokens
  166. token.map = [nextLine, nextLine + 1]
  167. try:
  168. token.content = columns[i].strip() if columns[i] else ""
  169. except IndexError:
  170. token.content = ""
  171. token.children = []
  172. token = state.push("td_close", "td", -1)
  173. token = state.push("tr_close", "tr", -1)
  174. nextLine += 1
  175. if tbodyLines:
  176. token = state.push("tbody_close", "tbody", -1)
  177. tbodyLines[1] = nextLine
  178. token = state.push("table_close", "table", -1)
  179. tableLines[1] = nextLine
  180. state.parentType = oldParentType
  181. state.line = nextLine
  182. return True