entity.py 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
# Process HTML entities - &#123;, &#xAF;, &quot;, ...
  2. import re
  3. from ..common.entities import entities
  4. from ..common.utils import fromCodePoint, isValidEntityCode
  5. from .state_inline import StateInline
  6. DIGITAL_RE = re.compile(r"^&#((?:x[a-f0-9]{1,6}|[0-9]{1,7}));", re.IGNORECASE)
  7. NAMED_RE = re.compile(r"^&([a-z][a-z0-9]{1,31});", re.IGNORECASE)
  8. def entity(state: StateInline, silent: bool) -> bool:
  9. pos = state.pos
  10. maximum = state.posMax
  11. if state.src[pos] != "&":
  12. return False
  13. if pos + 1 >= maximum:
  14. return False
  15. if state.src[pos + 1] == "#":
  16. if match := DIGITAL_RE.search(state.src[pos:]):
  17. if not silent:
  18. match1 = match.group(1)
  19. code = (
  20. int(match1[1:], 16) if match1[0].lower() == "x" else int(match1, 10)
  21. )
  22. token = state.push("text_special", "", 0)
  23. token.content = (
  24. fromCodePoint(code)
  25. if isValidEntityCode(code)
  26. else fromCodePoint(0xFFFD)
  27. )
  28. token.markup = match.group(0)
  29. token.info = "entity"
  30. state.pos += len(match.group(0))
  31. return True
  32. else:
  33. if (match := NAMED_RE.search(state.src[pos:])) and match.group(1) in entities:
  34. if not silent:
  35. token = state.push("text_special", "", 0)
  36. token.content = entities[match.group(1)]
  37. token.markup = match.group(0)
  38. token.info = "entity"
  39. state.pos += len(match.group(0))
  40. return True
  41. return False