extract.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. # ext/extract.py
  2. # Copyright 2006-2025 the Mako authors and contributors <see AUTHORS file>
  3. #
  4. # This module is part of Mako and is released under
  5. # the MIT License: http://www.opensource.org/licenses/mit-license.php
  6. from io import BytesIO
  7. from io import StringIO
  8. import re
  9. from mako import lexer
  10. from mako import parsetree
  11. class MessageExtractor:
  12. use_bytes = True
  13. def process_file(self, fileobj):
  14. template_node = lexer.Lexer(
  15. fileobj.read(), input_encoding=self.config["encoding"]
  16. ).parse()
  17. yield from self.extract_nodes(template_node.get_children())
  18. def extract_nodes(self, nodes):
  19. translator_comments = []
  20. in_translator_comments = False
  21. input_encoding = self.config["encoding"] or "ascii"
  22. comment_tags = list(
  23. filter(None, re.split(r"\s+", self.config["comment-tags"]))
  24. )
  25. for node in nodes:
  26. child_nodes = None
  27. if (
  28. in_translator_comments
  29. and isinstance(node, parsetree.Text)
  30. and not node.content.strip()
  31. ):
  32. # Ignore whitespace within translator comments
  33. continue
  34. if isinstance(node, parsetree.Comment):
  35. value = node.text.strip()
  36. if in_translator_comments:
  37. translator_comments.extend(
  38. self._split_comment(node.lineno, value)
  39. )
  40. continue
  41. for comment_tag in comment_tags:
  42. if value.startswith(comment_tag):
  43. in_translator_comments = True
  44. translator_comments.extend(
  45. self._split_comment(node.lineno, value)
  46. )
  47. continue
  48. if isinstance(node, parsetree.DefTag):
  49. code = node.function_decl.code
  50. child_nodes = node.nodes
  51. elif isinstance(node, parsetree.BlockTag):
  52. code = node.body_decl.code
  53. child_nodes = node.nodes
  54. elif isinstance(node, parsetree.CallTag):
  55. code = node.code.code
  56. child_nodes = node.nodes
  57. elif isinstance(node, parsetree.PageTag):
  58. code = node.body_decl.code
  59. elif isinstance(node, parsetree.CallNamespaceTag):
  60. code = node.expression
  61. child_nodes = node.nodes
  62. elif isinstance(node, parsetree.ControlLine):
  63. if node.isend:
  64. in_translator_comments = False
  65. continue
  66. code = node.text
  67. elif isinstance(node, parsetree.Code):
  68. in_translator_comments = False
  69. code = node.code.code
  70. elif isinstance(node, parsetree.Expression):
  71. code = node.code.code
  72. else:
  73. continue
  74. # Comments don't apply unless they immediately precede the message
  75. if (
  76. translator_comments
  77. and translator_comments[-1][0] < node.lineno - 1
  78. ):
  79. translator_comments = []
  80. translator_strings = [
  81. comment[1] for comment in translator_comments
  82. ]
  83. if isinstance(code, str) and self.use_bytes:
  84. code = code.encode(input_encoding, "backslashreplace")
  85. used_translator_comments = False
  86. # We add extra newline to work around a pybabel bug
  87. # (see python-babel/babel#274, parse_encoding dies if the first
  88. # input string of the input is non-ascii)
  89. # Also, because we added it, we have to subtract one from
  90. # node.lineno
  91. if self.use_bytes:
  92. code = BytesIO(b"\n" + code)
  93. else:
  94. code = StringIO("\n" + code)
  95. for message in self.process_python(
  96. code, node.lineno - 1, translator_strings
  97. ):
  98. yield message
  99. used_translator_comments = True
  100. if used_translator_comments:
  101. translator_comments = []
  102. in_translator_comments = False
  103. if child_nodes:
  104. yield from self.extract_nodes(child_nodes)
  105. @staticmethod
  106. def _split_comment(lineno, comment):
  107. """Return the multiline comment at lineno split into a list of
  108. comment line numbers and the accompanying comment line"""
  109. return [
  110. (lineno + index, line)
  111. for index, line in enumerate(comment.splitlines())
  112. ]