| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129 |
- # ext/extract.py
- # Copyright 2006-2025 the Mako authors and contributors <see AUTHORS file>
- #
- # This module is part of Mako and is released under
- # the MIT License: http://www.opensource.org/licenses/mit-license.php
- from io import BytesIO
- from io import StringIO
- import re
- from mako import lexer
- from mako import parsetree
class MessageExtractor:
    """Walk a parsed Mako template and extract translatable messages.

    This class does not define them itself, but it reads ``self.config``
    (using the ``"encoding"`` and ``"comment-tags"`` keys) and calls
    ``self.process_python(code, lineno, translator_strings)``; both are
    expected to be supplied by a subclass or by the caller.
    """

    # When true, Python snippets are encoded and handed to
    # ``process_python`` as a ``BytesIO``; otherwise as a ``StringIO``.
    use_bytes = True

    def process_file(self, fileobj):
        """Parse the template read from ``fileobj`` and yield its messages.

        The whole file is read at once, lexed with the configured input
        encoding, and the top-level nodes are passed to
        :meth:`extract_nodes`.
        """
        template_node = lexer.Lexer(
            fileobj.read(), input_encoding=self.config["encoding"]
        ).parse()
        yield from self.extract_nodes(template_node.get_children())

    def extract_nodes(self, nodes):
        """Yield every message found in ``nodes`` and in their children.

        Template comments that start with one of the whitespace-separated
        tags in ``self.config["comment-tags"]`` open a run of translator
        comments; those comments are passed along to ``process_python``
        for the code node that immediately follows them.
        """
        translator_comments = []
        in_translator_comments = False
        input_encoding = self.config["encoding"] or "ascii"
        # A tuple so that ``str.startswith`` can test all tags in one call.
        comment_tags = tuple(
            tag
            for tag in re.split(r"\s+", self.config["comment-tags"])
            if tag
        )

        for node in nodes:
            child_nodes = None
            if (
                in_translator_comments
                and isinstance(node, parsetree.Text)
                and not node.content.strip()
            ):
                # Ignore whitespace within translator comments
                continue

            if isinstance(node, parsetree.Comment):
                value = node.text.strip()
                # Record the comment exactly once, either because it
                # continues an open translator-comment run or because it
                # starts one.  Checking all tags in a single call avoids
                # adding the same comment once per matching tag when
                # several tags share a prefix.
                if in_translator_comments or value.startswith(comment_tags):
                    in_translator_comments = True
                    translator_comments.extend(
                        self._split_comment(node.lineno, value)
                    )
                continue

            if isinstance(node, parsetree.DefTag):
                code = node.function_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.BlockTag):
                code = node.body_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.CallTag):
                code = node.code.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.PageTag):
                code = node.body_decl.code
            elif isinstance(node, parsetree.CallNamespaceTag):
                code = node.expression
                child_nodes = node.nodes
            elif isinstance(node, parsetree.ControlLine):
                if node.isend:
                    in_translator_comments = False
                    continue
                code = node.text
            elif isinstance(node, parsetree.Code):
                in_translator_comments = False
                code = node.code.code
            elif isinstance(node, parsetree.Expression):
                code = node.code.code
            else:
                continue

            # Comments don't apply unless they immediately precede the
            # message
            if (
                translator_comments
                and translator_comments[-1][0] < node.lineno - 1
            ):
                translator_comments = []

            translator_strings = [
                comment[1] for comment in translator_comments
            ]

            if isinstance(code, str) and self.use_bytes:
                code = code.encode(input_encoding, "backslashreplace")

            used_translator_comments = False
            # We add extra newline to work around a pybabel bug
            # (see python-babel/babel#274, parse_encoding dies if the first
            # input string of the input is non-ascii)
            # Also, because we added it, we have to subtract one from
            # node.lineno
            if self.use_bytes:
                code = BytesIO(b"\n" + code)
            else:
                code = StringIO("\n" + code)

            for message in self.process_python(
                code, node.lineno - 1, translator_strings
            ):
                yield message
                used_translator_comments = True

            # Translator comments are consumed by the first node that
            # actually produced a message.
            if used_translator_comments:
                translator_comments = []
            in_translator_comments = False

            if child_nodes:
                yield from self.extract_nodes(child_nodes)

    @staticmethod
    def _split_comment(lineno, comment):
        """Return the multiline comment at lineno split into a list of
        ``(line number, comment line)`` tuples, numbering the first line
        ``lineno`` and each following line one higher."""
        return [
            (lineno + index, line)
            for index, line in enumerate(comment.splitlines())
        ]
|