  1. """
  2. babel.util
  3. ~~~~~~~~~~
  4. Various utility classes and functions.
  5. :copyright: (c) 2013-2025 by the Babel Team.
  6. :license: BSD, see LICENSE for more details.
  7. """
from __future__ import annotations

import codecs
import datetime
import os
import re
import textwrap
import warnings
from collections.abc import Generator, Iterable
from typing import IO, Any, TypeVar

from babel import dates, localtime

missing = object()

_T = TypeVar("_T")


def distinct(iterable: Iterable[_T]) -> Generator[_T, None, None]:
    """Yield all items in an iterable collection that are distinct.

    Unlike when using sets for a similar effect, the original ordering of the
    items in the collection is preserved by this function.

    >>> print(list(distinct([1, 2, 1, 3, 4, 4])))
    [1, 2, 3, 4]
    >>> print(list(distinct('foobar')))
    ['f', 'o', 'b', 'a', 'r']

    :param iterable: the iterable collection providing the data
    """
    seen = set()
    for item in iter(iterable):
        if item not in seen:
            yield item
            seen.add(item)


# Regexp to match python magic encoding line
PYTHON_MAGIC_COMMENT_re = re.compile(
    br'[ \t\f]* \# .* coding[=:][ \t]*([-\w.]+)', re.VERBOSE)


def parse_encoding(fp: IO[bytes]) -> str | None:
    """Deduce the encoding of a source file from its magic comment.

    It does this in the same way as the `Python interpreter`__

    .. __: https://docs.python.org/3.4/reference/lexical_analysis.html#encoding-declarations

    The ``fp`` argument should be a seekable file object.

    (From Jeff Dairiki)
    """
    pos = fp.tell()
    fp.seek(0)
    try:
        line1 = fp.readline()
        has_bom = line1.startswith(codecs.BOM_UTF8)
        if has_bom:
            line1 = line1[len(codecs.BOM_UTF8):]

        m = PYTHON_MAGIC_COMMENT_re.match(line1)
        if not m:
            try:
                import ast
                ast.parse(line1.decode('latin-1'))
            except (ImportError, SyntaxError, UnicodeEncodeError):
                # Either it's a real syntax error, in which case the source is
                # not valid Python source, or line2 is a continuation of
                # line1, in which case we don't want to scan line2 for a magic
                # comment.
                pass
            else:
                line2 = fp.readline()
                m = PYTHON_MAGIC_COMMENT_re.match(line2)

        if has_bom:
            if m:
                magic_comment_encoding = m.group(1).decode('latin-1')
                if magic_comment_encoding != 'utf-8':
                    raise SyntaxError(f"encoding problem: {magic_comment_encoding} with BOM")
            return 'utf-8'
        elif m:
            return m.group(1).decode('latin-1')
        else:
            return None
    finally:
        fp.seek(pos)
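

# Illustrative usage of ``parse_encoding`` (an added example, not part of the
# original module; kept in a comment so importing this file has no side
# effects). Any seekable binary file object works, e.g. ``io.BytesIO``:
#
#     import io
#     src = io.BytesIO(b"# -*- coding: iso-8859-15 -*-\nprint('x')\n")
#     parse_encoding(src)                           # -> 'iso-8859-15'
#     parse_encoding(io.BytesIO(b"print('x')\n"))   # -> None (no magic comment)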


PYTHON_FUTURE_IMPORT_re = re.compile(
    r'from\s+__future__\s+import\s+\(*(.+)\)*')


def parse_future_flags(fp: IO[bytes], encoding: str = 'latin-1') -> int:
    """Parse the compiler flags set by :mod:`__future__` imports in the given
    Python code.
    """
    import __future__
    pos = fp.tell()
    fp.seek(0)
    flags = 0
    try:
        body = fp.read().decode(encoding)

        # Fix up the source to be (hopefully) parsable by regexps.
        # This will likely do untoward things if the source code itself is broken.

        # (1) Fix `import (\n...` to be `import (...`.
        body = re.sub(r'import\s*\([\r\n]+', 'import (', body)

        # (2) Join line-ending commas with the next line.
        body = re.sub(r',\s*[\r\n]+', ', ', body)

        # (3) Remove backslash line continuations.
        body = re.sub(r'\\\s*[\r\n]+', ' ', body)

        for m in PYTHON_FUTURE_IMPORT_re.finditer(body):
            names = [x.strip().strip('()') for x in m.group(1).split(',')]
            for name in names:
                feature = getattr(__future__, name, None)
                if feature:
                    flags |= feature.compiler_flag
    finally:
        fp.seek(pos)
    return flags
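

# Illustrative usage of ``parse_future_flags`` (an added sketch, not part of
# the original module): the returned bitmask is suitable for the ``flags``
# argument of the built-in ``compile()``.
#
#     import io
#     src = io.BytesIO(b"from __future__ import annotations\n")
#     flags = parse_future_flags(src)
#     code = compile("x: int = 1", "<example>", "exec", flags=flags)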


def pathmatch(pattern: str, filename: str) -> bool:
    """Extended pathname pattern matching.

    This function is similar to what is provided by the ``fnmatch`` module in
    the Python standard library, but:

     * can match complete (relative or absolute) path names, and not just file
       names, and
     * also supports a convenience pattern ("**") to match files at any
       directory level.

    Examples:

    >>> pathmatch('**.py', 'bar.py')
    True
    >>> pathmatch('**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('**.py', 'templates/index.html')
    False
    >>> pathmatch('./foo/**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('./foo/**.py', 'bar/baz.py')
    False
    >>> pathmatch('^foo/**.py', 'foo/bar/baz.py')
    True
    >>> pathmatch('^foo/**.py', 'bar/baz.py')
    False
    >>> pathmatch('**/templates/*.html', 'templates/index.html')
    True
    >>> pathmatch('**/templates/*.html', 'templates/foo/bar.html')
    False

    :param pattern: the glob pattern
    :param filename: the path name of the file to match against
    """
    symbols = {
        '?': '[^/]',
        '?/': '[^/]/',
        '*': '[^/]+',
        '*/': '[^/]+/',
        '**/': '(?:.+/)*?',
        '**': '(?:.+/)*?[^/]+',
    }

    if pattern.startswith('^'):
        buf = ['^']
        pattern = pattern[1:]
    elif pattern.startswith('./'):
        buf = ['^']
        pattern = pattern[2:]
    else:
        buf = []

    for idx, part in enumerate(re.split('([?*]+/?)', pattern)):
        if idx % 2:
            buf.append(symbols[part])
        elif part:
            buf.append(re.escape(part))
    match = re.match(f"{''.join(buf)}$", filename.replace(os.sep, "/"))
    return match is not None


class TextWrapper(textwrap.TextWrapper):
    wordsep_re = re.compile(
        r'(\s+|'                                  # any whitespace
        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))',   # em-dash
    )

    # e.g. '\u2068foo bar.py\u2069:42'
    _enclosed_filename_re = re.compile(r'(\u2068[^\u2068]+?\u2069(?::-?\d+)?)')

    def _split(self, text):
        """Splits the text into indivisible chunks while ensuring that file names
        containing spaces are not broken up.
        """
        enclosed_filename_start = '\u2068'
        if enclosed_filename_start not in text:
            # There are no file names containing spaces; fall back to the
            # default implementation.
            return super()._split(text)

        chunks = []
        for chunk in re.split(self._enclosed_filename_re, text):
            if chunk.startswith(enclosed_filename_start):
                chunks.append(chunk)
            else:
                chunks.extend(super()._split(chunk))
        return [c for c in chunks if c]
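

# Illustrative usage of ``TextWrapper`` (an added example, not part of the
# original module): file names enclosed in U+2068/U+2069 bidi isolates are
# kept as single chunks, so a PO location such as '\u2068foo bar.py\u2069:42'
# is never split at its internal space.
#
#     wrapper = TextWrapper(width=20, break_long_words=False)
#     wrapper.wrap('#: \u2068foo bar.py\u2069:42 other.py:7')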


def wraptext(text: str, width: int = 70, initial_indent: str = '', subsequent_indent: str = '') -> list[str]:
    """Simple wrapper around the ``textwrap.wrap`` function in the standard
    library. This version does not wrap lines on hyphens in words. It also
    does not wrap PO file locations containing spaces.

    :param text: the text to wrap
    :param width: the maximum line width
    :param initial_indent: string that will be prepended to the first line of
                           wrapped output
    :param subsequent_indent: string that will be prepended to all lines save
                              the first of wrapped output
    """
    warnings.warn(
        "`babel.util.wraptext` is deprecated and will be removed in a future version of Babel. "
        "If you need this functionality, use the `babel.util.TextWrapper` class directly.",
        DeprecationWarning,
        stacklevel=2,
    )
    wrapper = TextWrapper(width=width, initial_indent=initial_indent,
                          subsequent_indent=subsequent_indent,
                          break_long_words=False)
    return wrapper.wrap(text)


# TODO (Babel 3.x): Remove this re-export
odict = dict


class FixedOffsetTimezone(datetime.tzinfo):
    """Fixed offset in minutes east from UTC."""

    def __init__(self, offset: float, name: str | None = None) -> None:
        self._offset = datetime.timedelta(minutes=offset)
        if name is None:
            name = 'Etc/GMT%+d' % offset
        self.zone = name

    def __str__(self) -> str:
        return self.zone

    def __repr__(self) -> str:
        return f'<FixedOffset "{self.zone}" {self._offset}>'

    def utcoffset(self, dt: datetime.datetime) -> datetime.timedelta:
        return self._offset

    def tzname(self, dt: datetime.datetime) -> str:
        return self.zone

    def dst(self, dt: datetime.datetime) -> datetime.timedelta:
        return ZERO
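

# Illustrative usage of ``FixedOffsetTimezone`` (an added example, not part of
# the original module): the offset is given in minutes east of UTC.
#
#     import datetime
#     tz = FixedOffsetTimezone(-300)   # five hours west of UTC
#     dt = datetime.datetime(2025, 1, 1, 12, 0, tzinfo=tz)
#     dt.utcoffset() == datetime.timedelta(minutes=-300)   # True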


# Export the localtime functionality here because that's
# where it was in the past.
# TODO(3.0): remove these aliases
UTC = dates.UTC
LOCALTZ = dates.LOCALTZ
get_localzone = localtime.get_localzone
STDOFFSET = localtime.STDOFFSET
DSTOFFSET = localtime.DSTOFFSET
DSTDIFF = localtime.DSTDIFF
ZERO = localtime.ZERO


def _cmp(a: Any, b: Any):
    return (a > b) - (a < b)