catalog.py 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000
"""
    babel.messages.catalog
    ~~~~~~~~~~~~~~~~~~~~~~

    Data structures for message catalogs.

    :copyright: (c) 2013-2025 by the Babel Team.
    :license: BSD, see LICENSE for more details.
"""
  8. from __future__ import annotations
  9. import datetime
  10. import re
  11. from collections.abc import Iterable, Iterator
  12. from copy import copy
  13. from difflib import SequenceMatcher
  14. from email import message_from_string
  15. from heapq import nlargest
  16. from string import Formatter
  17. from typing import TYPE_CHECKING
  18. from babel import __version__ as VERSION
  19. from babel.core import Locale, UnknownLocaleError
  20. from babel.dates import format_datetime
  21. from babel.messages.plurals import get_plural
  22. from babel.util import LOCALTZ, FixedOffsetTimezone, _cmp, distinct
  23. if TYPE_CHECKING:
  24. from typing_extensions import TypeAlias
  25. _MessageID: TypeAlias = str | tuple[str, ...] | list[str]
  26. __all__ = [
  27. 'DEFAULT_HEADER',
  28. 'PYTHON_FORMAT',
  29. 'Catalog',
  30. 'Message',
  31. 'TranslationError',
  32. ]
  33. def get_close_matches(word, possibilities, n=3, cutoff=0.6):
  34. """A modified version of ``difflib.get_close_matches``.
  35. It just passes ``autojunk=False`` to the ``SequenceMatcher``, to work
  36. around https://github.com/python/cpython/issues/90825.
  37. """
  38. if not n > 0: # pragma: no cover
  39. raise ValueError(f"n must be > 0: {n!r}")
  40. if not 0.0 <= cutoff <= 1.0: # pragma: no cover
  41. raise ValueError(f"cutoff must be in [0.0, 1.0]: {cutoff!r}")
  42. result = []
  43. s = SequenceMatcher(autojunk=False) # only line changed from difflib.py
  44. s.set_seq2(word)
  45. for x in possibilities:
  46. s.set_seq1(x)
  47. if s.real_quick_ratio() >= cutoff and \
  48. s.quick_ratio() >= cutoff and \
  49. s.ratio() >= cutoff:
  50. result.append((s.ratio(), x))
  51. # Move the best scorers to head of list
  52. result = nlargest(n, result)
  53. # Strip scores for the best n matches
  54. return [x for score, x in result]
  55. PYTHON_FORMAT = re.compile(r'''
  56. \%
  57. (?:\(([\w]*)\))?
  58. (
  59. [-#0\ +]?(?:\*|[\d]+)?
  60. (?:\.(?:\*|[\d]+))?
  61. [hlL]?
  62. )
  63. ([diouxXeEfFgGcrs%])
  64. ''', re.VERBOSE)
  65. def _has_python_brace_format(string: str) -> bool:
  66. if "{" not in string:
  67. return False
  68. fmt = Formatter()
  69. try:
  70. # `fmt.parse` returns 3-or-4-tuples of the form
  71. # `(literal_text, field_name, format_spec, conversion)`;
  72. # if `field_name` is set, this smells like brace format
  73. field_name_seen = False
  74. for t in fmt.parse(string):
  75. if t[1] is not None:
  76. field_name_seen = True
  77. # We cannot break here, as we need to consume the whole string
  78. # to ensure that it is a valid format string.
  79. except ValueError:
  80. return False
  81. return field_name_seen
  82. def _parse_datetime_header(value: str) -> datetime.datetime:
  83. match = re.match(r'^(?P<datetime>.*?)(?P<tzoffset>[+-]\d{4})?$', value)
  84. dt = datetime.datetime.strptime(match.group('datetime'), '%Y-%m-%d %H:%M')
  85. # Separate the offset into a sign component, hours, and # minutes
  86. tzoffset = match.group('tzoffset')
  87. if tzoffset is not None:
  88. plus_minus_s, rest = tzoffset[0], tzoffset[1:]
  89. hours_offset_s, mins_offset_s = rest[:2], rest[2:]
  90. # Make them all integers
  91. plus_minus = int(f"{plus_minus_s}1")
  92. hours_offset = int(hours_offset_s)
  93. mins_offset = int(mins_offset_s)
  94. # Calculate net offset
  95. net_mins_offset = hours_offset * 60
  96. net_mins_offset += mins_offset
  97. net_mins_offset *= plus_minus
  98. # Create an offset object
  99. tzoffset = FixedOffsetTimezone(net_mins_offset)
  100. # Store the offset in a datetime object
  101. dt = dt.replace(tzinfo=tzoffset)
  102. return dt
  103. class Message:
  104. """Representation of a single message in a catalog."""
  105. def __init__(
  106. self,
  107. id: _MessageID,
  108. string: _MessageID | None = '',
  109. locations: Iterable[tuple[str, int]] = (),
  110. flags: Iterable[str] = (),
  111. auto_comments: Iterable[str] = (),
  112. user_comments: Iterable[str] = (),
  113. previous_id: _MessageID = (),
  114. lineno: int | None = None,
  115. context: str | None = None,
  116. ) -> None:
  117. """Create the message object.
  118. :param id: the message ID, or a ``(singular, plural)`` tuple for
  119. pluralizable messages
  120. :param string: the translated message string, or a
  121. ``(singular, plural)`` tuple for pluralizable messages
  122. :param locations: a sequence of ``(filename, lineno)`` tuples
  123. :param flags: a set or sequence of flags
  124. :param auto_comments: a sequence of automatic comments for the message
  125. :param user_comments: a sequence of user comments for the message
  126. :param previous_id: the previous message ID, or a ``(singular, plural)``
  127. tuple for pluralizable messages
  128. :param lineno: the line number on which the msgid line was found in the
  129. PO file, if any
  130. :param context: the message context
  131. """
  132. self.id = id
  133. if not string and self.pluralizable:
  134. string = ('', '')
  135. self.string = string
  136. self.locations = list(distinct(locations))
  137. self.flags = set(flags)
  138. if id and self.python_format:
  139. self.flags.add('python-format')
  140. else:
  141. self.flags.discard('python-format')
  142. if id and self.python_brace_format:
  143. self.flags.add('python-brace-format')
  144. else:
  145. self.flags.discard('python-brace-format')
  146. self.auto_comments = list(distinct(auto_comments))
  147. self.user_comments = list(distinct(user_comments))
  148. if isinstance(previous_id, str):
  149. self.previous_id = [previous_id]
  150. else:
  151. self.previous_id = list(previous_id)
  152. self.lineno = lineno
  153. self.context = context
  154. def __repr__(self) -> str:
  155. return f"<{type(self).__name__} {self.id!r} (flags: {list(self.flags)!r})>"
  156. def __cmp__(self, other: object) -> int:
  157. """Compare Messages, taking into account plural ids"""
  158. def values_to_compare(obj):
  159. if isinstance(obj, Message) and obj.pluralizable:
  160. return obj.id[0], obj.context or ''
  161. return obj.id, obj.context or ''
  162. return _cmp(values_to_compare(self), values_to_compare(other))
  163. def __gt__(self, other: object) -> bool:
  164. return self.__cmp__(other) > 0
  165. def __lt__(self, other: object) -> bool:
  166. return self.__cmp__(other) < 0
  167. def __ge__(self, other: object) -> bool:
  168. return self.__cmp__(other) >= 0
  169. def __le__(self, other: object) -> bool:
  170. return self.__cmp__(other) <= 0
  171. def __eq__(self, other: object) -> bool:
  172. return self.__cmp__(other) == 0
  173. def __ne__(self, other: object) -> bool:
  174. return self.__cmp__(other) != 0
  175. def is_identical(self, other: Message) -> bool:
  176. """Checks whether messages are identical, taking into account all
  177. properties.
  178. """
  179. assert isinstance(other, Message)
  180. return self.__dict__ == other.__dict__
  181. def clone(self) -> Message:
  182. return Message(*map(copy, (self.id, self.string, self.locations,
  183. self.flags, self.auto_comments,
  184. self.user_comments, self.previous_id,
  185. self.lineno, self.context)))
  186. def check(self, catalog: Catalog | None = None) -> list[TranslationError]:
  187. """Run various validation checks on the message. Some validations
  188. are only performed if the catalog is provided. This method returns
  189. a sequence of `TranslationError` objects.
  190. :rtype: ``iterator``
  191. :param catalog: A catalog instance that is passed to the checkers
  192. :see: `Catalog.check` for a way to perform checks for all messages
  193. in a catalog.
  194. """
  195. from babel.messages.checkers import checkers
  196. errors: list[TranslationError] = []
  197. for checker in checkers:
  198. try:
  199. checker(catalog, self)
  200. except TranslationError as e:
  201. errors.append(e)
  202. return errors
  203. @property
  204. def fuzzy(self) -> bool:
  205. """Whether the translation is fuzzy.
  206. >>> Message('foo').fuzzy
  207. False
  208. >>> msg = Message('foo', 'foo', flags=['fuzzy'])
  209. >>> msg.fuzzy
  210. True
  211. >>> msg
  212. <Message 'foo' (flags: ['fuzzy'])>
  213. :type: `bool`"""
  214. return 'fuzzy' in self.flags
  215. @property
  216. def pluralizable(self) -> bool:
  217. """Whether the message is plurizable.
  218. >>> Message('foo').pluralizable
  219. False
  220. >>> Message(('foo', 'bar')).pluralizable
  221. True
  222. :type: `bool`"""
  223. return isinstance(self.id, (list, tuple))
  224. @property
  225. def python_format(self) -> bool:
  226. """Whether the message contains Python-style parameters.
  227. >>> Message('foo %(name)s bar').python_format
  228. True
  229. >>> Message(('foo %(name)s', 'foo %(name)s')).python_format
  230. True
  231. :type: `bool`"""
  232. ids = self.id
  233. if not isinstance(ids, (list, tuple)):
  234. ids = [ids]
  235. return any(PYTHON_FORMAT.search(id) for id in ids)
  236. @property
  237. def python_brace_format(self) -> bool:
  238. """Whether the message contains Python f-string parameters.
  239. >>> Message('Hello, {name}!').python_brace_format
  240. True
  241. >>> Message(('One apple', '{count} apples')).python_brace_format
  242. True
  243. :type: `bool`"""
  244. ids = self.id
  245. if not isinstance(ids, (list, tuple)):
  246. ids = [ids]
  247. return any(_has_python_brace_format(id) for id in ids)
  248. class TranslationError(Exception):
  249. """Exception thrown by translation checkers when invalid message
  250. translations are encountered."""
  251. DEFAULT_HEADER = """\
  252. # Translations template for PROJECT.
  253. # Copyright (C) YEAR ORGANIZATION
  254. # This file is distributed under the same license as the PROJECT project.
  255. # FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
  256. #"""
  257. def parse_separated_header(value: str) -> dict[str, str]:
  258. # Adapted from https://peps.python.org/pep-0594/#cgi
  259. from email.message import Message
  260. m = Message()
  261. m['content-type'] = value
  262. return dict(m.get_params())
  263. def _force_text(s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict') -> str:
  264. if isinstance(s, str):
  265. return s
  266. if isinstance(s, bytes):
  267. return s.decode(encoding, errors)
  268. return str(s)
  269. class Catalog:
  270. """Representation of a message catalog."""
  271. def __init__(
  272. self,
  273. locale: Locale | str | None = None,
  274. domain: str | None = None,
  275. header_comment: str | None = DEFAULT_HEADER,
  276. project: str | None = None,
  277. version: str | None = None,
  278. copyright_holder: str | None = None,
  279. msgid_bugs_address: str | None = None,
  280. creation_date: datetime.datetime | str | None = None,
  281. revision_date: datetime.datetime | datetime.time | float | str | None = None,
  282. last_translator: str | None = None,
  283. language_team: str | None = None,
  284. charset: str | None = None,
  285. fuzzy: bool = True,
  286. ) -> None:
  287. """Initialize the catalog object.
  288. :param locale: the locale identifier or `Locale` object, or `None`
  289. if the catalog is not bound to a locale (which basically
  290. means it's a template)
  291. :param domain: the message domain
  292. :param header_comment: the header comment as string, or `None` for the
  293. default header
  294. :param project: the project's name
  295. :param version: the project's version
  296. :param copyright_holder: the copyright holder of the catalog
  297. :param msgid_bugs_address: the email address or URL to submit bug
  298. reports to
  299. :param creation_date: the date the catalog was created
  300. :param revision_date: the date the catalog was revised
  301. :param last_translator: the name and email of the last translator
  302. :param language_team: the name and email of the language team
  303. :param charset: the encoding to use in the output (defaults to utf-8)
  304. :param fuzzy: the fuzzy bit on the catalog header
  305. """
  306. self.domain = domain
  307. self.locale = locale
  308. self._header_comment = header_comment
  309. self._messages: dict[str | tuple[str, str], Message] = {}
  310. self.project = project or 'PROJECT'
  311. self.version = version or 'VERSION'
  312. self.copyright_holder = copyright_holder or 'ORGANIZATION'
  313. self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'
  314. self.last_translator = last_translator or 'FULL NAME <EMAIL@ADDRESS>'
  315. """Name and email address of the last translator."""
  316. self.language_team = language_team or 'LANGUAGE <LL@li.org>'
  317. """Name and email address of the language team."""
  318. self.charset = charset or 'utf-8'
  319. if creation_date is None:
  320. creation_date = datetime.datetime.now(LOCALTZ)
  321. elif isinstance(creation_date, datetime.datetime) and not creation_date.tzinfo:
  322. creation_date = creation_date.replace(tzinfo=LOCALTZ)
  323. self.creation_date = creation_date
  324. if revision_date is None:
  325. revision_date = 'YEAR-MO-DA HO:MI+ZONE'
  326. elif isinstance(revision_date, datetime.datetime) and not revision_date.tzinfo:
  327. revision_date = revision_date.replace(tzinfo=LOCALTZ)
  328. self.revision_date = revision_date
  329. self.fuzzy = fuzzy
  330. # Dictionary of obsolete messages
  331. self.obsolete: dict[str | tuple[str, str], Message] = {}
  332. self._num_plurals = None
  333. self._plural_expr = None
  334. def _set_locale(self, locale: Locale | str | None) -> None:
  335. if locale is None:
  336. self._locale_identifier = None
  337. self._locale = None
  338. return
  339. if isinstance(locale, Locale):
  340. self._locale_identifier = str(locale)
  341. self._locale = locale
  342. return
  343. if isinstance(locale, str):
  344. self._locale_identifier = str(locale)
  345. try:
  346. self._locale = Locale.parse(locale)
  347. except UnknownLocaleError:
  348. self._locale = None
  349. return
  350. raise TypeError(f"`locale` must be a Locale, a locale identifier string, or None; got {locale!r}")
  351. def _get_locale(self) -> Locale | None:
  352. return self._locale
  353. def _get_locale_identifier(self) -> str | None:
  354. return self._locale_identifier
  355. locale = property(_get_locale, _set_locale)
  356. locale_identifier = property(_get_locale_identifier)
  357. def _get_header_comment(self) -> str:
  358. comment = self._header_comment
  359. year = datetime.datetime.now(LOCALTZ).strftime('%Y')
  360. if hasattr(self.revision_date, 'strftime'):
  361. year = self.revision_date.strftime('%Y')
  362. comment = comment.replace('PROJECT', self.project) \
  363. .replace('VERSION', self.version) \
  364. .replace('YEAR', year) \
  365. .replace('ORGANIZATION', self.copyright_holder)
  366. locale_name = (self.locale.english_name if self.locale else self.locale_identifier)
  367. if locale_name:
  368. comment = comment.replace("Translations template", f"{locale_name} translations")
  369. return comment
  370. def _set_header_comment(self, string: str | None) -> None:
  371. self._header_comment = string
  372. header_comment = property(_get_header_comment, _set_header_comment, doc="""\
  373. The header comment for the catalog.
  374. >>> catalog = Catalog(project='Foobar', version='1.0',
  375. ... copyright_holder='Foo Company')
  376. >>> print(catalog.header_comment) #doctest: +ELLIPSIS
  377. # Translations template for Foobar.
  378. # Copyright (C) ... Foo Company
  379. # This file is distributed under the same license as the Foobar project.
  380. # FIRST AUTHOR <EMAIL@ADDRESS>, ....
  381. #
  382. The header can also be set from a string. Any known upper-case variables
  383. will be replaced when the header is retrieved again:
  384. >>> catalog = Catalog(project='Foobar', version='1.0',
  385. ... copyright_holder='Foo Company')
  386. >>> catalog.header_comment = '''\\
  387. ... # The POT for my really cool PROJECT project.
  388. ... # Copyright (C) 1990-2003 ORGANIZATION
  389. ... # This file is distributed under the same license as the PROJECT
  390. ... # project.
  391. ... #'''
  392. >>> print(catalog.header_comment)
  393. # The POT for my really cool Foobar project.
  394. # Copyright (C) 1990-2003 Foo Company
  395. # This file is distributed under the same license as the Foobar
  396. # project.
  397. #
  398. :type: `unicode`
  399. """)
  400. def _get_mime_headers(self) -> list[tuple[str, str]]:
  401. if isinstance(self.revision_date, (datetime.datetime, datetime.time, int, float)):
  402. revision_date = format_datetime(self.revision_date, 'yyyy-MM-dd HH:mmZ', locale='en')
  403. else:
  404. revision_date = self.revision_date
  405. language_team = self.language_team
  406. if self.locale_identifier and 'LANGUAGE' in language_team:
  407. language_team = language_team.replace('LANGUAGE', str(self.locale_identifier))
  408. headers: list[tuple[str, str]] = [
  409. ("Project-Id-Version", f"{self.project} {self.version}"),
  410. ('Report-Msgid-Bugs-To', self.msgid_bugs_address),
  411. ('POT-Creation-Date', format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ', locale='en')),
  412. ('PO-Revision-Date', revision_date),
  413. ('Last-Translator', self.last_translator),
  414. ]
  415. if self.locale_identifier:
  416. headers.append(('Language', str(self.locale_identifier)))
  417. headers.append(('Language-Team', language_team))
  418. if self.locale is not None:
  419. headers.append(('Plural-Forms', self.plural_forms))
  420. headers += [
  421. ('MIME-Version', '1.0'),
  422. ("Content-Type", f"text/plain; charset={self.charset}"),
  423. ('Content-Transfer-Encoding', '8bit'),
  424. ("Generated-By", f"Babel {VERSION}\n"),
  425. ]
  426. return headers
  427. def _set_mime_headers(self, headers: Iterable[tuple[str, str]]) -> None:
  428. for name, value in headers:
  429. name = _force_text(name.lower(), encoding=self.charset)
  430. value = _force_text(value, encoding=self.charset)
  431. if name == 'project-id-version':
  432. parts = value.split(' ')
  433. self.project = ' '.join(parts[:-1])
  434. self.version = parts[-1]
  435. elif name == 'report-msgid-bugs-to':
  436. self.msgid_bugs_address = value
  437. elif name == 'last-translator':
  438. self.last_translator = value
  439. elif name == 'language':
  440. value = value.replace('-', '_')
  441. # The `or None` makes sure that the locale is set to None
  442. # if the header's value is an empty string, which is what
  443. # some tools generate (instead of eliding the empty Language
  444. # header altogether).
  445. self._set_locale(value or None)
  446. elif name == 'language-team':
  447. self.language_team = value
  448. elif name == 'content-type':
  449. params = parse_separated_header(value)
  450. if 'charset' in params:
  451. self.charset = params['charset'].lower()
  452. elif name == 'plural-forms':
  453. params = parse_separated_header(f" ;{value}")
  454. self._num_plurals = int(params.get('nplurals', 2))
  455. self._plural_expr = params.get('plural', '(n != 1)')
  456. elif name == 'pot-creation-date':
  457. self.creation_date = _parse_datetime_header(value)
  458. elif name == 'po-revision-date':
  459. # Keep the value if it's not the default one
  460. if 'YEAR' not in value:
  461. self.revision_date = _parse_datetime_header(value)
  462. mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\
  463. The MIME headers of the catalog, used for the special ``msgid ""`` entry.
  464. The behavior of this property changes slightly depending on whether a locale
  465. is set or not, the latter indicating that the catalog is actually a template
  466. for actual translations.
  467. Here's an example of the output for such a catalog template:
  468. >>> from babel.dates import UTC
  469. >>> from datetime import datetime
  470. >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC)
  471. >>> catalog = Catalog(project='Foobar', version='1.0',
  472. ... creation_date=created)
  473. >>> for name, value in catalog.mime_headers:
  474. ... print('%s: %s' % (name, value))
  475. Project-Id-Version: Foobar 1.0
  476. Report-Msgid-Bugs-To: EMAIL@ADDRESS
  477. POT-Creation-Date: 1990-04-01 15:30+0000
  478. PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE
  479. Last-Translator: FULL NAME <EMAIL@ADDRESS>
  480. Language-Team: LANGUAGE <LL@li.org>
  481. MIME-Version: 1.0
  482. Content-Type: text/plain; charset=utf-8
  483. Content-Transfer-Encoding: 8bit
  484. Generated-By: Babel ...
  485. And here's an example of the output when the locale is set:
  486. >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC)
  487. >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0',
  488. ... creation_date=created, revision_date=revised,
  489. ... last_translator='John Doe <jd@example.com>',
  490. ... language_team='de_DE <de@example.com>')
  491. >>> for name, value in catalog.mime_headers:
  492. ... print('%s: %s' % (name, value))
  493. Project-Id-Version: Foobar 1.0
  494. Report-Msgid-Bugs-To: EMAIL@ADDRESS
  495. POT-Creation-Date: 1990-04-01 15:30+0000
  496. PO-Revision-Date: 1990-08-03 12:00+0000
  497. Last-Translator: John Doe <jd@example.com>
  498. Language: de_DE
  499. Language-Team: de_DE <de@example.com>
  500. Plural-Forms: nplurals=2; plural=(n != 1);
  501. MIME-Version: 1.0
  502. Content-Type: text/plain; charset=utf-8
  503. Content-Transfer-Encoding: 8bit
  504. Generated-By: Babel ...
  505. :type: `list`
  506. """)
  507. @property
  508. def num_plurals(self) -> int:
  509. """The number of plurals used by the catalog or locale.
  510. >>> Catalog(locale='en').num_plurals
  511. 2
  512. >>> Catalog(locale='ga').num_plurals
  513. 5
  514. :type: `int`"""
  515. if self._num_plurals is None:
  516. num = 2
  517. if self.locale:
  518. num = get_plural(self.locale)[0]
  519. self._num_plurals = num
  520. return self._num_plurals
  521. @property
  522. def plural_expr(self) -> str:
  523. """The plural expression used by the catalog or locale.
  524. >>> Catalog(locale='en').plural_expr
  525. '(n != 1)'
  526. >>> Catalog(locale='ga').plural_expr
  527. '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)'
  528. >>> Catalog(locale='ding').plural_expr # unknown locale
  529. '(n != 1)'
  530. :type: `str`"""
  531. if self._plural_expr is None:
  532. expr = '(n != 1)'
  533. if self.locale:
  534. expr = get_plural(self.locale)[1]
  535. self._plural_expr = expr
  536. return self._plural_expr
  537. @property
  538. def plural_forms(self) -> str:
  539. """Return the plural forms declaration for the locale.
  540. >>> Catalog(locale='en').plural_forms
  541. 'nplurals=2; plural=(n != 1);'
  542. >>> Catalog(locale='pt_BR').plural_forms
  543. 'nplurals=2; plural=(n > 1);'
  544. :type: `str`"""
  545. return f"nplurals={self.num_plurals}; plural={self.plural_expr};"
  546. def __contains__(self, id: _MessageID) -> bool:
  547. """Return whether the catalog has a message with the specified ID."""
  548. return self._key_for(id) in self._messages
  549. def __len__(self) -> int:
  550. """The number of messages in the catalog.
  551. This does not include the special ``msgid ""`` entry."""
  552. return len(self._messages)
  553. def __iter__(self) -> Iterator[Message]:
  554. """Iterates through all the entries in the catalog, in the order they
  555. were added, yielding a `Message` object for every entry.
  556. :rtype: ``iterator``"""
  557. buf = []
  558. for name, value in self.mime_headers:
  559. buf.append(f"{name}: {value}")
  560. flags = set()
  561. if self.fuzzy:
  562. flags |= {'fuzzy'}
  563. yield Message('', '\n'.join(buf), flags=flags)
  564. for key in self._messages:
  565. yield self._messages[key]
  566. def __repr__(self) -> str:
  567. locale = ''
  568. if self.locale:
  569. locale = f" {self.locale}"
  570. return f"<{type(self).__name__} {self.domain!r}{locale}>"
  571. def __delitem__(self, id: _MessageID) -> None:
  572. """Delete the message with the specified ID."""
  573. self.delete(id)
  574. def __getitem__(self, id: _MessageID) -> Message:
  575. """Return the message with the specified ID.
  576. :param id: the message ID
  577. """
  578. return self.get(id)
  579. def __setitem__(self, id: _MessageID, message: Message) -> None:
  580. """Add or update the message with the specified ID.
  581. >>> catalog = Catalog()
  582. >>> catalog[u'foo'] = Message(u'foo')
  583. >>> catalog[u'foo']
  584. <Message u'foo' (flags: [])>
  585. If a message with that ID is already in the catalog, it is updated
  586. to include the locations and flags of the new message.
  587. >>> catalog = Catalog()
  588. >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)])
  589. >>> catalog[u'foo'].locations
  590. [('main.py', 1)]
  591. >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)])
  592. >>> catalog[u'foo'].locations
  593. [('main.py', 1), ('utils.py', 5)]
  594. :param id: the message ID
  595. :param message: the `Message` object
  596. """
  597. assert isinstance(message, Message), 'expected a Message object'
  598. key = self._key_for(id, message.context)
  599. current = self._messages.get(key)
  600. if current:
  601. if message.pluralizable and not current.pluralizable:
  602. # The new message adds pluralization
  603. current.id = message.id
  604. current.string = message.string
  605. current.locations = list(distinct(current.locations +
  606. message.locations))
  607. current.auto_comments = list(distinct(current.auto_comments +
  608. message.auto_comments))
  609. current.user_comments = list(distinct(current.user_comments +
  610. message.user_comments))
  611. current.flags |= message.flags
  612. elif id == '':
  613. # special treatment for the header message
  614. self.mime_headers = message_from_string(message.string).items()
  615. self.header_comment = "\n".join([f"# {c}".rstrip() for c in message.user_comments])
  616. self.fuzzy = message.fuzzy
  617. else:
  618. if isinstance(id, (list, tuple)):
  619. assert isinstance(message.string, (list, tuple)), \
  620. f"Expected sequence but got {type(message.string)}"
  621. self._messages[key] = message
  622. def add(
  623. self,
  624. id: _MessageID,
  625. string: _MessageID | None = None,
  626. locations: Iterable[tuple[str, int]] = (),
  627. flags: Iterable[str] = (),
  628. auto_comments: Iterable[str] = (),
  629. user_comments: Iterable[str] = (),
  630. previous_id: _MessageID = (),
  631. lineno: int | None = None,
  632. context: str | None = None,
  633. ) -> Message:
  634. """Add or update the message with the specified ID.
  635. >>> catalog = Catalog()
  636. >>> catalog.add(u'foo')
  637. <Message ...>
  638. >>> catalog[u'foo']
  639. <Message u'foo' (flags: [])>
  640. This method simply constructs a `Message` object with the given
  641. arguments and invokes `__setitem__` with that object.
  642. :param id: the message ID, or a ``(singular, plural)`` tuple for
  643. pluralizable messages
  644. :param string: the translated message string, or a
  645. ``(singular, plural)`` tuple for pluralizable messages
  646. :param locations: a sequence of ``(filename, lineno)`` tuples
  647. :param flags: a set or sequence of flags
  648. :param auto_comments: a sequence of automatic comments
  649. :param user_comments: a sequence of user comments
  650. :param previous_id: the previous message ID, or a ``(singular, plural)``
  651. tuple for pluralizable messages
  652. :param lineno: the line number on which the msgid line was found in the
  653. PO file, if any
  654. :param context: the message context
  655. """
  656. message = Message(id, string, list(locations), flags, auto_comments,
  657. user_comments, previous_id, lineno=lineno,
  658. context=context)
  659. self[id] = message
  660. return message
  661. def check(self) -> Iterable[tuple[Message, list[TranslationError]]]:
  662. """Run various validation checks on the translations in the catalog.
  663. For every message which fails validation, this method yield a
  664. ``(message, errors)`` tuple, where ``message`` is the `Message` object
  665. and ``errors`` is a sequence of `TranslationError` objects.
  666. :rtype: ``generator`` of ``(message, errors)``
  667. """
  668. for message in self._messages.values():
  669. errors = message.check(catalog=self)
  670. if errors:
  671. yield message, errors
  672. def get(self, id: _MessageID, context: str | None = None) -> Message | None:
  673. """Return the message with the specified ID and context.
  674. :param id: the message ID
  675. :param context: the message context, or ``None`` for no context
  676. """
  677. return self._messages.get(self._key_for(id, context))
  def delete(self, id: _MessageID, context: str | None = None) -> None:
      """Remove the message stored for the given ID and context, if any.

      Nothing happens when the catalog holds no message under the key
      derived from ``id`` and ``context``.

      :param id: the message ID
      :param context: the message context, or ``None`` for no context
      """
      # ``pop`` with a default silently ignores keys that are not present.
      self._messages.pop(self._key_for(id, context), None)
  def update(
      self,
      template: Catalog,
      no_fuzzy_matching: bool = False,
      update_header_comment: bool = False,
      keep_user_comments: bool = True,
      update_creation_date: bool = True,
  ) -> None:
      """Update the catalog based on the given template catalog.

      Every template message with a non-empty ID is taken over into this
      catalog.  When the old catalog holds a message under the same key,
      its translation string and flags are carried over.  Otherwise, unless
      fuzzy matching is disabled, the closest old message ID (compared in
      lower case and with surrounding whitespace stripped) is looked up
      with ``get_close_matches``; if one is found, its translation is
      reused, the message is flagged ``fuzzy`` and the old ID is recorded
      as ``previous_id``.

      >>> from babel.messages import Catalog
      >>> template = Catalog()
      >>> template.add('green', locations=[('main.py', 99)])
      <Message ...>
      >>> template.add('blue', locations=[('main.py', 100)])
      <Message ...>
      >>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
      <Message ...>
      >>> catalog = Catalog(locale='de_DE')
      >>> catalog.add('blue', 'blau', locations=[('main.py', 98)])
      <Message ...>
      >>> catalog.add('head', 'Kopf', locations=[('util.py', 33)])
      <Message ...>
      >>> catalog.add(('salad', 'salads'), ('Salat', 'Salate'),
      ...             locations=[('util.py', 38)])
      <Message ...>

      >>> catalog.update(template)
      >>> len(catalog)
      3

      >>> msg1 = catalog['green']
      >>> msg1.string
      >>> msg1.locations
      [('main.py', 99)]

      >>> msg2 = catalog['blue']
      >>> msg2.string
      'blau'
      >>> msg2.locations
      [('main.py', 100)]

      >>> msg3 = catalog['salad']
      >>> msg3.string
      ('Salat', 'Salate')
      >>> msg3.locations
      [('util.py', 42)]

      Messages that are in the catalog but not in the template are removed
      from the main collection, but can still be accessed via the `obsolete`
      member:

      >>> 'head' in catalog
      False
      >>> list(catalog.obsolete.values())
      [<Message 'head' (flags: [])>]

      :param template: the reference catalog, usually read from a POT file
      :param no_fuzzy_matching: whether to disable fuzzy matching of message
                                IDs (fuzzy matching is performed when this
                                is ``False``)
      :param update_header_comment: whether to copy the header comment from the template
      :param keep_user_comments: whether to keep user comments from the old catalog
      :param update_creation_date: whether to copy the creation date from the template
      """
      # ``messages`` keeps the pre-update entries for lookups, ``remaining``
      # loses an entry whenever an exact match is merged, and
      # ``self._messages`` is rebuilt from scratch below.
      messages = self._messages
      remaining = messages.copy()
      self._messages = {}

      # Prepare for fuzzy matching
      # Only entries with a non-empty key and a non-empty string become
      # candidates.  They are indexed by the normalized msgid, so entries
      # that normalize to the same text overwrite one another.
      fuzzy_candidates = {}
      if not no_fuzzy_matching:
          for msgid in messages:
              if msgid and messages[msgid].string:
                  key = self._key_for(msgid)
                  ctxt = messages[msgid].context
                  fuzzy_candidates[self._to_fuzzy_match_key(key)] = (key, ctxt)
      # Old keys that were consumed by a fuzzy match.
      fuzzy_matches = set()

      def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, str] | str) -> None:
          # Combine the old entry stored under ``oldkey`` with a copy of the
          # template ``message`` and store the result in this catalog.
          # Differing keys mean the pairing came from fuzzy matching.
          message = message.clone()
          fuzzy = False
          if oldkey != newkey:
              fuzzy = True
              fuzzy_matches.add(oldkey)
              oldmsg = messages.get(oldkey)
              assert oldmsg is not None
              # Remember the ID the reused translation originally belonged to.
              if isinstance(oldmsg.id, str):
                  message.previous_id = [oldmsg.id]
              else:
                  message.previous_id = list(oldmsg.id)
          else:
              # Exact match: the old entry is consumed and will therefore
              # not be reported as obsolete.
              oldmsg = remaining.pop(oldkey, None)
              assert oldmsg is not None
          message.string = oldmsg.string

          if keep_user_comments:
              message.user_comments = list(distinct(oldmsg.user_comments))

          # Reconcile the shape of the carried-over string with the shape of
          # the new ID; any mismatch marks the message as fuzzy.
          if isinstance(message.id, (list, tuple)):
              if not isinstance(message.string, (list, tuple)):
                  # Singular translation for a now-pluralizable ID: pad with
                  # empty strings up to the number of ID forms.
                  fuzzy = True
                  message.string = tuple(
                      [message.string] + ([''] * (len(message.id) - 1)),
                  )
              elif len(message.string) != self.num_plurals:
                  fuzzy = True
                  # NOTE(review): ``message.string`` was assigned from
                  # ``oldmsg.string`` above, so this slice keeps every
                  # element and only converts the value to a tuple.
                  message.string = tuple(message.string[:len(oldmsg.string)])
          elif isinstance(message.string, (list, tuple)):
              # Plural translation for a now-singular ID: keep the first form.
              fuzzy = True
              message.string = message.string[0]
          message.flags |= oldmsg.flags
          if fuzzy:
              message.flags |= {'fuzzy'}
          self[message.id] = message

      for message in template:
          # Template entries with an empty ID are skipped.
          if message.id:
              key = self._key_for(message.id, message.context)
              if key in messages:
                  _merge(message, key, key)
              else:
                  if not no_fuzzy_matching:
                      # do some fuzzy matching with difflib
                      matches = get_close_matches(
                          self._to_fuzzy_match_key(key),
                          fuzzy_candidates.keys(),
                          1,
                      )
                      if matches:
                          modified_key = matches[0]
                          newkey, newctxt = fuzzy_candidates[modified_key]
                          # Rebuild the ``(msgid, msgctxt)`` key under which
                          # a context-specific candidate is stored.
                          if newctxt is not None:
                              newkey = newkey, newctxt
                          _merge(message, newkey, key)
                          continue

                  # No exact or fuzzy match: add the template message as is.
                  self[message.id] = message

      # Old entries that were neither matched exactly (already popped from
      # ``remaining``) nor used for a fuzzy match become obsolete.
      for msgid in remaining:
          if no_fuzzy_matching or msgid not in fuzzy_matches:
              self.obsolete[msgid] = remaining[msgid]

      if update_header_comment:
          # Allow the updated catalog's header to be rewritten based on the
          # template's header
          self.header_comment = template.header_comment

      # Make updated catalog's POT-Creation-Date equal to the template
      # used to update the catalog
      if update_creation_date:
          self.creation_date = template.creation_date
  def _to_fuzzy_match_key(self, key: tuple[str, str] | str) -> str:
      """Normalize a message key into the text used for fuzzy matching.

      For a tuple key only the first element (the msgid, without the
      context) is used.  The text is lower-cased and stripped of
      surrounding whitespace.
      """
      msgid = key[0] if isinstance(key, tuple) else key
      return msgid.lower().strip()
  def _key_for(self, id: _MessageID, context: str | None = None) -> tuple[str, str] | str:
      """Compute the key under which a message is stored.

      The key is the singular ID, even for pluralizable messages whose ID
      is a list or tuple.  When a context is given, the key is a
      ``(msgid, msgctxt)`` tuple instead.
      """
      singular = id[0] if isinstance(id, (list, tuple)) else id
      if context is None:
          return singular
      return (singular, context)
  def is_identical(self, other: Catalog) -> bool:
      """Checks if catalogs are identical, taking into account messages and
      headers.

      Two catalogs are identical when they store messages under exactly the
      same keys, every pair of messages sharing a key satisfies
      ``Message.is_identical``, and their ``mime_headers`` are equal when
      converted to dictionaries.

      :param other: the catalog to compare with; must be a `Catalog`
      :return: ``True`` if the catalogs are identical, ``False`` otherwise
      """
      assert isinstance(other, Catalog)
      own_messages = self._messages
      other_messages = other._messages
      # Look the keys up in the underlying mappings directly.  Passing a
      # stored ``(msgid, msgctxt)`` key to ``get()`` would make ``_key_for``
      # treat it as a ``(singular, plural)`` ID and drop the context, so the
      # lookup would resolve to the context-less message or to nothing.
      for key in own_messages.keys() | other_messages.keys():
          message_1 = own_messages.get(key)
          message_2 = other_messages.get(key)
          if (
              message_1 is None
              or message_2 is None
              or not message_1.is_identical(message_2)
          ):
              return False
      return dict(self.mime_headers) == dict(other.mime_headers)