localedata.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. """
  2. babel.localedata
  3. ~~~~~~~~~~~~~~~~
  4. Low-level locale data access.
  5. :note: The `Locale` class, which uses this module under the hood, provides a
  6. more convenient interface for accessing the locale data.
  7. :copyright: (c) 2013-2025 by the Babel Team.
  8. :license: BSD, see LICENSE for more details.
  9. """
  10. from __future__ import annotations
  11. import os
  12. import pickle
  13. import re
  14. import sys
  15. import threading
  16. from collections import abc
  17. from collections.abc import Iterator, Mapping, MutableMapping
  18. from functools import lru_cache
  19. from itertools import chain
  20. from typing import Any
  21. _cache: dict[str, Any] = {}
  22. _cache_lock = threading.RLock()
  23. _dirname = os.path.join(os.path.dirname(__file__), 'locale-data')
  24. _windows_reserved_name_re = re.compile("^(con|prn|aux|nul|com[0-9]|lpt[0-9])$", re.I)
  25. def normalize_locale(name: str) -> str | None:
  26. """Normalize a locale ID by stripping spaces and apply proper casing.
  27. Returns the normalized locale ID string or `None` if the ID is not
  28. recognized.
  29. """
  30. if not name or not isinstance(name, str):
  31. return None
  32. name = name.strip().lower()
  33. for locale_id in chain.from_iterable([_cache, locale_identifiers()]):
  34. if name == locale_id.lower():
  35. return locale_id
  36. def resolve_locale_filename(name: os.PathLike[str] | str) -> str:
  37. """
  38. Resolve a locale identifier to a `.dat` path on disk.
  39. """
  40. # Clean up any possible relative paths.
  41. name = os.path.basename(name)
  42. # Ensure we're not left with one of the Windows reserved names.
  43. if sys.platform == "win32" and _windows_reserved_name_re.match(os.path.splitext(name)[0]):
  44. raise ValueError(f"Name {name} is invalid on Windows")
  45. # Build the path.
  46. return os.path.join(_dirname, f"{name}.dat")
  47. def exists(name: str) -> bool:
  48. """Check whether locale data is available for the given locale.
  49. Returns `True` if it exists, `False` otherwise.
  50. :param name: the locale identifier string
  51. """
  52. if not name or not isinstance(name, str):
  53. return False
  54. if name in _cache:
  55. return True
  56. file_found = os.path.exists(resolve_locale_filename(name))
  57. return True if file_found else bool(normalize_locale(name))
  58. @lru_cache(maxsize=None)
  59. def locale_identifiers() -> list[str]:
  60. """Return a list of all locale identifiers for which locale data is
  61. available.
  62. This data is cached after the first invocation.
  63. You can clear the cache by calling `locale_identifiers.cache_clear()`.
  64. .. versionadded:: 0.8.1
  65. :return: a list of locale identifiers (strings)
  66. """
  67. return [
  68. stem
  69. for stem, extension in
  70. (os.path.splitext(filename) for filename in os.listdir(_dirname))
  71. if extension == '.dat' and stem != 'root'
  72. ]
  73. def _is_non_likely_script(name: str) -> bool:
  74. """Return whether the locale is of the form ``lang_Script``,
  75. and the script is not the likely script for the language.
  76. This implements the behavior of the ``nonlikelyScript`` value of the
  77. ``localRules`` attribute for parent locales added in CLDR 45.
  78. """
  79. from babel.core import get_global, parse_locale
  80. try:
  81. lang, territory, script, variant, *rest = parse_locale(name)
  82. except ValueError:
  83. return False
  84. if lang and script and not territory and not variant and not rest:
  85. likely_subtag = get_global('likely_subtags').get(lang)
  86. _, _, likely_script, *_ = parse_locale(likely_subtag)
  87. return script != likely_script
  88. return False
  89. def load(name: os.PathLike[str] | str, merge_inherited: bool = True) -> dict[str, Any]:
  90. """Load the locale data for the given locale.
  91. The locale data is a dictionary that contains much of the data defined by
  92. the Common Locale Data Repository (CLDR). This data is stored as a
  93. collection of pickle files inside the ``babel`` package.
  94. >>> d = load('en_US')
  95. >>> d['languages']['sv']
  96. u'Swedish'
  97. Note that the results are cached, and subsequent requests for the same
  98. locale return the same dictionary:
  99. >>> d1 = load('en_US')
  100. >>> d2 = load('en_US')
  101. >>> d1 is d2
  102. True
  103. :param name: the locale identifier string (or "root")
  104. :param merge_inherited: whether the inherited data should be merged into
  105. the data of the requested locale
  106. :raise `IOError`: if no locale data file is found for the given locale
  107. identifier, or one of the locales it inherits from
  108. """
  109. name = os.path.basename(name)
  110. _cache_lock.acquire()
  111. try:
  112. data = _cache.get(name)
  113. if not data:
  114. # Load inherited data
  115. if name == 'root' or not merge_inherited:
  116. data = {}
  117. else:
  118. from babel.core import get_global
  119. parent = get_global('parent_exceptions').get(name)
  120. if not parent:
  121. if _is_non_likely_script(name):
  122. parent = 'root'
  123. else:
  124. parts = name.split('_')
  125. parent = "root" if len(parts) == 1 else "_".join(parts[:-1])
  126. data = load(parent).copy()
  127. filename = resolve_locale_filename(name)
  128. with open(filename, 'rb') as fileobj:
  129. if name != 'root' and merge_inherited:
  130. merge(data, pickle.load(fileobj))
  131. else:
  132. data = pickle.load(fileobj)
  133. _cache[name] = data
  134. return data
  135. finally:
  136. _cache_lock.release()
  137. def merge(dict1: MutableMapping[Any, Any], dict2: Mapping[Any, Any]) -> None:
  138. """Merge the data from `dict2` into the `dict1` dictionary, making copies
  139. of nested dictionaries.
  140. >>> d = {1: 'foo', 3: 'baz'}
  141. >>> merge(d, {1: 'Foo', 2: 'Bar'})
  142. >>> sorted(d.items())
  143. [(1, 'Foo'), (2, 'Bar'), (3, 'baz')]
  144. :param dict1: the dictionary to merge into
  145. :param dict2: the dictionary containing the data that should be merged
  146. """
  147. for key, val2 in dict2.items():
  148. if val2 is not None:
  149. val1 = dict1.get(key)
  150. if isinstance(val2, dict):
  151. if val1 is None:
  152. val1 = {}
  153. if isinstance(val1, Alias):
  154. val1 = (val1, val2)
  155. elif isinstance(val1, tuple):
  156. alias, others = val1
  157. others = others.copy()
  158. merge(others, val2)
  159. val1 = (alias, others)
  160. else:
  161. val1 = val1.copy()
  162. merge(val1, val2)
  163. else:
  164. val1 = val2
  165. dict1[key] = val1
  166. class Alias:
  167. """Representation of an alias in the locale data.
  168. An alias is a value that refers to some other part of the locale data,
  169. as specified by the `keys`.
  170. """
  171. def __init__(self, keys: tuple[str, ...]) -> None:
  172. self.keys = tuple(keys)
  173. def __repr__(self) -> str:
  174. return f"<{type(self).__name__} {self.keys!r}>"
  175. def resolve(self, data: Mapping[str | int | None, Any]) -> Mapping[str | int | None, Any]:
  176. """Resolve the alias based on the given data.
  177. This is done recursively, so if one alias resolves to a second alias,
  178. that second alias will also be resolved.
  179. :param data: the locale data
  180. :type data: `dict`
  181. """
  182. base = data
  183. for key in self.keys:
  184. data = data[key]
  185. if isinstance(data, Alias):
  186. data = data.resolve(base)
  187. elif isinstance(data, tuple):
  188. alias, others = data
  189. data = alias.resolve(base)
  190. return data
  191. class LocaleDataDict(abc.MutableMapping):
  192. """Dictionary wrapper that automatically resolves aliases to the actual
  193. values.
  194. """
  195. def __init__(self, data: MutableMapping[str | int | None, Any], base: Mapping[str | int | None, Any] | None = None):
  196. self._data = data
  197. if base is None:
  198. base = data
  199. self.base = base
  200. def __len__(self) -> int:
  201. return len(self._data)
  202. def __iter__(self) -> Iterator[str | int | None]:
  203. return iter(self._data)
  204. def __getitem__(self, key: str | int | None) -> Any:
  205. orig = val = self._data[key]
  206. if isinstance(val, Alias): # resolve an alias
  207. val = val.resolve(self.base)
  208. if isinstance(val, tuple): # Merge a partial dict with an alias
  209. alias, others = val
  210. val = alias.resolve(self.base).copy()
  211. merge(val, others)
  212. if isinstance(val, dict): # Return a nested alias-resolving dict
  213. val = LocaleDataDict(val, base=self.base)
  214. if val is not orig:
  215. self._data[key] = val
  216. return val
  217. def __setitem__(self, key: str | int | None, value: Any) -> None:
  218. self._data[key] = value
  219. def __delitem__(self, key: str | int | None) -> None:
  220. del self._data[key]
  221. def copy(self) -> LocaleDataDict:
  222. return LocaleDataDict(self._data.copy(), base=self.base)