123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637 |
- """
- babel.numbers
- ~~~~~~~~~~~~~
- CLDR Plural support. See UTS #35.
- :copyright: (c) 2013-2025 by the Babel Team.
- :license: BSD, see LICENSE for more details.
- """
- from __future__ import annotations
- import decimal
- import re
- from collections.abc import Iterable, Mapping
- from typing import Any, Callable, Literal
# Plural category tags accepted by ``PluralRule``.  The order of this tuple
# is the order used when rules are rendered by ``PluralRule.__repr__`` and
# when gettext plural indices are assigned by ``to_gettext``.
_plural_tags = (
    'zero',
    'one',
    'two',
    'few',
    'many',
    'other',
)

# Tag returned when none of the explicit rules matches a number.
_fallback_tag = 'other'
def extract_operands(source: float | decimal.Decimal) -> tuple[decimal.Decimal | int, int, int, int, int, int, Literal[0], Literal[0]]:
    """Extract operands from a decimal, a float or an int, according to `CLDR rules`_.

    The result is an 8-tuple (n, i, v, w, f, t, c, e), where those symbols are as follows:

    ====== ===============================================================
    Symbol Value
    ------ ---------------------------------------------------------------
    n      absolute value of the source number (integer and decimals).
    i      integer digits of n.
    v      number of visible fraction digits in n, with trailing zeros.
    w      number of visible fraction digits in n, without trailing zeros.
    f      visible fractional digits in n, with trailing zeros.
    t      visible fractional digits in n, without trailing zeros.
    c      compact decimal exponent value: exponent of the power of 10 used in compact decimal formatting.
    e      currently, synonym for ‘c’. however, may be redefined in the future.
    ====== ===============================================================

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-61/tr35-numbers.html#Operands

    Fraction digits keep their leading zeros when they are counted, so
    ``Decimal('0.01')`` yields ``v == 2`` and ``f == 1``.

    :param source: A real number
    :type source: int|float|decimal.Decimal
    :return: A n-i-v-w-f-t-c-e tuple
    :rtype: tuple[decimal.Decimal, int, int, int, int, int, int, int]
    """
    n = abs(source)
    i = int(n)
    if isinstance(n, float):
        if i == n:
            # A float without a fractional part is treated like an int.
            n = i
        else:
            # Go through ``str()`` instead of converting the float directly:
            # ``Decimal(float)`` is lossless and would expose the binary
            # representation (often 50+ digits), which is almost never what
            # the caller means.  Callers wanting that can pass a ``Decimal``.
            n = decimal.Decimal(str(n))

    if isinstance(n, decimal.Decimal):
        dec_tuple = n.as_tuple()
        exp = dec_tuple.exponent
        if exp < 0:
            coefficient = ''.join(str(d) for d in dec_tuple.digits)
            # The coefficient carries no leading zeros (0.01 is digits=(1,),
            # exponent=-2), so the slice may be shorter than the number of
            # fraction digits; pad on the left to restore them.
            trailing = coefficient[exp:].rjust(-exp, '0')
        else:
            trailing = ''
        no_trailing = trailing.rstrip('0')
        v = len(trailing)
        w = len(no_trailing)
        f = int(trailing or 0)
        t = int(no_trailing or 0)
    else:
        v = w = f = t = 0
    c = e = 0  # TODO: c and e are not supported
    return n, i, v, w, f, t, c, e
class PluralRule:
    """A set of pluralization rules for one language.

    Build it from a list of ``(tag, expr)`` tuples or from a dict of
    `CLDR rules`_.  The instance is callable: pass it a positive or negative
    number (integer or float) and it returns the tag of the plural form to
    use for that number:

    >>> rule = PluralRule({'one': 'n is 1'})
    >>> rule(1)
    'one'
    >>> rule(2)
    'other'

    Currently the CLDR defines these tags: zero, one, two, few, many and
    other where other is an implicit default.  Rules should be mutually
    exclusive; for a given numeric value, only one rule should apply (i.e.
    the condition should only be true for one of the plural rule elements).

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules
    """

    __slots__ = ('abstract', '_func')

    def __init__(self, rules: Mapping[str, str] | Iterable[tuple[str, str]]) -> None:
        """Parse the given rules into their abstract form.

        :param rules: a list of ``(tag, expr)`` tuples with the rules
                      conforming to UTS #35 or a dict with the tags as keys
                      and expressions as values.
        :raise RuleError: if the expression is malformed
        :raise ValueError: if a tag is unknown or given more than once
        """
        pairs = rules.items() if isinstance(rules, Mapping) else rules
        seen = set()
        # (tag, ast) pairs, ordered by tag name; rules that consist only of
        # samples parse to an empty ast and are left out.
        self.abstract: list[tuple[str, Any]] = []
        for tag, expression in sorted(pairs):
            if tag not in _plural_tags:
                raise ValueError(f"unknown tag {tag!r}")
            if tag in seen:
                raise ValueError(f"tag {tag!r} defined twice")
            seen.add(tag)
            tree = _Parser(expression).ast
            if tree:
                self.abstract.append((tag, tree))

    def __repr__(self) -> str:
        compiled = self.rules
        args = ", ".join(
            f"{tag}: {compiled[tag]}"
            for tag in _plural_tags
            if tag in compiled
        )
        return f"<{type(self).__name__} {args!r}>"

    @classmethod
    def parse(cls, rules: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> PluralRule:
        """Return a `PluralRule` for the given rules.

        A `PluralRule` argument is handed back unchanged; anything else is
        passed to the constructor.

        :param rules: the rules as list or dict, or a `PluralRule` object
        :raise RuleError: if the expression is malformed
        """
        return rules if isinstance(rules, PluralRule) else cls(rules)

    @property
    def rules(self) -> Mapping[str, str]:
        """The `PluralRule` as a dict of unicode plural rules.

        >>> rule = PluralRule({'one': 'n is 1'})
        >>> rule.rules
        {'one': 'n is 1'}
        """
        compiler = _UnicodeCompiler()
        rendered = {}
        for tag, tree in self.abstract:
            rendered[tag] = compiler.compile(tree)
        return rendered

    @property
    def tags(self) -> frozenset[str]:
        """The tags that have an explicit rule.

        The implicit default ``'other'`` is only included when a rule for it
        was given explicitly.
        """
        return frozenset(tag for tag, *_ in self.abstract)

    def __getstate__(self) -> list[tuple[str, Any]]:
        # Only the parsed rules are pickled; the compiled function is
        # rebuilt lazily on the first call.
        return self.abstract

    def __setstate__(self, abstract: list[tuple[str, Any]]) -> None:
        self.abstract = abstract

    def __call__(self, n: float | decimal.Decimal) -> str:
        # Compile to a Python function on first use and cache it in the slot.
        try:
            func = self._func
        except AttributeError:
            func = self._func = to_python(self)
        return func(n)
def to_javascript(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> str:
    """Render the rules as the source of a self-contained JavaScript function.

    Accepts a list/dict of rules or a `PluralRule` object.  The generated
    function depends on no external library:

    >>> to_javascript({'one': 'n is 1'})
    "(function(n) { return (n == 1) ? 'one' : 'other'; })"

    Implementation detail: The function generated will probably evaluate
    expressions involved into range operations multiple times.  This has the
    advantage that external helper functions are not required and is not a
    big performance hit for these simple calculations.

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    compiler = _JavaScriptCompiler()
    # One "<condition> ? '<tag>' : " link per rule, chained into a single
    # nested conditional expression that ends in the fallback tag.
    branches = ''.join(
        f"{compiler.compile(ast)} ? {tag!r} : "
        for tag, ast in PluralRule.parse(rule).abstract
    )
    return f"(function(n) {{ return {branches}{_fallback_tag!r}; }})"
def to_python(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> Callable[[float | decimal.Decimal], str]:
    """Compile the rules into a plain Python function.

    Accepts a list/dict of rules or a `PluralRule` object.  This is useful
    when a real function is needed and the rule object itself is of no
    interest:

    >>> func = to_python({'one': 'n is 1', 'few': 'n in 2..4'})
    >>> func(1)
    'one'
    >>> func(3)
    'few'
    >>> func = to_python({'one': 'n in 1,11', 'few': 'n in 3..10,13..19'})
    >>> func(11)
    'one'
    >>> func(15)
    'few'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    compiler = _PythonCompiler()
    source_lines = [
        'def evaluate(n):',
        ' n, i, v, w, f, t, c, e = extract_operands(n)',
    ]
    # the str() call is to coerce the tag to the native string.  It's
    # a limited ascii restricted set of tags anyways so that is fine.
    source_lines.extend(
        f" if ({compiler.compile(ast)}): return {str(tag)!r}"
        for tag, ast in PluralRule.parse(rule).abstract
    )
    source_lines.append(f" return {_fallback_tag!r}")

    # Helpers that the generated source refers to by name.
    namespace = {
        'IN': in_range_list,
        'WITHIN': within_range_list,
        'MOD': cldr_modulo,
        'extract_operands': extract_operands,
    }
    exec(compile('\n'.join(source_lines), '<rule>', 'exec'), namespace)
    return namespace['evaluate']
def to_gettext(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> str:
    """Render the rules as a gettext plural expression.

    The gettext expression is technically limited to integers and returns
    indices rather than tags.

    >>> to_gettext({'one': 'n is 1', 'two': 'n is 2'})
    'nplurals=3; plural=((n == 1) ? 0 : (n == 2) ? 1 : 2);'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    rule = PluralRule.parse(rule)
    used_tags = rule.tags | {_fallback_tag}

    # Indices follow the order of ``_plural_tags``, counting only the tags
    # that are in use (the fallback always is).
    ordered_tags = [tag for tag in _plural_tags if tag in used_tags]
    index_of = {tag: position for position, tag in enumerate(ordered_tags)}

    compiler = _GettextCompiler()
    branches = ''.join(
        f"{compiler.compile(ast)} ? {index_of[tag]} : "
        for tag, ast in rule.abstract
    )
    return f"nplurals={len(used_tags)}; plural=({branches}{index_of[_fallback_tag]});"
def in_range_list(num: float | decimal.Decimal, range_list: Iterable[Iterable[float | decimal.Decimal]]) -> bool:
    """Integer range list test.

    This is the callback for the "in" operator of the UTS #35 pluralization
    rule language.  It only matches whole numbers that lie inside one of the
    (inclusive) ranges:

    >>> in_range_list(1, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3), (5, 8)])
    True
    >>> in_range_list(1.2, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4), (6, 8)])
    False
    """
    if num != int(num):
        # A number with a fractional part can never be "in" a range.
        return False
    return within_range_list(num, range_list)
def within_range_list(num: float | decimal.Decimal, range_list: Iterable[Iterable[float | decimal.Decimal]]) -> bool:
    """Float range test.

    This is the callback for the "within" operator of the UTS #35
    pluralization rule language.  Both ends of every range are inclusive:

    >>> within_range_list(1, [(1, 3)])
    True
    >>> within_range_list(1.0, [(1, 3)])
    True
    >>> within_range_list(1.2, [(1, 4)])
    True
    >>> within_range_list(8.8, [(1, 4), (7, 15)])
    True
    >>> within_range_list(10, [(1, 4)])
    False
    >>> within_range_list(10.5, [(1, 4), (20, 30)])
    False
    """
    for lower, upper in range_list:
        if lower <= num <= upper:
            return True
    return False
def cldr_modulo(a: float, b: float) -> float:
    """Javaish modulo.

    The result takes the sign of the dividend, not of the divisor as
    Python's ``%`` does:

    >>> cldr_modulo(-3, 5)
    -3
    >>> cldr_modulo(-3, -5)
    -3
    >>> cldr_modulo(3, 5)
    3
    """
    dividend_negative = a < 0
    if dividend_negative:
        a = a * -1
    if b < 0:
        b = b * -1
    # Both operands are non-negative here, so ``%`` yields the magnitude;
    # the sign of the dividend is put back afterwards.
    remainder = a % b
    return remainder * -1 if dividend_negative else remainder
class RuleError(Exception):
    """Raised when a pluralization rule cannot be tokenized or parsed."""
# Operand symbols a rule expression may start with.
_VARS = {
    'n',  # absolute value of the source number.
    'i',  # integer digits of n.
    'v',  # number of visible fraction digits in n, with trailing zeros.*
    'w',  # number of visible fraction digits in n, without trailing zeros.*
    'f',  # visible fraction digits in n, with trailing zeros.*
    't',  # visible fraction digits in n, without trailing zeros.*
    'c',  # compact decimal exponent value: exponent of the power of 10 used in compact decimal formatting.
    'e',  # currently, synonym for `c`. however, may be redefined in the future.
}

# Tokenizer table of (token type, pattern) pairs, tried in this order at the
# current position.  A token type of ``None`` means the match is consumed
# without producing a token (whitespace).
_RULES: list[tuple[str | None, re.Pattern[str]]] = [
    (None, re.compile(r'\s+', re.UNICODE)),
    # Keywords plus the single-letter operands; the set's iteration order is
    # irrelevant because the letters end up inside a character class.
    ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|[' + ''.join(_VARS) + r'])\b')),
    ('value', re.compile(r'\d+')),
    ('symbol', re.compile(r'%|,|!=|=')),
    ('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE)),  # U+2026: ELLIPSIS
]
def tokenize_rule(s: str) -> list[tuple[str, str]]:
    """Split a rule expression into ``(type, text)`` tokens.

    Everything from the first ``@`` on (the samples) is ignored.  The tokens
    are returned in reverse order, so the next token to consume is always
    the last list element and can be taken with ``list.pop()``.

    :param s: the rule expression
    :raise RuleError: if no tokenizer pattern matches at some position
    """
    expression = s.partition('@')[0]
    tokens: list[tuple[str, str]] = []
    cursor = 0
    length = len(expression)
    while cursor < length:
        for kind, pattern in _RULES:
            found = pattern.match(expression, cursor)
            if found is None:
                continue
            cursor = found.end()
            if kind:
                tokens.append((kind, found.group()))
            break
        else:
            # No pattern matched at the current position.
            raise RuleError(f"malformed CLDR pluralization rule. Got unexpected {expression[cursor]!r}")
    tokens.reverse()
    return tokens
- def test_next_token(
- tokens: list[tuple[str, str]],
- type_: str,
- value: str | None = None,
- ) -> list[tuple[str, str]] | bool:
- return tokens and tokens[-1][0] == type_ and \
- (value is None or tokens[-1][1] == value)
def skip_token(tokens: list[tuple[str, str]], type_: str, value: str | None = None):
    """Consume and return the next token if it matches, else return ``None``.

    The token list is only modified when the next token has the requested
    type (and text, if ``value`` is given).
    """
    if not test_next_token(tokens, type_, value):
        return None
    return tokens.pop()
def value_node(value: int) -> tuple[Literal['value'], tuple[int]]:
    """Build the AST node for an integer literal."""
    arguments = (value,)
    return 'value', arguments
def ident_node(name: str) -> tuple[str, tuple[()]]:
    """Build the AST node for an operand; it carries no arguments."""
    no_arguments = ()
    return name, no_arguments
def range_list_node(
    range_list: Iterable[Iterable[float | decimal.Decimal]],
) -> tuple[Literal['range_list'], Iterable[Iterable[float | decimal.Decimal]]]:
    """Build the AST node for a range list; the list is stored as given."""
    node = ('range_list', range_list)
    return node
def negate(rv: tuple[Any, ...]) -> tuple[Literal['not'], tuple[tuple[Any, ...]]]:
    """Wrap an AST node in a ``'not'`` node."""
    operands = (rv,)
    return 'not', operands
class _Parser:
    """Internal parser.  This class can translate a single rule into an abstract
    tree of tuples.  It implements the following grammar::

        condition     = and_condition ('or' and_condition)*
                        ('@integer' samples)?
                        ('@decimal' samples)?
        and_condition = relation ('and' relation)*
        relation      = is_relation | in_relation | within_relation
        is_relation   = expr 'is' ('not')? value
        in_relation   = expr (('not')? 'in' | '=' | '!=') range_list
        within_relation = expr ('not')? 'within' range_list
        expr          = operand (('mod' | '%') value)?
        operand       = 'n' | 'i' | 'f' | 't' | 'v' | 'w' | 'c' | 'e'
        range_list    = (range | value) (',' range_list)*
        value         = digit+
        digit         = 0|1|2|3|4|5|6|7|8|9
        range         = value'..'value
        samples       = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
        sampleRange   = decimalValue '~' decimalValue
        decimalValue  = value ('.' value)?

    - Whitespace can occur between or around any of the above tokens.
    - Rules should be mutually exclusive; for a given numeric value, only one
      rule should apply (i.e. the condition should only be true for one of
      the plural rule elements).
    - The in and within relations can take comma-separated lists, such as:
      'n in 3,5,7..15'.
    - Samples are ignored.

    The translator parses the expression on instantiation into an attribute
    called `ast`.
    """

    def __init__(self, string):
        """Tokenize and parse ``string``; the result is stored in ``ast``.

        :raise RuleError: if the rule is malformed or has trailing tokens
        """
        # Tokens are stored reversed: the next token is ``self.tokens[-1]``.
        self.tokens = tokenize_rule(string)
        if not self.tokens:
            # If the pattern is only samples, it's entirely possible
            # no stream of tokens whatsoever is generated.
            self.ast = None
            return
        self.ast = self.condition()
        if self.tokens:
            raise RuleError(f"Expected end of rule, got {self.tokens[-1][1]!r}")

    def expect(self, type_, value=None, term=None):
        """Consume and return the next token, which must match.

        :param type_: required token type
        :param value: required token text, or ``None`` for any text
        :param term: description used in the error message; defaults to the
                     repr of ``value`` (or of ``type_`` if no value is given)
        :raise RuleError: if the next token does not match or none is left
        """
        token = skip_token(self.tokens, type_, value)
        if token is not None:
            return token
        if term is None:
            term = repr(type_ if value is None else value)
        if not self.tokens:
            raise RuleError(f"expected {term} but end of rule reached")
        raise RuleError(f"expected {term} but got {self.tokens[-1][1]!r}")

    def condition(self):
        """Parse ``and_condition ('or' and_condition)*`` (left-associative)."""
        op = self.and_condition()
        while skip_token(self.tokens, 'word', 'or'):
            op = 'or', (op, self.and_condition())
        return op

    def and_condition(self):
        """Parse ``relation ('and' relation)*`` (left-associative)."""
        op = self.relation()
        while skip_token(self.tokens, 'word', 'and'):
            op = 'and', (op, self.relation())
        return op

    def relation(self):
        """Parse one relation: ``is``, ``in``, ``within``, ``=`` or ``!=``."""
        left = self.expr()
        if skip_token(self.tokens, 'word', 'is'):
            op = 'isnot' if skip_token(self.tokens, 'word', 'not') else 'is'
            return op, (left, self.value())
        negated = skip_token(self.tokens, 'word', 'not')
        if skip_token(self.tokens, 'word', 'within'):
            method = 'within'
        elif skip_token(self.tokens, 'word', 'in'):
            method = 'in'
        else:
            # Neither keyword follows, so this must be the operator syntax,
            # which has its own negated form ('!=') and cannot take 'not'.
            if negated:
                raise RuleError('Cannot negate operator based rules.')
            return self.newfangled_relation(left)
        rv = 'relation', (method, left, self.range_list())
        return negate(rv) if negated else rv

    def newfangled_relation(self, left):
        """Parse the operator form ``('=' | '!=') range_list``.

        Both operators produce an ``'in'`` relation; ``'!='`` wraps it in a
        ``'not'`` node.
        """
        if skip_token(self.tokens, 'symbol', '='):
            negated = False
        elif skip_token(self.tokens, 'symbol', '!='):
            negated = True
        else:
            raise RuleError('Expected "=" or "!=" or legacy relation')
        rv = 'relation', ('in', left, self.range_list())
        return negate(rv) if negated else rv

    def range_or_value(self):
        """Parse ``value`` or ``value..value`` into a ``(low, high)`` pair.

        A single value is returned as a range whose ends are the same node.
        """
        left = self.value()
        if skip_token(self.tokens, 'ellipsis'):
            return left, self.value()
        return left, left

    def range_list(self):
        """Parse a comma-separated list of ranges/values."""
        range_list = [self.range_or_value()]
        while skip_token(self.tokens, 'symbol', ','):
            range_list.append(self.range_or_value())
        return range_list_node(range_list)

    def expr(self):
        """Parse ``operand (('mod' | '%') value)?``.

        :raise RuleError: if the next token is not one of the operands
        """
        word = skip_token(self.tokens, 'word')
        if word is None or word[1] not in _VARS:
            raise RuleError('Expected identifier variable')
        operand = ident_node(word[1])
        # 'mod' and '%' are two spellings of the same operator.
        if skip_token(self.tokens, 'word', 'mod') or skip_token(self.tokens, 'symbol', '%'):
            return 'mod', (operand, self.value())
        return operand

    def value(self):
        """Parse an integer literal into a value node."""
        return value_node(int(self.expect('value')[1]))
def _binary_compiler(tmpl):
    """Compiler factory for the `_Compiler`.

    Returns a method that compiles both operands with ``self.compile`` and
    inserts the two results into ``tmpl`` using ``%`` formatting.
    """
    def compile_binary(self, left, right):
        return tmpl % (self.compile(left), self.compile(right))

    return compile_binary
def _unary_compiler(tmpl):
    """Compiler factory for the `_Compiler`.

    Returns a method that compiles its single operand with ``self.compile``
    and inserts the result into ``tmpl`` using ``%`` formatting.
    """
    def compile_unary(self, x):
        return tmpl % self.compile(x)

    return compile_unary
def compile_zero(x):
    """Compile any operand to the constant ``'0'``.

    Assigned as a method on compilers whose output has no fraction support;
    ``x`` is the compiler instance and is ignored.
    """
    return '0'
class _Compiler:
    """Base class of the compilers, which turn a parsed expression into one
    of several output formats.

    The operators defined here use C-like syntax (``&&``, ``||``, ``!``,
    ``%``, ``==``, ``!=``); subclasses override whatever differs in their
    target language and must implement `compile_relation`.
    """

    def compile(self, arg):
        """Compile one ``(op, args)`` node via the ``compile_<op>`` method."""
        op, args = arg
        handler = getattr(self, f"compile_{op}")
        return handler(*args)

    # Operands compile to their own name.
    def compile_n(self):
        return 'n'

    def compile_i(self):
        return 'i'

    def compile_v(self):
        return 'v'

    def compile_w(self):
        return 'w'

    def compile_f(self):
        return 'f'

    def compile_t(self):
        return 't'

    def compile_c(self):
        return 'c'

    def compile_e(self):
        return 'e'

    def compile_value(self, v):
        return str(v)

    compile_and = _binary_compiler('(%s && %s)')
    compile_or = _binary_compiler('(%s || %s)')
    compile_not = _unary_compiler('(!%s)')
    compile_mod = _binary_compiler('(%s %% %s)')
    compile_is = _binary_compiler('(%s == %s)')
    compile_isnot = _binary_compiler('(%s != %s)')

    def compile_relation(self, method, expr, range_list):
        """Compile an ``in``/``within`` relation; provided by subclasses."""
        raise NotImplementedError()
class _PythonCompiler(_Compiler):
    """Compiles an expression to Python."""

    compile_and = _binary_compiler('(%s and %s)')
    compile_or = _binary_compiler('(%s or %s)')
    compile_not = _unary_compiler('(not %s)')
    compile_mod = _binary_compiler('MOD(%s, %s)')

    def compile_relation(self, method, expr, range_list):
        """Emit a call ``IN(expr, [(lo, hi),...])`` or ``WITHIN(...)``.

        The callee name is the upper-cased relation method.
        """
        pairs = [
            f"({self.compile(low)}, {self.compile(high)})"
            for low, high in range_list[1]
        ]
        ranges = ",".join(pairs)
        return f"{method.upper()}({self.compile(expr)}, [{ranges}])"
class _GettextCompiler(_Compiler):
    """Compile into a gettext plural expression."""

    # ``i`` compiles exactly like ``n``; the fraction operands always
    # compile to ``0``.
    compile_i = _Compiler.compile_n
    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero

    def compile_relation(self, method, expr, range_list):
        """Expand a range list into ``==`` / ``>= && <=`` tests joined by ``||``.

        ``method`` is not used: ``in`` and ``within`` compile the same way.
        """
        subject = self.compile(expr)
        clauses = []
        for low, high in range_list[1]:
            if low == high:
                # A single value is stored as a range with identical ends.
                clauses.append(f"({subject} == {self.compile(low)})")
                continue
            lower, upper = self.compile(low), self.compile(high)
            clauses.append(f"({subject} >= {lower} && {subject} <= {upper})")
        return f"({' || '.join(clauses)})"
class _JavaScriptCompiler(_GettextCompiler):
    """Compiles the expression to plain JavaScript."""

    # XXX: presently javascript does not support any of the
    # fraction support and basically only deals with integers.
    def compile_i(self):
        return 'parseInt(n, 10)'

    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero

    def compile_relation(self, method, expr, range_list):
        """Compile like gettext; ``in`` additionally requires a whole number."""
        code = super().compile_relation(method, expr, range_list)
        if method != 'in':
            return code
        subject = self.compile(expr)
        return f"(parseInt({subject}, 10) == {subject} && {code})"
class _UnicodeCompiler(_Compiler):
    """Returns a unicode pluralization rule again."""

    # XXX: this currently spits out the old syntax instead of the new
    # one.  We can change that, but it will break a whole bunch of stuff
    # for users I suppose.

    compile_is = _binary_compiler('%s is %s')
    compile_isnot = _binary_compiler('%s is not %s')
    compile_and = _binary_compiler('%s and %s')
    compile_or = _binary_compiler('%s or %s')
    compile_mod = _binary_compiler('%s mod %s')

    def compile_not(self, relation):
        """Render the wrapped relation node with ``not`` before its keyword."""
        relation_args = relation[1]
        return self.compile_relation(*relation_args, negated=True)

    def compile_relation(self, method, expr, range_list, negated=False):
        """Render ``expr [not] in|within a,b..c``."""
        rendered = [
            self.compile(low)
            if low == high
            else f"{self.compile(low)}..{self.compile(high)}"
            for low, high in range_list[1]
        ]
        negation = ' not' if negated else ''
        return f"{self.compile(expr)}{negation} {method} {','.join(rendered)}"
|