123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698 |
- from __future__ import annotations
- import datetime
- import re
- from typing import TYPE_CHECKING
- from typing import Any
- from typing import Callable
- from typing import ClassVar
- from typing import Match
- from typing import Sequence
- from typing import cast
- import pendulum
- from pendulum.locales.locale import Locale
- if TYPE_CHECKING:
- from pendulum import Timezone
- _MATCH_1 = r"\d"
- _MATCH_2 = r"\d\d"
- _MATCH_3 = r"\d{3}"
- _MATCH_4 = r"\d{4}"
- _MATCH_6 = r"[+-]?\d{6}"
- _MATCH_1_TO_2 = r"\d\d?"
- _MATCH_1_TO_2_LEFT_PAD = r"[0-9 ]\d?"
- _MATCH_1_TO_3 = r"\d{1,3}"
- _MATCH_1_TO_4 = r"\d{1,4}"
- _MATCH_1_TO_6 = r"[+-]?\d{1,6}"
- _MATCH_3_TO_4 = r"\d{3}\d?"
- _MATCH_5_TO_6 = r"\d{5}\d?"
- _MATCH_UNSIGNED = r"\d+"
- _MATCH_SIGNED = r"[+-]?\d+"
- _MATCH_OFFSET = r"[Zz]|[+-]\d\d:?\d\d"
- _MATCH_SHORT_OFFSET = r"[Zz]|[+-]\d\d(?::?\d\d)?"
- _MATCH_TIMESTAMP = r"[+-]?\d+(\.\d{1,6})?"
- _MATCH_WORD = (
- "(?i)[0-9]*"
- "['a-z\u00A0-\u05FF\u0700-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]+"
- r"|[\u0600-\u06FF/]+(\s*?[\u0600-\u06FF]+){1,2}"
- )
- _MATCH_TIMEZONE = "[A-Za-z0-9-+]+(/[A-Za-z0-9-+_]+)?"
class Formatter:
    """
    Formats DateTime instances and parses date strings using format tokens.

    Tokens are recognized by ``_TOKENS``. Text wrapped in ``[...]`` or a
    character preceded by a backslash is emitted literally when formatting.
    """

    # Alternatives, in order: a bracketed literal (group 1), a
    # backslash-escaped character (group 2), or a format token (group 3).
    _TOKENS: str = (
        r"\[([^\[]*)\]|\\(.)|"
        "("
        "Mo|MM?M?M?"
        "|Do|DDDo|DD?D?D?|ddd?d?|do?|eo?"
        "|E{1,4}"
        "|w[o|w]?|W[o|W]?|Qo?"
        "|YYYY|YY|Y"
        "|gg(ggg?)?|GG(GGG?)?"
        "|a|A"
        "|hh?|HH?|kk?"
        "|mm?|ss?|S{1,9}"
        "|x|X"
        "|zz?|ZZ?"
        "|LTS|LT|LL?L?L?"
        ")"
    )

    # Used by format() on the raw format string.
    _FORMAT_RE: re.Pattern[str] = re.compile(_TOKENS)

    # Used by parse() on the re.escape()'d format string; the look-around
    # assertions skip a token directly adjacent to an escaped bracket.
    _FROM_FORMAT_RE: re.Pattern[str] = re.compile(r"(?<!\\\[)" + _TOKENS + r"(?!\\\])")

    # Tokens whose text depends on the locale. For parsing, the value is
    # either a translation key (under "translations."), a callable returning
    # regex alternatives, or None when the token cannot be parsed.
    _LOCALIZABLE_TOKENS: ClassVar[
        dict[str, str | Callable[[Locale], Sequence[str]] | None]
    ] = {
        "Qo": None,
        "MMMM": "months.wide",
        "MMM": "months.abbreviated",
        "Mo": None,
        "DDDo": None,
        # Locale text is escaped so regex metacharacters in it match literally.
        "Do": lambda locale: tuple(
            rf"\d+{re.escape(o)}" for o in locale.get("custom.ordinal").values()
        ),
        "dddd": "days.wide",
        "ddd": "days.abbreviated",
        "dd": "days.short",
        "do": None,
        "e": None,
        "eo": None,
        "Wo": None,
        "wo": None,
        "A": lambda locale: (
            re.escape(locale.translation("day_periods.am")),
            re.escape(locale.translation("day_periods.pm")),
        ),
        "a": lambda locale: (
            re.escape(locale.translation("day_periods.am").lower()),
            re.escape(locale.translation("day_periods.pm").lower()),
        ),
    }

    # Locale-independent formatting rules: token -> renderer.
    _TOKENS_RULES: ClassVar[dict[str, Callable[[pendulum.DateTime], str]]] = {
        # Year
        "YYYY": lambda dt: f"{dt.year:d}",
        # Two-digit year; modulo keeps it correct for years below 1000,
        # where slicing the decimal string would drop or lose digits.
        "YY": lambda dt: f"{dt.year % 100:02d}",
        "Y": lambda dt: f"{dt.year:d}",
        # Quarter
        "Q": lambda dt: f"{dt.quarter:d}",
        # Month
        "MM": lambda dt: f"{dt.month:02d}",
        "M": lambda dt: f"{dt.month:d}",
        # Day
        "DD": lambda dt: f"{dt.day:02d}",
        "D": lambda dt: f"{dt.day:d}",
        # Day of Year
        "DDDD": lambda dt: f"{dt.day_of_year:03d}",
        "DDD": lambda dt: f"{dt.day_of_year:d}",
        # Day of Week
        "d": lambda dt: f"{(dt.day_of_week + 1) % 7:d}",
        # Day of ISO Week
        "E": lambda dt: f"{dt.isoweekday():d}",
        # Hour
        "HH": lambda dt: f"{dt.hour:02d}",
        "H": lambda dt: f"{dt.hour:d}",
        "hh": lambda dt: f"{dt.hour % 12 or 12:02d}",
        "h": lambda dt: f"{dt.hour % 12 or 12:d}",
        # Minute
        "mm": lambda dt: f"{dt.minute:02d}",
        "m": lambda dt: f"{dt.minute:d}",
        # Second
        "ss": lambda dt: f"{dt.second:02d}",
        "s": lambda dt: f"{dt.second:d}",
        # Fractional second
        "S": lambda dt: f"{dt.microsecond // 100000:01d}",
        "SS": lambda dt: f"{dt.microsecond // 10000:02d}",
        "SSS": lambda dt: f"{dt.microsecond // 1000:03d}",
        "SSSS": lambda dt: f"{dt.microsecond // 100:04d}",
        "SSSSS": lambda dt: f"{dt.microsecond // 10:05d}",
        "SSSSSS": lambda dt: f"{dt.microsecond:06d}",
        # Timestamp
        "X": lambda dt: f"{dt.int_timestamp:d}",
        "x": lambda dt: f"{dt.int_timestamp * 1000 + dt.microsecond // 1000:d}",
        # Timezone
        "zz": lambda dt: f'{dt.tzname() if dt.tzinfo is not None else ""}',
        "z": lambda dt: f'{dt.timezone_name or ""}',
    }

    # Composite date/time tokens. _format_token only tests membership here;
    # the format itself is read from "custom.date_formats.<token>".
    _DATE_FORMATS: ClassVar[dict[str, str]] = {
        "LTS": "formats.time.full",
        "LT": "formats.time.short",
        "L": "formats.date.short",
        "LL": "formats.date.long",
        "LLL": "formats.datetime.long",
        "LLLL": "formats.datetime.full",
    }

    # Fallback expansions used when the locale defines no custom date format.
    _DEFAULT_DATE_FORMATS: ClassVar[dict[str, str]] = {
        "LTS": "h:mm:ss A",
        "LT": "h:mm A",
        "L": "MM/DD/YYYY",
        "LL": "MMMM D, YYYY",
        "LLL": "MMMM D, YYYY h:mm A",
        "LLLL": "dddd, MMMM D, YYYY h:mm A",
    }

    # Parsing: token -> regex fragment, or a tuple of alternatives.
    # None marks a token that cannot be parsed.
    _REGEX_TOKENS: ClassVar[dict[str, str | Sequence[str] | None]] = {
        "Y": _MATCH_SIGNED,
        "YY": (_MATCH_1_TO_2, _MATCH_2),
        "YYYY": (_MATCH_1_TO_4, _MATCH_4),
        "Q": _MATCH_1,
        "Qo": None,
        "M": _MATCH_1_TO_2,
        "MM": (_MATCH_1_TO_2, _MATCH_2),
        "MMM": _MATCH_WORD,
        "MMMM": _MATCH_WORD,
        "D": _MATCH_1_TO_2,
        "DD": (_MATCH_1_TO_2_LEFT_PAD, _MATCH_2),
        "DDD": _MATCH_1_TO_3,
        "DDDD": _MATCH_3,
        "dddd": _MATCH_WORD,
        "ddd": _MATCH_WORD,
        "dd": _MATCH_WORD,
        "d": _MATCH_1,
        "e": _MATCH_1,
        "E": _MATCH_1,
        "Do": None,
        "H": _MATCH_1_TO_2,
        "HH": (_MATCH_1_TO_2, _MATCH_2),
        "h": _MATCH_1_TO_2,
        "hh": (_MATCH_1_TO_2, _MATCH_2),
        "m": _MATCH_1_TO_2,
        "mm": (_MATCH_1_TO_2, _MATCH_2),
        "s": _MATCH_1_TO_2,
        "ss": (_MATCH_1_TO_2, _MATCH_2),
        "S": (_MATCH_1_TO_3, _MATCH_1),
        "SS": (_MATCH_1_TO_3, _MATCH_2),
        "SSS": (_MATCH_1_TO_3, _MATCH_3),
        "SSSS": _MATCH_UNSIGNED,
        "SSSSS": _MATCH_UNSIGNED,
        "SSSSSS": _MATCH_UNSIGNED,
        "x": _MATCH_SIGNED,
        "X": _MATCH_TIMESTAMP,
        "ZZ": _MATCH_SHORT_OFFSET,
        "Z": _MATCH_OFFSET,
        "z": _MATCH_TIMEZONE,
    }

    # Parsing: token -> converter applied to the matched text.
    _PARSE_TOKENS: ClassVar[dict[str, Callable[[str], Any]]] = {
        # "Y" has a regex in _REGEX_TOKENS, so it needs a converter too.
        "Y": int,
        "YYYY": int,
        "YY": int,
        "Q": int,
        "MMMM": lambda month: month,
        "MMM": lambda month: month,
        "MM": int,
        "M": int,
        "DDDD": int,
        "DDD": int,
        "DD": int,
        "D": int,
        "dddd": lambda weekday: weekday,
        "ddd": lambda weekday: weekday,
        "dd": lambda weekday: weekday,
        "d": int,
        "E": lambda weekday: int(weekday) - 1,
        "HH": int,
        "H": int,
        "hh": int,
        "h": int,
        "mm": int,
        "m": int,
        "ss": int,
        "s": int,
        "S": lambda us: int(us) * 100000,
        "SS": lambda us: int(us) * 10000,
        "SSS": lambda us: int(us) * 1000,
        "SSSS": lambda us: int(us) * 100,
        "SSSSS": lambda us: int(us) * 10,
        "SSSSSS": int,
        "a": lambda meridiem: meridiem,
        "X": float,
        "x": lambda ts: float(ts) / 1e3,
        "ZZ": str,
        "Z": str,
        "z": str,
    }

    def format(
        self, dt: pendulum.DateTime, fmt: str, locale: str | Locale | None = None
    ) -> str:
        """
        Formats a DateTime instance with a given format and locale.

        :param dt: The instance to format
        :param fmt: The format to use
        :param locale: The locale to use; falls back to
            ``pendulum.get_locale()`` when falsy
        :return: The formatted string
        """
        loaded_locale: Locale = Locale.load(locale or pendulum.get_locale())

        def _substitute(m: Match[str]) -> str:
            # Exactly one of the three alternatives participates in a match.
            # Test with "is not None" so an empty literal "[]" yields ""
            # instead of falling through to the token branch.
            literal = m.group(1)
            if literal is not None:
                return literal

            escaped = m.group(2)
            if escaped is not None:
                return escaped

            return self._format_token(dt, m.group(3), loaded_locale)

        return self._FORMAT_RE.sub(_substitute, fmt)

    def _format_token(self, dt: pendulum.DateTime, token: str, locale: Locale) -> str:
        """
        Formats a DateTime instance with a given token and locale.

        Tokens that match no rule are returned unchanged.

        :param dt: The instance to format
        :param token: The token to use
        :param locale: The locale to use
        :return: The rendered token
        """
        if token in self._DATE_FORMATS:
            # Composite token: expand to the locale's format (or the default)
            # and format that recursively.
            fmt = locale.get(f"custom.date_formats.{token}")
            if fmt is None:
                fmt = self._DEFAULT_DATE_FORMATS[token]

            return self.format(dt, fmt, locale)

        if token in self._LOCALIZABLE_TOKENS:
            return self._format_localizable_token(dt, token, locale)

        if token in self._TOKENS_RULES:
            return self._TOKENS_RULES[token](dt)

        # Timezone
        if token in ["ZZ", "Z"]:
            if dt.tzinfo is None:
                return ""

            # "Z" renders +HH:MM, "ZZ" renders +HHMM.
            separator = ":" if token == "Z" else ""
            offset = dt.utcoffset() or datetime.timedelta()
            minutes = offset.total_seconds() / 60

            sign = "+" if minutes >= 0 else "-"
            hour, minute = divmod(abs(int(minutes)), 60)

            return f"{sign}{hour:02d}{separator}{minute:02d}"

        return token

    def _format_localizable_token(
        self, dt: pendulum.DateTime, token: str, locale: Locale
    ) -> str:
        """
        Formats a DateTime instance
        with a given localizable token and locale.

        Tokens without a branch below are returned unchanged.

        :param dt: The instance to format
        :param token: The token to use
        :param locale: The locale to use
        :return: The rendered token
        """
        if token == "MMM":
            return cast(str, locale.get("translations.months.abbreviated")[dt.month])

        if token == "MMMM":
            return cast(str, locale.get("translations.months.wide")[dt.month])

        if token == "dd":
            return cast(str, locale.get("translations.days.short")[dt.day_of_week])

        if token == "ddd":
            return cast(
                str,
                locale.get("translations.days.abbreviated")[dt.day_of_week],
            )

        if token == "dddd":
            return cast(str, locale.get("translations.days.wide")[dt.day_of_week])

        if token == "e":
            # Day of week relative to the locale's first day of the week.
            first_day = cast(int, locale.get("translations.week_data.first_day"))

            return str((dt.day_of_week % 7 - first_day) % 7)

        if token == "Do":
            return locale.ordinalize(dt.day)

        if token == "do":
            return locale.ordinalize((dt.day_of_week + 1) % 7)

        if token == "Mo":
            return locale.ordinalize(dt.month)

        if token == "Qo":
            return locale.ordinalize(dt.quarter)

        if token == "wo":
            return locale.ordinalize(dt.week_of_year)

        if token == "DDDo":
            return locale.ordinalize(dt.day_of_year)

        if token == "eo":
            first_day = cast(int, locale.get("translations.week_data.first_day"))

            return locale.ordinalize((dt.day_of_week % 7 - first_day) % 7 + 1)

        if token == "A":
            period = "pm" if dt.hour >= 12 else "am"

            return cast(str, locale.get(f"translations.day_periods.{period}"))

        return token

    def parse(
        self,
        time: str,
        fmt: str,
        now: pendulum.DateTime,
        locale: str | None = None,
    ) -> dict[str, Any]:
        """
        Parses a time string matching a given format into its elements.

        :param time: The timestring
        :param fmt: The format
        :param now: The datetime to use as "now"
        :param locale: The locale to use; falls back to
            ``pendulum.get_locale()`` when falsy
        :return: The parsed elements
        :raises ValueError: If the format contains no token, uses an
            unsupported token, or the string does not match the format
        """
        escaped_fmt = re.escape(fmt)

        if not self._FROM_FORMAT_RE.search(escaped_fmt):
            raise ValueError("The given time string does not match the given format")

        if not locale:
            locale = pendulum.get_locale()

        loaded_locale: Locale = Locale.load(locale)

        parsed: dict[str, Any] = {
            "year": None,
            "month": None,
            "day": None,
            "hour": None,
            "minute": None,
            "second": None,
            "microsecond": None,
            "tz": None,
            "quarter": None,
            "day_of_week": None,
            "day_of_year": None,
            "meridiem": None,
            "timestamp": None,
        }

        # Turn every token of the escaped format into a named regex group.
        pattern = self._FROM_FORMAT_RE.sub(
            lambda m: self._replace_tokens(m.group(0), loaded_locale), escaped_fmt
        )

        match = re.search("^" + pattern + "$", time)
        if match is None:
            raise ValueError(f"String does not match format {fmt}")

        # Read the values from the anchored match that was just validated,
        # rather than re-scanning the string with the unanchored pattern.
        self._get_parsed_values(match, parsed, loaded_locale, now)

        return self._check_parsed(parsed, now)

    def _check_parsed(
        self, parsed: dict[str, Any], now: pendulum.DateTime
    ) -> dict[str, Any]:
        """
        Checks validity of parsed elements and fills in missing ones.

        :param parsed: The elements to check.
        :param now: The datetime used to fill in unspecified elements.
        :return: The validated elements.
        :raises ValueError: If a meridiem is given without an hour, or with
            an hour of 13 or more.
        """
        validated: dict[str, int | Timezone | None] = {
            "year": parsed["year"],
            "month": parsed["month"],
            "day": parsed["day"],
            "hour": parsed["hour"],
            "minute": parsed["minute"],
            "second": parsed["second"],
            "microsecond": parsed["microsecond"],
            "tz": None,
        }

        # If timestamp has been specified
        # we use it and don't go any further
        if parsed["timestamp"] is not None:
            str_us = str(parsed["timestamp"])
            if "." in str_us:
                # Right-pad the fraction to six digits to get microseconds.
                microseconds = int(str_us.split(".")[1].ljust(6, "0"))
            else:
                microseconds = 0

            from pendulum.helpers import local_time

            parts = local_time(parsed["timestamp"], 0, microseconds)
            validated["year"] = parts[0]
            validated["month"] = parts[1]
            validated["day"] = parts[2]
            validated["hour"] = parts[3]
            validated["minute"] = parts[4]
            validated["second"] = parts[5]
            validated["microsecond"] = parts[6]

            return validated

        if parsed["quarter"] is not None:
            if validated["year"] is not None:
                dt = pendulum.datetime(cast(int, validated["year"]), 1, 1)
            else:
                dt = now

            # Step from the start of the year in 3-month hops until the
            # requested quarter is reached.
            dt = dt.start_of("year")

            while dt.quarter != parsed["quarter"]:
                dt = dt.add(months=3)

            validated["year"] = dt.year
            validated["month"] = dt.month
            validated["day"] = dt.day

        if validated["year"] is None:
            validated["year"] = now.year

        if parsed["day_of_year"] is not None:
            dt = cast(
                pendulum.DateTime,
                pendulum.parse(f'{validated["year"]}-{parsed["day_of_year"]:>03d}'),
            )

            validated["month"] = dt.month
            validated["day"] = dt.day

        if parsed["day_of_week"] is not None:
            dt = pendulum.datetime(
                cast(int, validated["year"]),
                cast(int, validated["month"]) or now.month,
                cast(int, validated["day"]) or now.day,
            )
            dt = dt.start_of("week").subtract(days=1)
            dt = dt.next(parsed["day_of_week"])
            validated["year"] = dt.year
            validated["month"] = dt.month
            validated["day"] = dt.day

        # Meridiem
        if parsed["meridiem"] is not None:
            hour = validated["hour"]
            if hour is None:
                raise ValueError("Invalid Date")

            # 13:00:00 or later cannot carry an AM/PM marker. Only the hour
            # is compared: minute/second may still be None at this point and
            # must not take part in an ordering comparison.
            if cast(int, hour) >= 13:
                raise ValueError("Invalid date")

            pm = parsed["meridiem"] == "pm"
            validated["hour"] %= 12  # type: ignore[operator]
            if pm:
                validated["hour"] += 12  # type: ignore[operator]

        # An explicit year (or month) resets unspecified smaller units to 1;
        # otherwise they default to the current date.
        if validated["month"] is None:
            if parsed["year"] is not None:
                validated["month"] = parsed["month"] or 1
            else:
                validated["month"] = parsed["month"] or now.month

        if validated["day"] is None:
            if parsed["year"] is not None or parsed["month"] is not None:
                validated["day"] = parsed["day"] or 1
            else:
                validated["day"] = parsed["day"] or now.day

        for part in ["hour", "minute", "second", "microsecond"]:
            if validated[part] is None:
                validated[part] = 0

        validated["tz"] = parsed["tz"]

        return validated

    def _get_parsed_values(
        self,
        m: Match[str],
        parsed: dict[str, Any],
        locale: Locale,
        now: pendulum.DateTime,
    ) -> None:
        """
        Stores the value of every named group of a match into ``parsed``.

        :param m: The match; its group names are format tokens
        :param parsed: The elements dictionary to update in place
        :param locale: The locale used for localizable tokens
        :param now: The datetime to use as "now"
        """
        for token, index in m.re.groupindex.items():
            if token in self._LOCALIZABLE_TOKENS:
                self._get_parsed_locale_value(token, m.group(index), parsed, locale)
            else:
                self._get_parsed_value(token, m.group(index), parsed, now)

    def _get_parsed_value(
        self,
        token: str,
        value: str,
        parsed: dict[str, Any],
        now: pendulum.DateTime,
    ) -> None:
        """
        Converts the text matched for a non-localizable token and stores it.

        :param token: The token the text was matched for
        :param value: The matched text
        :param parsed: The elements dictionary to update in place
        :param now: The datetime to use as "now" (not read here)
        :raises ValueError: If a 12-hour value exceeds 12 or a timezone name
            is unknown
        """
        parsed_token = self._PARSE_TOKENS[token](value)

        if "Y" in token:
            if token == "YY":
                # Two-digit years pivot at 68: 00-68 -> 20xx, 69-99 -> 19xx.
                if parsed_token <= 68:
                    parsed_token += 2000
                else:
                    parsed_token += 1900

            parsed["year"] = parsed_token
        elif token == "Q":
            parsed["quarter"] = parsed_token
        elif token in ["MM", "M"]:
            parsed["month"] = parsed_token
        elif token in ["DDDD", "DDD"]:
            parsed["day_of_year"] = parsed_token
        elif "D" in token:
            parsed["day"] = parsed_token
        elif "H" in token:
            parsed["hour"] = parsed_token
        elif token in ["hh", "h"]:
            if parsed_token > 12:
                raise ValueError("Invalid date")

            parsed["hour"] = parsed_token
        elif "m" in token:
            parsed["minute"] = parsed_token
        elif "s" in token:
            parsed["second"] = parsed_token
        elif "S" in token:
            parsed["microsecond"] = parsed_token
        elif token in ["d", "E"]:
            parsed["day_of_week"] = parsed_token
        elif token in ["X", "x"]:
            parsed["timestamp"] = parsed_token
        elif token in ["ZZ", "Z"]:
            if value in ("Z", "z"):
                # The offset patterns accept a bare UTC designator, which
                # has no digits to split; it means a zero offset.
                offset = 0
            else:
                negative = value.startswith("-")
                tz = value[1:]
                if ":" not in tz:
                    if len(tz) == 2:
                        # Hours only ("+07"): minutes default to 00.
                        tz = f"{tz}00"

                    off_hour = tz[0:2]
                    off_minute = tz[2:4]
                else:
                    off_hour, off_minute = tz.split(":")

                offset = ((int(off_hour) * 60) + int(off_minute)) * 60

                if negative:
                    offset = -1 * offset

            parsed["tz"] = pendulum.timezone(offset)
        elif token == "z":
            # Full timezone
            if value not in pendulum.timezones():
                raise ValueError("Invalid date")

            parsed["tz"] = pendulum.timezone(value)

    def _get_parsed_locale_value(
        self, token: str, value: str, parsed: dict[str, Any], locale: Locale
    ) -> None:
        """
        Converts the text matched for a localizable token and stores it.

        :param token: The token the text was matched for
        :param value: The matched text
        :param parsed: The elements dictionary to update in place
        :param locale: The locale used to resolve the text
        :raises ValueError: If the token is not handled or the text has no
            matching translation
        """
        if token == "MMMM":
            unit = "month"
            match = "months.wide"
        elif token == "MMM":
            unit = "month"
            match = "months.abbreviated"
        elif token == "Do":
            # Keep only the leading digits, dropping the ordinal suffix.
            parsed["day"] = int(cast(Match[str], re.match(r"(\d+)", value)).group(1))

            return
        elif token == "dddd":
            unit = "day_of_week"
            match = "days.wide"
        elif token == "ddd":
            unit = "day_of_week"
            match = "days.abbreviated"
        elif token == "dd":
            unit = "day_of_week"
            match = "days.short"
        elif token in ["a", "A"]:
            valid_values = [
                locale.translation("day_periods.am"),
                locale.translation("day_periods.pm"),
            ]

            if token == "a":
                value = value.lower()
                valid_values = [x.lower() for x in valid_values]

            if value not in valid_values:
                raise ValueError("Invalid date")

            parsed["meridiem"] = ["am", "pm"][valid_values.index(value)]

            return
        else:
            raise ValueError(f'Invalid token "{token}"')

        # Validate the lookup result, not the input text: an unmatched
        # translation must not be stored silently as None.
        translated = locale.match_translation(match, value)
        if translated is None:
            raise ValueError("Invalid date")

        parsed[unit] = translated

    def _replace_tokens(self, token: str, locale: Locale) -> str:
        """
        Converts one match of the escaped format string into a regex fragment.

        :param token: The matched text: a bracketed literal, an escaped
            character or a format token
        :param locale: The locale used for localizable tokens
        :return: The regex fragment; tokens become a group named after them
        :raises ValueError: If the token cannot be parsed
        """
        if token.startswith("[") and token.endswith("]"):
            return token[1:-1]
        elif token.startswith("\\"):
            # Escaped brackets only delimit literals; drop them.
            if len(token) == 2 and token[1] in {"[", "]"}:
                return ""

            return token
        elif token not in self._REGEX_TOKENS and token not in self._LOCALIZABLE_TOKENS:
            raise ValueError(f"Unsupported token: {token}")

        if token in self._LOCALIZABLE_TOKENS:
            values = self._LOCALIZABLE_TOKENS[token]
            if values is None:
                # Localizable token with no parse spec: reject it here
                # rather than looking up a None translation key.
                raise ValueError(f"Unsupported token: {token}")

            if callable(values):
                candidates = values(locale)
            else:
                # Translations are plain text, so escape regex
                # metacharacters (e.g. the "." of an abbreviation).
                candidates = tuple(
                    re.escape(text) for text in locale.translation(values).values()
                )
        else:
            candidates = cast(Sequence[str], self._REGEX_TOKENS[token])

        if not candidates:
            raise ValueError(f"Unsupported token: {token}")

        if not isinstance(candidates, tuple):
            candidates = (cast(str, candidates),)

        pattern = f'(?P<{token}>{"|".join(candidates)})'

        return pattern
|