12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999 |
- """
- babel.dates
- ~~~~~~~~~~~
- Locale dependent formatting and parsing of dates and times.
- The default locale for the functions in this module is determined by the
- following environment variables, in that order:
- * ``LC_TIME``,
- * ``LC_ALL``, and
- * ``LANG``
- :copyright: (c) 2013-2025 by the Babel Team.
- :license: BSD, see LICENSE for more details.
- """
- from __future__ import annotations
- import math
- import re
- import warnings
- from functools import lru_cache
- from typing import TYPE_CHECKING, Literal, SupportsInt
- try:
- import pytz
- except ModuleNotFoundError:
- pytz = None
- import zoneinfo
- import datetime
- from collections.abc import Iterable
- from babel import localtime
- from babel.core import Locale, default_locale, get_global
- from babel.localedata import LocaleDataDict
if TYPE_CHECKING:
    # These aliases exist only for static type checkers; they are never
    # evaluated at runtime.
    from typing_extensions import TypeAlias

    _Instant: TypeAlias = datetime.date | datetime.time | float | None
    _PredefinedTimeFormat: TypeAlias = Literal['full', 'long', 'medium', 'short']
    _Context: TypeAlias = Literal['format', 'stand-alone']
    _DtOrTzinfo: TypeAlias = datetime.datetime | datetime.tzinfo | str | int | datetime.time | None

# Quoting the LDML specification on metazone names:
#
# "If a given short metazone form is known NOT to be understood in a given
#  locale and the parent locale has this value such that it would normally
#  be inherited, the inheritance of this value can be explicitly disabled by
#  use of the 'no inheritance marker' as the value, which is 3 simultaneous [sic]
#  empty set characters ( U+2205 )."
#
# - https://www.unicode.org/reports/tr35/tr35-dates.html#Metazone_Names
NO_INHERITANCE_MARKER = '\u2205\u2205\u2205'

# Shorthand for the standard library's UTC tzinfo.
UTC = datetime.timezone.utc

# Local timezone object as provided by ``babel.localtime``.
LOCALTZ = localtime.LOCALTZ

# Default locale for the functions in this module (see the module docstring
# for the environment variables that are consulted).
LC_TIME = default_locale('LC_TIME')
def _localize(tz: datetime.tzinfo, dt: datetime.datetime) -> datetime.datetime:
    """
    Return `dt` expressed in the timezone `tz`.

    Works with both pytz and zoneinfo (or any other ``tzinfo``) objects:

    * if `dt` already carries exactly this `tz` object, it is returned as-is;
    * a naive `dt` is interpreted as wall-clock time in `tz` (using the
      ``localize`` method when the tzinfo provides one, as pytz does);
    * an aware `dt` in another timezone is converted with ``astimezone``.

    :param tz: the target timezone
    :param dt: the naive or aware datetime to localize
    :return: an aware datetime whose tzinfo belongs to `tz`
    """
    # Nothing to do.
    if dt.tzinfo is tz:
        return dt

    if dt.tzinfo is None:
        # pytz-style ``localize()`` only accepts naive datetimes (it raises
        # ValueError otherwise), so it must only be used on this branch.
        if hasattr(tz, 'localize'):  # pytz
            return tz.localize(dt)
        # Convert naive to localized.
        return dt.replace(tzinfo=tz)

    # Already aware in some other zone: convert between timezones.
    return dt.astimezone(tz)
def _get_dt_and_tzinfo(dt_or_tzinfo: _DtOrTzinfo) -> tuple[datetime.datetime | None, datetime.tzinfo]:
    """
    Split a `dt_or_tzinfo` value into a ``(datetime, tzinfo)`` pair.

    The datetime part is ``None`` whenever the input only identifies a
    timezone (a zone name, an integer, or a tzinfo object).
    See the docs for this function's callers for semantics.

    :rtype: tuple[datetime, tzinfo]
    """
    # No input at all: the current moment in the local timezone.
    if dt_or_tzinfo is None:
        return datetime.datetime.now(), LOCALTZ

    # A timezone name: look it up, there is no concrete datetime.
    if isinstance(dt_or_tzinfo, str):
        return None, get_timezone(dt_or_tzinfo)

    # Integers carry no timezone information; UTC is used.
    if isinstance(dt_or_tzinfo, int):
        return None, UTC

    # A time or datetime: use its own tzinfo, falling back to UTC if naive.
    if isinstance(dt_or_tzinfo, (datetime.datetime, datetime.time)):
        moment = _get_datetime(dt_or_tzinfo)
        zone = UTC if moment.tzinfo is None else moment.tzinfo
        return moment, zone

    # Anything else is taken to be a tzinfo object already.
    return None, dt_or_tzinfo
def _get_tz_name(dt_or_tzinfo: _DtOrTzinfo) -> str:
    """
    Determine the timezone name for a time, datetime, or tzinfo object.

    :rtype: str
    """
    moment, zone = _get_dt_and_tzinfo(dt_or_tzinfo)

    # pytz objects expose their identifier as ``zone``.
    if hasattr(zone, 'zone'):
        return zone.zone

    # ZoneInfo objects expose it as ``key`` (which may be None).
    if getattr(zone, 'key', None) is not None:
        return zone.key

    # Generic tzinfo: ask it for a name at the given (or current) moment.
    reference = moment or datetime.datetime.now(UTC)
    return zone.tzname(reference)
def _get_datetime(instant: _Instant) -> datetime.datetime:
    """
    Turn an "instant" (date, time, datetime, number) into a datetime.

    .. warning:: The return values of this function may depend on the system clock.

    ``None`` means "now". A time is combined with today's date.
    A date becomes a naive datetime at midnight of that day.

    >>> from datetime import date, datetime
    >>> _get_datetime(date(2015, 1, 1))
    datetime.datetime(2015, 1, 1, 0, 0)

    UNIX timestamps are converted to datetimes.

    >>> _get_datetime(1400000000)
    datetime.datetime(2014, 5, 13, 16, 53, 20)

    Other values are passed through as-is.

    >>> x = datetime(2015, 1, 1)
    >>> _get_datetime(x) is x
    True

    :param instant: date, time, datetime, integer, float or None
    :type instant: date|time|datetime|int|float|None
    :return: a datetime
    :rtype: datetime
    """
    if instant is None:
        # Current moment in UTC, returned as a naive datetime.
        now_utc = datetime.datetime.now(UTC)
        return now_utc.replace(tzinfo=None)

    if isinstance(instant, (int, float)):
        # UNIX timestamp, interpreted in UTC and returned naive.
        from_epoch = datetime.datetime.fromtimestamp(instant, UTC)
        return from_epoch.replace(tzinfo=None)

    if isinstance(instant, datetime.time):
        today = datetime.date.today()
        return datetime.datetime.combine(today, instant)

    is_plain_date = isinstance(instant, datetime.date) and not isinstance(instant, datetime.datetime)
    if is_plain_date:
        midnight = datetime.time()
        return datetime.datetime.combine(instant, midnight)

    # TODO (3.x): Add an assertion/type check for this fallthrough branch:
    return instant
def _ensure_datetime_tzinfo(dt: datetime.datetime, tzinfo: datetime.tzinfo | None = None) -> datetime.datetime:
    """
    Ensure the datetime passed has an attached tzinfo.

    If the datetime is tz-naive to begin with, UTC is attached.

    If a tzinfo is passed in, the datetime is normalized to that timezone.

    >>> from datetime import datetime
    >>> _get_tz_name(_ensure_datetime_tzinfo(datetime(2015, 1, 1)))
    'UTC'

    >>> tz = get_timezone("Europe/Stockholm")
    >>> _ensure_datetime_tzinfo(datetime(2015, 1, 1, 13, 15, tzinfo=UTC), tzinfo=tz).hour
    14

    :param dt: Datetime to augment.
    :param tzinfo: optional tzinfo
    :return: datetime with tzinfo
    :rtype: datetime
    """
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=UTC)

    if tzinfo is not None:
        # Resolve the zone once and use the resolved object for both the
        # conversion and the pytz normalization check, so that a zone that
        # had to be looked up by ``get_timezone`` is normalized as well.
        target = get_timezone(tzinfo)
        dt = dt.astimezone(target)
        if hasattr(target, 'normalize'):  # pytz
            dt = target.normalize(dt)

    return dt
def _get_time(
    time: datetime.time | datetime.datetime | None,
    tzinfo: datetime.tzinfo | None = None,
) -> datetime.time:
    """
    Build a timezone-aware time out of the given instant.

    .. warning:: The return values of this function may depend on the system clock.

    :param time: time, datetime or None
    :rtype: time
    """
    # Normalize the input: None means "now", numbers are UNIX timestamps.
    if time is None:
        value = datetime.datetime.now(UTC)
    elif isinstance(time, (int, float)):
        value = datetime.datetime.fromtimestamp(time, UTC)
    else:
        value = time

    # Naive values are taken to be in UTC.
    if value.tzinfo is None:
        value = value.replace(tzinfo=UTC)

    if not isinstance(value, datetime.datetime):
        # A bare time cannot be converted between zones; the requested
        # tzinfo is only attached to it.
        if tzinfo is not None:
            value = value.replace(tzinfo=tzinfo)
        return value

    if tzinfo is not None:
        value = value.astimezone(tzinfo)
        if hasattr(tzinfo, 'normalize'):  # pytz
            value = tzinfo.normalize(value)

    return value.timetz()
def get_timezone(zone: str | datetime.tzinfo | None = None) -> datetime.tzinfo:
    """Looks up a timezone by name and returns it.  The timezone object
    returned comes from ``pytz`` or ``zoneinfo``, whichever is available.
    It corresponds to the `tzinfo` interface and can be used with all of
    the functions of Babel that operate with dates.

    If a timezone is not known a :exc:`LookupError` is raised.  If `zone`
    is ``None`` a local zone object is returned.

    :param zone: the name of the timezone to look up.  If a timezone object
                 itself is passed in, it's returned unchanged.
    :return: the tzinfo object for `zone`
    :raise LookupError: if `zone` is a name that cannot be resolved
    """
    if zone is None:
        return LOCALTZ
    if not isinstance(zone, str):
        return zone

    if pytz:
        try:
            return pytz.timezone(zone)
        except pytz.UnknownTimeZoneError as e:
            raise LookupError(f"Unknown timezone {zone}") from e

    assert zoneinfo
    try:
        return zoneinfo.ZoneInfo(zone)
    except (zoneinfo.ZoneInfoNotFoundError, ValueError) as e:
        # ZoneInfo rejects malformed keys (absolute paths, non-normalized
        # paths, ...) with ValueError rather than ZoneInfoNotFoundError;
        # such a name is not a known timezone either, so it is reported
        # through the same documented LookupError.
        raise LookupError(f"Unknown timezone {zone}") from e
def get_period_names(
    width: Literal['abbreviated', 'narrow', 'wide'] = 'wide',
    context: _Context = 'stand-alone',
    locale: Locale | str | None = None,
) -> LocaleDataDict:
    """Look up the locale's names for day periods (AM/PM).

    >>> get_period_names(locale='en_US')['am']
    u'AM'

    :param width: the width to use, one of "abbreviated", "narrow", or "wide"
    :param context: the context, either "format" or "stand-alone"
    :param locale: the `Locale` object, or a locale string. Defaults to the system time locale.
    """
    parsed_locale = Locale.parse(locale or LC_TIME)
    periods_by_width = parsed_locale.day_periods[context]
    return periods_by_width[width]
def get_day_names(
    width: Literal['abbreviated', 'narrow', 'short', 'wide'] = 'wide',
    context: _Context = 'format',
    locale: Locale | str | None = None,
) -> LocaleDataDict:
    """Look up the locale's day names for the requested width and context.

    >>> get_day_names('wide', locale='en_US')[1]
    u'Tuesday'
    >>> get_day_names('short', locale='en_US')[1]
    u'Tu'
    >>> get_day_names('abbreviated', locale='es')[1]
    u'mar'
    >>> get_day_names('narrow', context='stand-alone', locale='de_DE')[1]
    u'D'

    :param width: the width to use, one of "wide", "abbreviated", "short" or "narrow"
    :param context: the context, either "format" or "stand-alone"
    :param locale: the `Locale` object, or a locale string. Defaults to the system time locale.
    """
    parsed_locale = Locale.parse(locale or LC_TIME)
    days_by_width = parsed_locale.days[context]
    return days_by_width[width]
def get_month_names(
    width: Literal['abbreviated', 'narrow', 'wide'] = 'wide',
    context: _Context = 'format',
    locale: Locale | str | None = None,
) -> LocaleDataDict:
    """Look up the locale's month names for the requested width and context.

    >>> get_month_names('wide', locale='en_US')[1]
    u'January'
    >>> get_month_names('abbreviated', locale='es')[1]
    u'ene'
    >>> get_month_names('narrow', context='stand-alone', locale='de_DE')[1]
    u'J'

    :param width: the width to use, one of "wide", "abbreviated", or "narrow"
    :param context: the context, either "format" or "stand-alone"
    :param locale: the `Locale` object, or a locale string. Defaults to the system time locale.
    """
    parsed_locale = Locale.parse(locale or LC_TIME)
    months_by_width = parsed_locale.months[context]
    return months_by_width[width]
def get_quarter_names(
    width: Literal['abbreviated', 'narrow', 'wide'] = 'wide',
    context: _Context = 'format',
    locale: Locale | str | None = None,
) -> LocaleDataDict:
    """Look up the locale's quarter names for the requested width and context.

    >>> get_quarter_names('wide', locale='en_US')[1]
    u'1st quarter'
    >>> get_quarter_names('abbreviated', locale='de_DE')[1]
    u'Q1'
    >>> get_quarter_names('narrow', locale='de_DE')[1]
    u'1'

    :param width: the width to use, one of "wide", "abbreviated", or "narrow"
    :param context: the context, either "format" or "stand-alone"
    :param locale: the `Locale` object, or a locale string. Defaults to the system time locale.
    """
    parsed_locale = Locale.parse(locale or LC_TIME)
    quarters_by_width = parsed_locale.quarters[context]
    return quarters_by_width[width]
def get_era_names(
    width: Literal['abbreviated', 'narrow', 'wide'] = 'wide',
    locale: Locale | str | None = None,
) -> LocaleDataDict:
    """Look up the locale's era names for the requested width.

    >>> get_era_names('wide', locale='en_US')[1]
    u'Anno Domini'
    >>> get_era_names('abbreviated', locale='de_DE')[1]
    u'n. Chr.'

    :param width: the width to use, either "wide", "abbreviated", or "narrow"
    :param locale: the `Locale` object, or a locale string. Defaults to the system time locale.
    """
    parsed_locale = Locale.parse(locale or LC_TIME)
    eras_by_width = parsed_locale.eras
    return eras_by_width[width]
def get_date_format(
    format: _PredefinedTimeFormat = 'medium',
    locale: Locale | str | None = None,
) -> DateTimePattern:
    """Look up the locale's date formatting pattern for the given
    predefined format.

    >>> get_date_format(locale='en_US')
    <DateTimePattern u'MMM d, y'>
    >>> get_date_format('full', locale='de_DE')
    <DateTimePattern u'EEEE, d. MMMM y'>

    :param format: the format to use, one of "full", "long", "medium", or
                   "short"
    :param locale: the `Locale` object, or a locale string. Defaults to the system time locale.
    """
    parsed_locale = Locale.parse(locale or LC_TIME)
    date_patterns = parsed_locale.date_formats
    return date_patterns[format]
def get_datetime_format(
    format: _PredefinedTimeFormat = 'medium',
    locale: Locale | str | None = None,
) -> str:
    """Return the datetime formatting patterns used by the locale for the
    specified format.

    The result is a plain template string in which ``{0}`` stands for the
    formatted time and ``{1}`` for the formatted date.

    >>> get_datetime_format(locale='en_US')
    u'{1}, {0}'

    :param format: the format to use, one of "full", "long", "medium", or
                   "short"
    :param locale: the `Locale` object, or a locale string. Defaults to the system time locale.
    :return: the datetime template string
    """
    patterns = Locale.parse(locale or LC_TIME).datetime_formats
    # Formats the locale does not define are looked up under the ``None``
    # key instead. A separate local is used so the ``format`` parameter
    # keeps its declared type.
    key = format if format in patterns else None
    return patterns[key]
def get_time_format(
    format: _PredefinedTimeFormat = 'medium',
    locale: Locale | str | None = None,
) -> DateTimePattern:
    """Look up the locale's time formatting pattern for the given
    predefined format.

    >>> get_time_format(locale='en_US')
    <DateTimePattern u'h:mm:ss\u202fa'>
    >>> get_time_format('full', locale='de_DE')
    <DateTimePattern u'HH:mm:ss zzzz'>

    :param format: the format to use, one of "full", "long", "medium", or
                   "short"
    :param locale: the `Locale` object, or a locale string. Defaults to the system time locale.
    """
    parsed_locale = Locale.parse(locale or LC_TIME)
    time_patterns = parsed_locale.time_formats
    return time_patterns[format]
def get_timezone_gmt(
    datetime: _Instant = None,
    width: Literal['long', 'short', 'iso8601', 'iso8601_short'] = 'long',
    locale: Locale | str | None = None,
    return_z: bool = False,
) -> str:
    """Return the timezone associated with the given `datetime` object formatted
    as string indicating the offset from GMT.

    >>> from datetime import datetime
    >>> dt = datetime(2007, 4, 1, 15, 30)
    >>> get_timezone_gmt(dt, locale='en')
    u'GMT+00:00'
    >>> get_timezone_gmt(dt, locale='en', return_z=True)
    'Z'
    >>> get_timezone_gmt(dt, locale='en', width='iso8601_short')
    u'+00'
    >>> tz = get_timezone('America/Los_Angeles')
    >>> dt = _localize(tz, datetime(2007, 4, 1, 15, 30))
    >>> get_timezone_gmt(dt, locale='en')
    u'GMT-07:00'
    >>> get_timezone_gmt(dt, 'short', locale='en')
    u'-0700'
    >>> get_timezone_gmt(dt, locale='en', width='iso8601_short')
    u'-07'

    The long format depends on the locale, for example in France the acronym
    UTC string is used instead of GMT:

    >>> get_timezone_gmt(dt, 'long', locale='fr_FR')
    u'UTC-07:00'

    Negative offsets that are not a whole number of hours keep their
    magnitude:

    >>> from datetime import timedelta, timezone
    >>> nst = timezone(timedelta(hours=-3, minutes=-30))
    >>> get_timezone_gmt(datetime(2007, 1, 1, 12, tzinfo=nst), 'iso8601', locale='en')
    '-03:30'

    .. versionadded:: 0.9

    :param datetime: the ``datetime`` object; if `None`, the current date and
                     time in UTC is used
    :param width: either "long" or "short" or "iso8601" or "iso8601_short"
    :param locale: the `Locale` object, or a locale string. Defaults to the system time locale.
    :param return_z: True or False; Function returns indicator "Z"
                     when local time offset is 0
    :return: the formatted GMT offset
    """
    datetime = _ensure_datetime_tzinfo(_get_datetime(datetime))
    locale = Locale.parse(locale or LC_TIME)

    offset = datetime.tzinfo.utcoffset(datetime)
    total_seconds = offset.days * 24 * 60 * 60 + offset.seconds

    # Format the sign and the magnitude separately. Applying divmod() to a
    # negative number of seconds floors towards negative infinity, which
    # would turn -03:30 into "-04:30".
    sign = '-' if total_seconds < 0 else '+'
    hours, remainder = divmod(abs(total_seconds), 3600)
    minutes = remainder // 60

    if return_z and hours == 0 and remainder == 0:
        return 'Z'
    if remainder == 0 and width == 'iso8601_short':
        # Whole-hour offsets are abbreviated to just the hours.
        return f'{sign}{hours:02d}'
    if width == 'short' or width == 'iso8601_short':
        return f'{sign}{hours:02d}{minutes:02d}'
    if width == 'iso8601':
        return f'{sign}{hours:02d}:{minutes:02d}'
    # Long form: wrap the offset in the locale's GMT format.
    return locale.zone_formats['gmt'] % f'{sign}{hours:02d}:{minutes:02d}'
def get_timezone_location(
    dt_or_tzinfo: _DtOrTzinfo = None,
    locale: Locale | str | None = None,
    return_city: bool = False,
) -> str:
    """Return a representation of the given timezone using "location format".

    The result depends on both the local display name of the country and the
    city associated with the time zone:

    >>> tz = get_timezone('America/St_Johns')
    >>> print(get_timezone_location(tz, locale='de_DE'))
    Kanada (St. John’s) (Ortszeit)
    >>> print(get_timezone_location(tz, locale='en'))
    Canada (St. John’s) Time
    >>> print(get_timezone_location(tz, locale='en', return_city=True))
    St. John’s
    >>> tz = get_timezone('America/Mexico_City')
    >>> get_timezone_location(tz, locale='de_DE')
    u'Mexiko (Mexiko-Stadt) (Ortszeit)'

    If the timezone is associated with a country that uses only a single
    timezone, just the localized country name is returned:

    >>> tz = get_timezone('Europe/Berlin')
    >>> get_timezone_name(tz, locale='de_DE')
    u'Mitteleurop\\xe4ische Zeit'

    .. versionadded:: 0.9

    :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines
                         the timezone; if `None`, the current date and time in
                         UTC is assumed
    :param locale: the `Locale` object, or a locale string. Defaults to the system time locale.
    :param return_city: True or False, if True then return exemplar city (location)
                        for the time zone
    :return: the localized timezone name using location format

    """
    locale = Locale.parse(locale or LC_TIME)

    # Resolve the zone identifier and map aliases to the canonical code.
    raw_zone = _get_tz_name(dt_or_tzinfo)
    zone = get_global('zone_aliases').get(raw_zone, raw_zone)

    zone_info = locale.time_zones.get(zone, {})
    region_format = locale.zone_formats['region']

    # Find the territory the zone belongs to; unknown ones map to 'ZZ'.
    territory = get_global('zone_territories').get(zone)
    if territory not in locale.territories:
        territory = 'ZZ'  # invalid/unknown
    territory_name = locale.territories[territory]

    # A territory with exactly one timezone is described by its name alone.
    if not return_city and territory:
        zones_of_territory = get_global('territory_zones').get(territory, [])
        if len(zones_of_territory) == 1:
            return region_format % territory_name

    # Otherwise the city becomes part of the output.
    fallback_format = locale.zone_formats['fallback']
    if 'city' in zone_info:
        city_name = zone_info['city']
    else:
        metazone_key = get_global('meta_zones').get(zone)
        metazone_info = locale.meta_zones.get(metazone_key, {})
        if 'city' in metazone_info:
            city_name = metazone_info['city']
        else:
            # No localized city: derive one from the zone identifier, using
            # everything after the first slash when there is one.
            _, slash, remainder = zone.partition('/')
            city_source = remainder if slash else zone
            city_name = city_source.replace('_', ' ')

    if return_city:
        return city_name

    location = fallback_format % {
        '0': city_name,
        '1': territory_name,
    }
    return region_format % location
def get_timezone_name(
    dt_or_tzinfo: _DtOrTzinfo = None,
    width: Literal['long', 'short'] = 'long',
    uncommon: bool = False,
    locale: Locale | str | None = None,
    zone_variant: Literal['generic', 'daylight', 'standard'] | None = None,
    return_zone: bool = False,
) -> str:
    r"""Return the localized display name for the given timezone. The timezone
    may be specified using a ``datetime`` or `tzinfo` object.

    >>> from datetime import time
    >>> dt = time(15, 30, tzinfo=get_timezone('America/Los_Angeles'))
    >>> get_timezone_name(dt, locale='en_US')  # doctest: +SKIP
    u'Pacific Standard Time'
    >>> get_timezone_name(dt, locale='en_US', return_zone=True)
    'America/Los_Angeles'
    >>> get_timezone_name(dt, width='short', locale='en_US')  # doctest: +SKIP
    u'PST'

    If this function gets passed only a `tzinfo` object and no concrete
    `datetime`,  the returned display name is independent of daylight savings
    time. This can be used for example for selecting timezones, or to set the
    time of events that recur across DST changes:

    >>> tz = get_timezone('America/Los_Angeles')
    >>> get_timezone_name(tz, locale='en_US')
    u'Pacific Time'
    >>> get_timezone_name(tz, 'short', locale='en_US')
    u'PT'

    If no localized display name for the timezone is available, and the timezone
    is associated with a country that uses only a single timezone, the name of
    that country is returned, formatted according to the locale:

    >>> tz = get_timezone('Europe/Berlin')
    >>> get_timezone_name(tz, locale='de_DE')
    u'Mitteleurop\xe4ische Zeit'
    >>> get_timezone_name(tz, locale='pt_BR')
    u'Hor\xe1rio da Europa Central'

    On the other hand, if the country uses multiple timezones, the city is also
    included in the representation:

    >>> tz = get_timezone('America/St_Johns')
    >>> get_timezone_name(tz, locale='de_DE')
    u'Neufundland-Zeit'

    Note that short format is currently not supported for all timezones and
    all locales.  This is partially because not every timezone has a short
    code in every locale.  In that case it currently falls back to the long
    format.

    For more information see `LDML Appendix J: Time Zone Display Names
    <https://www.unicode.org/reports/tr35/#Time_Zone_Fallback>`_

    .. versionadded:: 0.9

    .. versionchanged:: 1.0
       Added `zone_variant` support.

    :param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines
                         the timezone; if a ``tzinfo`` object is used, the
                         resulting display name will be generic, i.e.
                         independent of daylight savings time; if `None`, the
                         current date in UTC is assumed
    :param width: either "long" or "short"
    :param uncommon: deprecated and ignored
    :param zone_variant: defines the zone variation to return.  By default the
                           variation is defined from the datetime object
                           passed in.  If no datetime object is passed in, the
                           ``'generic'`` variation is assumed.  The following
                           values are valid: ``'generic'``, ``'daylight'`` and
                           ``'standard'``.
    :param locale: the `Locale` object, or a locale string. Defaults to the system time locale.
    :param return_zone: True or False. If true then function
                        returns long time zone ID
    """
    dt, tzinfo = _get_dt_and_tzinfo(dt_or_tzinfo)
    locale = Locale.parse(locale or LC_TIME)
    zone = _get_tz_name(dt_or_tzinfo)

    # Validate an explicit variant, or derive one from the input.
    if zone_variant is not None:
        if zone_variant not in ('generic', 'standard', 'daylight'):
            raise ValueError('Invalid zone variation')
    elif dt is None:
        # Only a timezone was given: the name must not depend on DST.
        zone_variant = 'generic'
    else:
        zone_variant = "daylight" if tzinfo.dst(dt) else "standard"

    # Get the canonical time-zone code
    zone = get_global('zone_aliases').get(zone, zone)
    if return_zone:
        return zone

    # Explicitly translated zone names take precedence.
    zone_names = locale.time_zones.get(zone, {})
    if width in zone_names:
        names_by_variant = zone_names[width]
        if zone_variant in names_by_variant:
            return names_by_variant[zone_variant]

    # Next, try the names of the metazone the zone belongs to.
    metazone = get_global('meta_zones').get(zone)
    if metazone:
        metazone_info = locale.meta_zones.get(metazone, {})
        if width in metazone_info:
            name = metazone_info[width].get(zone_variant)
            if width == 'short' and name == NO_INHERITANCE_MARKER:
                # If the short form is marked no-inheritance,
                # try to fall back to the long name instead.
                long_names = metazone_info.get('long', {})
                name = long_names.get(zone_variant)
            if name:
                return name

    # If we have a concrete datetime, we assume that the result can't be
    # independent of daylight savings time, so we return the GMT offset
    if dt is not None:
        return get_timezone_gmt(dt, width=width, locale=locale)

    return get_timezone_location(dt_or_tzinfo, locale=locale)
def format_date(
    date: datetime.date | None = None,
    format: _PredefinedTimeFormat | str = 'medium',
    locale: Locale | str | None = None,
) -> str:
    """Format a date according to a predefined format or a custom pattern.

    >>> from datetime import date
    >>> d = date(2007, 4, 1)
    >>> format_date(d, locale='en_US')
    u'Apr 1, 2007'
    >>> format_date(d, format='full', locale='de_DE')
    u'Sonntag, 1. April 2007'

    If you don't want to use the locale default formats, you can specify a
    custom date pattern:

    >>> format_date(d, "EEE, MMM d, ''yy", locale='en')
    u"Sun, Apr 1, '07"

    :param date: the ``date`` or ``datetime`` object; if `None`, the current
                 date is used
    :param format: one of "full", "long", "medium", or "short", or a custom
                   date/time pattern
    :param locale: a `Locale` object or a locale identifier. Defaults to the system time locale.
    """
    # Work on a plain date: default to today, drop the time of a datetime.
    if date is None:
        day = datetime.date.today()
    elif isinstance(date, datetime.datetime):
        day = date.date()
    else:
        day = date

    parsed_locale = Locale.parse(locale or LC_TIME)

    # Predefined format names are translated into the locale's pattern.
    chosen_format = format
    if chosen_format in ('full', 'long', 'medium', 'short'):
        chosen_format = get_date_format(chosen_format, locale=parsed_locale)

    return parse_pattern(chosen_format).apply(day, parsed_locale)
def format_datetime(
    datetime: _Instant = None,
    format: _PredefinedTimeFormat | str = 'medium',
    tzinfo: datetime.tzinfo | None = None,
    locale: Locale | str | None = None,
) -> str:
    r"""Format a date and time according to a predefined format or a custom
    pattern.

    >>> from datetime import datetime
    >>> dt = datetime(2007, 4, 1, 15, 30)
    >>> format_datetime(dt, locale='en_US')
    u'Apr 1, 2007, 3:30:00\u202fPM'

    For any pattern requiring the display of the timezone:

    >>> format_datetime(dt, 'full', tzinfo=get_timezone('Europe/Paris'),
    ...                 locale='fr_FR')
    'dimanche 1 avril 2007, 17:30:00 heure d’été d’Europe centrale'
    >>> format_datetime(dt, "yyyy.MM.dd G 'at' HH:mm:ss zzz",
    ...                 tzinfo=get_timezone('US/Eastern'), locale='en')
    u'2007.04.01 AD at 11:30:00 EDT'

    :param datetime: the `datetime` object; if `None`, the current date and
                     time is used
    :param format: one of "full", "long", "medium", or "short", or a custom
                   date/time pattern
    :param tzinfo: the timezone to apply to the time for display
    :param locale: a `Locale` object or a locale identifier. Defaults to the system time locale.
    """
    moment = _ensure_datetime_tzinfo(_get_datetime(datetime), tzinfo)
    parsed_locale = Locale.parse(locale or LC_TIME)

    # Custom patterns are applied directly.
    if format not in ('full', 'long', 'medium', 'short'):
        return parse_pattern(format).apply(moment, parsed_locale)

    # Predefined formats: fill the locale's template, in which {0} stands
    # for the time and {1} for the date. Quote characters are dropped.
    template = get_datetime_format(format, locale=parsed_locale).replace("'", "")
    time_text = format_time(moment, format, tzinfo=None, locale=parsed_locale)
    with_time = template.replace('{0}', time_text)
    date_text = format_date(moment, format, locale=parsed_locale)
    return with_time.replace('{1}', date_text)
def format_time(
    time: datetime.time | datetime.datetime | float | None = None,
    format: _PredefinedTimeFormat | str = 'medium',
    tzinfo: datetime.tzinfo | None = None,
    locale: Locale | str | None = None,
) -> str:
    r"""Format a time value with a named or a custom pattern.

    >>> from datetime import datetime, time
    >>> t = time(15, 30)
    >>> format_time(t, locale='en_US')
    u'3:30:00\u202fPM'
    >>> format_time(t, format='short', locale='de_DE')
    u'15:30'

    A custom time pattern can be given instead of one of the locale's
    default formats:

    >>> format_time(t, "hh 'o''clock' a", locale='en')
    u"03 o'clock PM"

    Patterns that display the time-zone require an explicit timezone:

    >>> t = datetime(2007, 4, 1, 15, 30)
    >>> tzinfo = get_timezone('Europe/Paris')
    >>> t = _localize(tzinfo, t)
    >>> format_time(t, format='full', tzinfo=tzinfo, locale='fr_FR')
    '15:30:00 heure d’été d’Europe centrale'
    >>> format_time(t, "hh 'o''clock' a, zzzz", tzinfo=get_timezone('US/Eastern'),
    ...             locale='en')
    u"09 o'clock AM, Eastern Daylight Time"

    As the last example shows, a ``datetime.datetime`` value is adjusted to
    the timezone given by `tzinfo` before formatting.  A "naive" ``datetime``
    (one without timezone information) is assumed to be in UTC.

    No such adjustment is made for ``datetime.time`` values: without a date
    there is no way to know whether daylight savings time is in effect, so
    the time is left as-is and `tzinfo` is only used to display the timezone
    name if the pattern asks for it:

    >>> t = time(15, 30)
    >>> format_time(t, format='full', tzinfo=get_timezone('Europe/Paris'),
    ...             locale='fr_FR')  # doctest: +SKIP
    u'15:30:00 heure normale d\u2019Europe centrale'
    >>> format_time(t, format='full', tzinfo=get_timezone('US/Eastern'),
    ...             locale='en_US')  # doctest: +SKIP
    u'3:30:00\u202fPM Eastern Standard Time'

    :param time: the ``time`` or ``datetime`` object; if `None`, the current
                 time in UTC is used
    :param format: one of "full", "long", "medium", or "short", or a custom
                   date/time pattern
    :param tzinfo: the time-zone to apply to the time for display
    :param locale: a `Locale` object or a locale identifier. Defaults to the
                   system time locale.
    :return: the formatted string
    """
    # Remember the calendar date (when there is one) before the value is
    # reduced to a time, so the pattern can pick the right timezone variant.
    ref_date = None
    if isinstance(time, datetime.datetime):
        ref_date = time.date()

    time = _get_time(time, tzinfo)
    locale = Locale.parse(locale or LC_TIME)

    is_predefined = format in ('full', 'long', 'medium', 'short')
    pattern = get_time_format(format, locale=locale) if is_predefined else format
    return parse_pattern(pattern).apply(time, locale, reference_date=ref_date)
def format_skeleton(
    skeleton: str,
    datetime: _Instant = None,
    tzinfo: datetime.tzinfo | None = None,
    fuzzy: bool = True,
    locale: Locale | str | None = None,
) -> str:
    r"""Format a time and/or date using a CLDR skeleton.

    Skeletons are defined in the CLDR data and are more flexible than the
    simple short/long/medium formats, though a little harder to use.  They
    are written with the date/time symbols, without order or punctuation,
    and map to a suitable pattern for the given locale.

    >>> from datetime import datetime
    >>> t = datetime(2007, 4, 1, 15, 30)
    >>> format_skeleton('MMMEd', t, locale='fr')
    u'dim. 1 avr.'
    >>> format_skeleton('MMMEd', t, locale='en')
    u'Sun, Apr 1'
    >>> format_skeleton('yMMd', t, locale='fi')  # yMMd is not in the Finnish locale; yMd gets used
    u'1.4.2007'
    >>> format_skeleton('yMMd', t, fuzzy=False, locale='fi')  # yMMd is not in the Finnish locale, an error is thrown
    Traceback (most recent call last):
        ...
    KeyError: yMMd
    >>> format_skeleton('GH', t, fuzzy=True, locale='fi_FI')  # GH is not in the Finnish locale and there is no close match, an error is thrown
    Traceback (most recent call last):
        ...
    KeyError: None

    Once the skeleton has been resolved to a pattern, `format_datetime` does
    the formatting, so timezone handling is the same as for that function.

    :param skeleton: A date time skeleton as defined in the cldr data.
    :param datetime: the ``time`` or ``datetime`` object; if `None`, the current
                     time in UTC is used
    :param tzinfo: the time-zone to apply to the time for display
    :param fuzzy: If the skeleton is not found, allow choosing a skeleton that's
                  close enough to it. If there is no close match, a `KeyError`
                  is thrown.
    :param locale: a `Locale` object or a locale identifier. Defaults to the
                   system time locale.
    :return: the formatted string
    """
    locale = Locale.parse(locale or LC_TIME)
    known_skeletons = locale.datetime_skeletons

    if fuzzy and skeleton not in known_skeletons:
        # The matcher's result is used as the lookup key below, so a failed
        # match surfaces as the KeyError shown in the examples.
        skeleton = match_skeleton(skeleton, known_skeletons)

    pattern = known_skeletons[skeleton]
    return format_datetime(datetime, pattern, tzinfo, locale)
# Units tried by ``format_timedelta``, listed from largest to smallest, each
# paired with its length in seconds.  The table counts a year as 365 days and
# a month as 30 days.
TIMEDELTA_UNITS: tuple[tuple[str, int], ...] = (
    ('year', 3600 * 24 * 365),
    ('month', 3600 * 24 * 30),
    ('week', 3600 * 24 * 7),
    ('day', 3600 * 24),
    ('hour', 3600),
    ('minute', 60),
    ('second', 1),
)
def format_timedelta(
    delta: datetime.timedelta | int,
    granularity: Literal['year', 'month', 'week', 'day', 'hour', 'minute', 'second'] = 'second',
    threshold: float = .85,
    add_direction: bool = False,
    format: Literal['narrow', 'short', 'medium', 'long'] = 'long',
    locale: Locale | str | None = None,
) -> str:
    """Describe a time difference in words, following the locale's rules.

    >>> from datetime import timedelta
    >>> format_timedelta(timedelta(weeks=12), locale='en_US')
    u'3 months'
    >>> format_timedelta(timedelta(seconds=1), locale='es')
    u'1 segundo'

    `granularity` sets the smallest unit that may be presented (a second by
    default):

    >>> format_timedelta(timedelta(hours=3), granularity='day', locale='en_US')
    u'1 day'

    `threshold` controls the point at which the presentation switches to the
    next larger unit; a higher factor makes the switch happen later:

    >>> format_timedelta(timedelta(hours=23), threshold=0.9, locale='en_US')
    u'1 day'
    >>> format_timedelta(timedelta(hours=23), threshold=1.1, locale='en_US')
    u'23 hours'

    With `add_direction` the result also says whether the delta lies in the
    past or in the future:

    >>> format_timedelta(timedelta(hours=1), add_direction=True, locale='en')
    u'in 1 hour'
    >>> format_timedelta(timedelta(hours=-1), add_direction=True, locale='en')
    u'1 hour ago'

    `format` controls how compact or wide the presentation is:

    >>> format_timedelta(timedelta(hours=3), format='short', locale='en')
    u'3 hr'
    >>> format_timedelta(timedelta(hours=3), format='narrow', locale='en')
    u'3h'

    :param delta: a ``timedelta`` object representing the time difference to
                  format, or the delta in seconds as an `int` value
    :param granularity: determines the smallest unit that should be displayed,
                        the value can be one of "year", "month", "week", "day",
                        "hour", "minute" or "second"
    :param threshold: factor that determines at which point the presentation
                      switches to the next higher unit
    :param add_direction: if `True`, include directional information: a
                          positive delta is described as being in the future,
                          a negative one as being in the past
    :param format: the format, can be "narrow", "short" or "long". (
                   "medium" is deprecated, currently converted to "long" to
                   maintain compatibility)
    :param locale: a `Locale` object or a locale identifier. Defaults to the
                   system time locale.
    :return: the formatted delta, or an empty string if no pattern was found
    :raises TypeError: if `format` is not one of the accepted values
    """
    if format not in ('narrow', 'short', 'medium', 'long'):
        raise TypeError('Format must be one of "narrow", "short" or "long"')
    if format == 'medium':
        warnings.warn(
            '"medium" value for format param of format_timedelta'
            ' is deprecated. Use "long" instead',
            category=DeprecationWarning,
            stacklevel=2,
        )
        format = 'long'

    if isinstance(delta, datetime.timedelta):
        seconds = int((delta.days * 86400) + delta.seconds)
    else:
        seconds = delta

    locale = Locale.parse(locale or LC_TIME)
    date_fields = locale._data["date_fields"]
    unit_patterns = locale._data["unit_patterns"]

    def _candidate_patterns(unit_name):
        # Yield pattern tables in order of preference; entries may be None.
        if add_direction:
            # Prefer the length-specific variant ("year-narrow") and fall
            # back to the plain unit entry.
            relative = date_fields.get(f"{unit_name}-{format}") or date_fields[unit_name]
            yield relative['future'] if seconds >= 0 else relative['past']
        durations = unit_patterns.get(f"duration-{unit_name}", {})
        yield durations.get(format)
        # We do not support `<alias>` tags at all while ingesting CLDR data,
        # so these aliases specified in `root.xml` are hard-coded here:
        # <unitLength type="long"><alias source="locale" path="../unitLength[@type='short']"/></unitLength>
        # <unitLength type="narrow"><alias source="locale" path="../unitLength[@type='short']"/></unitLength>
        if format in ("long", "narrow"):
            yield durations.get("short")

    magnitude = abs(seconds)
    for unit, secs_per_unit in TIMEDELTA_UNITS:
        value = magnitude / secs_per_unit
        at_granularity = unit == granularity
        if not (value >= threshold or at_granularity):
            continue

        # At the smallest allowed unit, any non-zero delta shows as at least 1.
        if at_granularity and value > 0:
            value = max(1, value)
        value = int(round(value))
        plural_form = locale.plural_form(value)

        pattern = None
        for table in _candidate_patterns(unit):
            if table is None:
                continue
            pattern = table.get(plural_form) or table.get('other')
            if pattern:
                break

        # This really should not happen
        if pattern is None:
            return ''
        return pattern.replace('{0}', str(value))

    return ''
def _format_fallback_interval(
    start: _Instant,
    end: _Instant,
    skeleton: str | None,
    tzinfo: datetime.tzinfo | None,
    locale: Locale,
) -> str:
    """Format an interval by joining two independently formatted instants.

    Each endpoint is formatted with the given skeleton if the locale knows
    it; otherwise as a date, a time or a datetime depending on the types of
    both endpoints.  If both endpoints format to the same text, that text is
    returned on its own; otherwise the two are inserted into the locale's
    fallback interval pattern (``"{0}-{1}"`` when the locale has none).

    :param start: first instant
    :param end: second instant
    :param skeleton: skeleton to format each endpoint with, or `None`
    :param tzinfo: timezone passed on to the time-aware formatters
    :param locale: an already parsed `Locale`
    :return: the formatted interval
    """
    endpoints = (start, end)

    if skeleton in locale.datetime_skeletons:  # Use the given skeleton
        def render(dt):
            return format_skeleton(skeleton, dt, tzinfo, locale=locale)
    elif all(
        isinstance(d, datetime.date) and not isinstance(d, datetime.datetime)
        for d in endpoints
    ):  # Both are just dates
        def render(dt):
            return format_date(dt, locale=locale)
    elif all(
        isinstance(d, datetime.time) and not isinstance(d, datetime.date)
        for d in endpoints
    ):  # Both are times
        def render(dt):
            return format_time(dt, tzinfo=tzinfo, locale=locale)
    else:
        def render(dt):
            return format_datetime(dt, tzinfo=tzinfo, locale=locale)

    formatted_start = render(start)
    formatted_end = render(end)

    if formatted_start == formatted_end:
        return render(start)

    # The locale stores its fallback interval pattern under the key None.
    template = locale.interval_formats.get(None, "{0}-{1}")
    template = template.replace("{0}", formatted_start)
    return template.replace("{1}", formatted_end)
def format_interval(
    start: _Instant,
    end: _Instant,
    skeleton: str | None = None,
    tzinfo: datetime.tzinfo | None = None,
    fuzzy: bool = True,
    locale: Locale | str | None = None,
) -> str:
    """
    Format the interval between two instants using the locale's rules.

    >>> from datetime import date, time
    >>> format_interval(date(2016, 1, 15), date(2016, 1, 17), "yMd", locale="fi")
    u'15.\u201317.1.2016'

    >>> format_interval(time(12, 12), time(16, 16), "Hm", locale="en_GB")
    '12:12\u201316:16'

    >>> format_interval(time(5, 12), time(16, 16), "hm", locale="en_US")
    '5:12\u202fAM\u2009–\u20094:16\u202fPM'

    >>> format_interval(time(16, 18), time(16, 24), "Hm", locale="it")
    '16:18\u201316:24'

    If the start instant equals the end instant, the interval is formatted like the instant.

    >>> format_interval(time(16, 18), time(16, 18), "Hm", locale="it")
    '16:18'

    Unknown skeletons fall back to "default" formatting.

    >>> format_interval(date(2015, 1, 1), date(2017, 1, 1), "wzq", locale="ja")
    '2015/01/01\uff5e2017/01/01'

    >>> format_interval(time(16, 18), time(16, 24), "xxx", locale="ja")
    '16:18:00\uff5e16:24:00'

    >>> format_interval(date(2016, 1, 15), date(2016, 1, 17), "xxx", locale="de")
    '15.01.2016\u2009–\u200917.01.2016'

    :param start: First instant (datetime/date/time)
    :param end: Second instant (datetime/date/time)
    :param skeleton: The "skeleton format" to use for formatting.
    :param tzinfo: tzinfo to use (if none is already attached)
    :param fuzzy: If the skeleton is not found, allow choosing a skeleton that's
                  close enough to it.
    :param locale: A locale object or identifier. Defaults to the system time locale.
    :return: Formatted interval
    """
    locale = Locale.parse(locale or LC_TIME)

    # NB: The quote comments below are from the algorithm description in
    # https://www.unicode.org/reports/tr35/tr35-dates.html#intervalFormats

    # > Look for the intervalFormatItem element that matches the "skeleton",
    # > starting in the current locale and then following the locale fallback
    # > chain up to, but not including root.
    interval_formats = locale.interval_formats

    if skeleton not in interval_formats or not skeleton:
        # > If no match was found from the previous step, check what the closest
        # > match is in the fallback locale chain, as in availableFormats. That
        # > is, this allows for adjusting the string value field's width,
        # > including adjusting between "MMM" and "MMMM", and using different
        # > variants of the same field, such as 'v' and 'z'.
        skeleton = match_skeleton(skeleton, interval_formats) if (skeleton and fuzzy) else None

        if not skeleton:  # Still no match whatsoever?
            # > Otherwise, format the start and end datetime using the fallback pattern.
            return _format_fallback_interval(start, end, skeleton, tzinfo, locale)

    skel_formats = interval_formats[skeleton]

    if start == end:
        return format_skeleton(skeleton, start, tzinfo, fuzzy=fuzzy, locale=locale)

    start = _ensure_datetime_tzinfo(_get_datetime(start), tzinfo=tzinfo)
    end = _ensure_datetime_tzinfo(_get_datetime(end), tzinfo=tzinfo)

    start_fmt = DateTimeFormat(start, locale=locale)
    end_fmt = DateTimeFormat(end, locale=locale)

    # > If a match is found from previous steps, compute the calendar field
    # > with the greatest difference between start and end datetime. If there
    # > is no difference among any of the fields in the pattern, format as a
    # > single date using availableFormats, and return.
    for field in PATTERN_CHAR_ORDER:  # These are in largest-to-smallest order
        if field not in skel_formats:
            continue
        if start_fmt.extract(field) == end_fmt.extract(field):
            continue
        # > If there is a match, use the pieces of the corresponding pattern to
        # > format the start and end datetime, as above.
        pieces = [
            parse_pattern(pattern).apply(instant, locale)
            for pattern, instant in zip(skel_formats[field], (start, end))
        ]
        return "".join(pieces)

    # > Otherwise, format the start and end datetime using the fallback pattern.
    return _format_fallback_interval(start, end, skeleton, tzinfo, locale)
def get_period_id(
    time: _Instant,
    tzinfo: datetime.tzinfo | None = None,
    type: Literal['selection'] | None = None,
    locale: Locale | str | None = None,
) -> str:
    """
    Get the day period ID for a given time.

    This ID can be used as a key for the period name dictionary.

    >>> from datetime import time
    >>> get_period_names(locale="de")[get_period_id(time(7, 42), locale="de")]
    u'Morgen'

    >>> get_period_id(time(0), locale="en_US")
    u'midnight'

    >>> get_period_id(time(0), type="selection", locale="en_US")
    u'night1'

    :param time: The time to inspect.
    :param tzinfo: The timezone for the time. See ``format_time``.
    :param type: The period type to use. Either "selection" or None.
                 The selection type is used for selecting among phrases such as
                 “Your email arrived yesterday evening” or “Your email arrived last night”.
    :param locale: the `Locale` object, or a locale string. Defaults to the system time locale.
    :return: period ID. Something is always returned -- even if it's just "am" or "pm".
    :raises NotImplementedError: if a rule uses the deprecated ``after`` key.
    """
    time = _get_time(time, tzinfo)
    # Rules are compared against whole seconds since midnight.
    seconds_past_midnight = int(time.hour * 60 * 60 + time.minute * 60 + time.second)
    locale = Locale.parse(locale or LC_TIME)

    # The LDML rules state that the rules may not overlap, so iterating in arbitrary
    # order should be alright, though `at` periods should be preferred.
    rulesets = locale.day_period_rules.get(type, {}).items()

    # First pass: exact "at" matches win over any range rule.
    for rule_id, rules in rulesets:
        for rule in rules:
            if "at" in rule and rule["at"] == seconds_past_midnight:
                return rule_id

    # Second pass: range rules.
    for rule_id, rules in rulesets:
        for rule in rules:
            if "from" in rule and "before" in rule:
                if rule["from"] < rule["before"]:
                    # Ordinary range within a single day.
                    if rule["from"] <= seconds_past_midnight < rule["before"]:
                        return rule_id
                else:
                    # Range that wraps around midnight.
                    # e.g. from="21:00" before="06:00"
                    if rule["from"] <= seconds_past_midnight < 86400 or \
                            0 <= seconds_past_midnight < rule["before"]:
                        return rule_id

            # Generic check for the remaining key combinations: the rule
            # matches when both a start bound and an end bound are satisfied.
            start_ok = end_ok = False

            if "from" in rule and seconds_past_midnight >= rule["from"]:
                start_ok = True
            if "to" in rule and seconds_past_midnight <= rule["to"]:
                # This rule type does not exist in the present CLDR data;
                # excuse the lack of test coverage.
                end_ok = True
            if "before" in rule and seconds_past_midnight < rule["before"]:
                end_ok = True
            if "after" in rule:
                raise NotImplementedError("'after' is deprecated as of CLDR 29.")

            if start_ok and end_ok:
                return rule_id

    # No rule matched: fall back to a plain am/pm split at noon (43200 s).
    if seconds_past_midnight < 43200:
        return "am"
    else:
        return "pm"
class ParseError(ValueError):
    """Raised by ``parse_date`` and ``parse_time`` when the input string
    contains no numbers to parse."""
    pass
def parse_date(
    string: str,
    locale: Locale | str | None = None,
    format: _PredefinedTimeFormat | str = 'medium',
) -> datetime.date:
    """Parse a date from a string.

    An explicit format, when given, determines how the date is read.

    >>> parse_date('01.04.2004', format='dd.MM.yyyy')
    datetime.date(2004, 4, 1)

    If no format is given, or if it is one of "full", "long", "medium",
    or "short", the string is first tried as an ISO-8601 date; failing that,
    the locale's date format is used as a hint for the order in which the
    date fields appear in the string.

    >>> parse_date('4/1/04', locale='en_US')
    datetime.date(2004, 4, 1)
    >>> parse_date('01.04.2004', locale='de_DE')
    datetime.date(2004, 4, 1)
    >>> parse_date('2004-04-01', locale='en_US')
    datetime.date(2004, 4, 1)
    >>> parse_date('2004-04-01', locale='de_DE')
    datetime.date(2004, 4, 1)
    >>> parse_date('01.04.04', locale='de_DE', format='short')
    datetime.date(2004, 4, 1)

    :param string: the string containing the date
    :param locale: a `Locale` object or a locale identifier. Defaults to the
                   system time locale.
    :param format: the format to use, either an explicit date format,
                   or one of "full", "long", "medium", or "short"
                   (see ``get_date_format``)
    :return: the parsed date
    :raises ParseError: if the string contains no numbers
    """
    numbers = re.findall(r'(\d+)', string)
    if not numbers:
        raise ParseError("No numbers were found in input")

    use_predefined_format = format in ('full', 'long', 'medium', 'short')

    if use_predefined_format:
        # we try ISO-8601 format first, meaning similar to formats
        # extended YYYY-MM-DD or basic YYYYMMDD
        iso_alike = re.match(r'^(\d{4})-?([01]\d)-?([0-3]\d)$',
                             string, flags=re.ASCII)  # allow only ASCII digits
        if iso_alike:
            try:
                return datetime.date(*(int(part) for part in iso_alike.groups()))
            except ValueError:
                pass  # a locale format might fit better, so let's continue
        fmt = get_date_format(format=format, locale=locale)
    else:
        fmt = parse_pattern(format)

    # Find where each field first occurs in the (lower-cased) pattern.
    format_str = fmt.pattern.lower()
    year_idx = format_str.index('y')
    month_idx = format_str.find('m')
    if month_idx < 0:
        month_idx = format_str.index('l')
    day_idx = format_str.index('d')

    # Rank the fields by position: the rank is the index into `numbers`.
    ordered = sorted([(year_idx, 'Y'), (month_idx, 'M'), (day_idx, 'D')])
    rank = {field: position for position, (_, field) in enumerate(ordered)}

    # FIXME: this currently only supports numbers, but should also support month
    #        names, both in the requested locale, and english

    year_text = numbers[rank['Y']]
    # Two-digit years are taken to be in the 2000s.
    year = int(year_text) + 2000 if len(year_text) == 2 else int(year_text)
    month = int(numbers[rank['M']])
    day = int(numbers[rank['D']])
    if month > 12:
        # An impossible month means the two fields were the other way round.
        month, day = day, month
    return datetime.date(year, month, day)
def parse_time(
    string: str,
    locale: Locale | str | None = None,
    format: _PredefinedTimeFormat | str = 'medium',
) -> datetime.time:
    """Parse a time from a string.

    This function uses the time format for the locale as a hint to determine
    the order in which the time fields appear in the string.

    If an explicit format is provided, the function will use it to parse
    the time instead.

    >>> parse_time('15:30:00', locale='en_US')
    datetime.time(15, 30)
    >>> parse_time('15:30:00', format='H:mm:ss')
    datetime.time(15, 30)

    When the format contains a day-period field and the string carries an
    "am"/"pm" marker, the hour is read on the 12-hour clock: "12 pm" is noon
    (hour 12) and "12 am" is midnight (hour 0).

    :param string: the string containing the time
    :param locale: a `Locale` object or a locale identifier. Defaults to the system time locale.
    :param format: the format to use, either an explicit time format,
                   or one of "full", "long", "medium", or "short"
                   (see ``get_time_format``)
    :return: the parsed time
    :rtype: `time`
    :raises ParseError: if the string contains no numbers
    """
    numbers = re.findall(r'(\d+)', string)
    if not numbers:
        raise ParseError("No numbers were found in input")

    # TODO: try ISO format first?
    if format in ('full', 'long', 'medium', 'short'):
        fmt = get_time_format(format=format, locale=locale)
    else:
        fmt = parse_pattern(format)
    format_str = fmt.pattern.lower()
    hour_idx = format_str.find('h')
    if hour_idx < 0:
        hour_idx = format_str.index('k')
    min_idx = format_str.index('m')
    # format might not contain seconds
    if (sec_idx := format_str.find('s')) < 0:
        sec_idx = math.inf

    # Rank the fields by position: the rank is the index into `numbers`.
    indexes = sorted([(hour_idx, 'H'), (min_idx, 'M'), (sec_idx, 'S')])
    indexes = {item[1]: idx for idx, item in enumerate(indexes)}

    # TODO: support time zones

    # Parse up to three numbers from the string.
    minute = second = 0
    hour = int(numbers[indexes['H']])

    # Check if the format specifies a period to be used; if it does, use the
    # am/pm marker in the input to convert from the 12-hour clock.  Hour 12
    # is special: "12 pm" stays 12 (adding 12 would give the invalid hour 24)
    # and "12 am" is midnight.  Without a marker the hour is left untouched.
    if 'a' in format_str:
        lowered = string.lower()
        if 'pm' in lowered:
            if hour != 12:
                hour += 12
        elif 'am' in lowered and hour == 12:
            hour = 0

    if len(numbers) > 1:
        minute = int(numbers[indexes['M']])
        if len(numbers) > 2:
            second = int(numbers[indexes['S']])
    return datetime.time(hour, minute, second)
class DateTimePattern:
    """A date/time pattern together with its ``%``-style format template.

    ``pattern`` is kept as given; ``format`` is what gets ``%``-applied to a
    `DateTimeFormat` to produce the final text.
    """

    def __init__(self, pattern: str, format: DateTimeFormat):
        self.pattern = pattern
        self.format = format

    def __repr__(self) -> str:
        return "<{} {!r}>".format(type(self).__name__, self.pattern)

    def __str__(self) -> str:
        return self.pattern

    def __mod__(self, other: DateTimeFormat) -> str:
        # Only a DateTimeFormat can fill in the template's fields.
        if isinstance(other, DateTimeFormat):
            return self.format % other
        return NotImplemented

    def apply(
        self,
        datetime: datetime.date | datetime.time,
        locale: Locale | str | None,
        reference_date: datetime.date | None = None,
    ) -> str:
        """Format *datetime* for *locale* using this pattern.

        :param datetime: the value to format
        :param locale: the locale to format for
        :param reference_date: passed through to `DateTimeFormat`
        :return: the formatted string
        """
        formatter = DateTimeFormat(datetime, locale, reference_date)
        return self % formatter
- class DateTimeFormat:
def __init__(
    self,
    value: datetime.date | datetime.time,
    locale: Locale | str,
    reference_date: datetime.date | None = None,
) -> None:
    """Wrap *value* so pattern fields can be looked up on it.

    :param value: the date, time or datetime to format
    :param locale: a `Locale` object or a locale identifier
    :param reference_date: optional date stored alongside the value
    """
    assert isinstance(value, (datetime.date, datetime.datetime, datetime.time))
    carries_time = isinstance(value, (datetime.datetime, datetime.time))
    if carries_time and value.tzinfo is None:
        # Naive times and datetimes are treated as UTC.
        value = value.replace(tzinfo=UTC)
    self.value = value
    self.locale = Locale.parse(locale)
    self.reference_date = reference_date
def __getitem__(self, name: str) -> str:
    """Return the formatted text for one pattern field.

    :param name: the field as it appears in a pattern, e.g. ``"yyyy"``; its
                 first character selects the field and its length the width
    :raises KeyError: if the field character is not supported
    """
    char, num = name[0], len(name)

    if char == 'G':
        return self.format_era(char, num)
    if char in ('y', 'Y', 'u'):
        return self.format_year(char, num)
    if char in ('Q', 'q'):
        return self.format_quarter(char, num)
    if char in ('M', 'L'):
        return self.format_month(char, num)
    if char in ('w', 'W'):
        return self.format_week(char, num)
    if char == 'd':
        return self.format(self.value.day, num)
    if char == 'D':
        return self.format_day_of_year(num)
    if char == 'F':
        return self.format_day_of_week_in_month()
    if char in ('E', 'e', 'c'):
        return self.format_weekday(char, num)
    if char in ('a', 'b', 'B'):
        return self.format_period(char, num)
    if char == 'h':
        # 12-hour clock, 1-12: hours 0 and 12 both show as 12.
        return self.format(self.value.hour % 12 or 12, num)
    if char == 'H':
        return self.format(self.value.hour, num)
    if char == 'K':
        # 12-hour clock, 0-11.
        return self.format(self.value.hour % 12, num)
    if char == 'k':
        # 24-hour clock, 1-24: midnight shows as 24.
        return self.format(self.value.hour or 24, num)
    if char == 'm':
        return self.format(self.value.minute, num)
    if char == 's':
        return self.format(self.value.second, num)
    if char == 'S':
        return self.format_frac_seconds(num)
    if char == 'A':
        return self.format_milliseconds_in_day(num)
    if char in ('z', 'Z', 'v', 'V', 'x', 'X', 'O'):
        return self.format_timezone(char, num)
    raise KeyError(f"Unsupported date/time field {char!r}")
def extract(self, char: str) -> int:
    """Return the numeric value of one field of the wrapped value.

    Only the first character of *char* is considered.

    :param char: field character: ``y``, ``M``, ``d``, ``H``, ``h``, ``m``
                 or ``a``
    :raises NotImplementedError: for any other field character
    """
    plain_attributes = {
        'y': 'year',
        'M': 'month',
        'd': 'day',
        'H': 'hour',
        'm': 'minute',
    }
    char = str(char)[0]
    if char in plain_attributes:
        return getattr(self.value, plain_attributes[char])
    if char == 'h':
        return self.value.hour % 12 or 12
    if char == 'a':
        return int(self.value.hour >= 12)  # 0 for am, 1 for pm
    raise NotImplementedError(f"Not implemented: extracting {char!r} from {self.value!r}")
def format_era(self, char: str, num: int) -> str:
    """Return the era name for the value's year.

    :param char: pattern character (not used by the lookup)
    :param num: field width; up to three letters select the abbreviated
                name, four the wide name and five the narrow name
    """
    width_by_count = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}
    width = width_by_count[max(3, num)]
    # Index 1 is used for years >= 0, index 0 otherwise.
    era_index = int(self.value.year >= 0)
    names = get_era_names(width, self.locale)
    return names[era_index]
def format_year(self, char: str, num: int) -> str:
    """Return the year, zero-padded to *num* digits.

    An upper-case *char* requests the week-based year: early-January days
    that belong to week 52 or later count towards the previous year, and
    late-December days that belong to week 1 or 2 count towards the next.
    A width of exactly two keeps only the last two digits.

    :param char: pattern character
    :param num: field width
    """
    year = self.value.year
    if char.isupper():
        month, day = self.value.month, self.value.day
        if month == 1 and day < 7 and self.get_week_of_year() >= 52:
            year -= 1
        elif month == 12 and day > 25 and self.get_week_of_year() <= 2:
            year += 1
    text = self.format(year, num)
    return text[-2:] if num == 2 else text
def format_quarter(self, char: str, num: int) -> str:
    """Return the quarter of the value's month, as a number or a name.

    :param char: ``'Q'`` (format context) or ``'q'`` (stand-alone context)
    :param num: field width; one or two give the zero-padded number, three
                the abbreviated, four the wide and five the narrow name
    """
    quarter = (self.value.month - 1) // 3 + 1
    if num > 2:
        width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num]
        context = {'Q': 'format', 'q': 'stand-alone'}[char]
        return get_quarter_names(width, context, self.locale)[quarter]
    return '%0*d' % (num, quarter)
def format_month(self, char: str, num: int) -> str:
    """Return the value's month, as a number or a name.

    :param char: ``'M'`` (format context) or ``'L'`` (stand-alone context)
    :param num: field width; one or two give the zero-padded number, three
                the abbreviated, four the wide and five the narrow name
    """
    if num > 2:
        width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num]
        context = {'M': 'format', 'L': 'stand-alone'}[char]
        return get_month_names(width, context, self.locale)[self.value.month]
    return '%0*d' % (num, self.value.month)
def format_week(self, char: str, num: int) -> str:
    """Return the week number of the value.

    :param char: a lower-case character selects the week of the year
                 (zero-padded to *num* digits); any other character selects
                 the week of the month, which is never padded
    :param num: field width
    """
    if not char.islower():  # week of month
        return str(self.get_week_of_month())
    # week of year
    return self.format(self.get_week_of_year(), num)
def format_weekday(self, char: str = 'E', num: int = 4) -> str:
    """
    Return the weekday of the value, as a name or a locale-relative number.

    >>> from datetime import date
    >>> format = DateTimeFormat(date(2016, 2, 28), Locale.parse('en_US'))
    >>> format.format_weekday()
    u'Sunday'

    'E': Day of week - Use one through three letters for the abbreviated day name, four for the full (wide) name,
         five for the narrow name, or six for the short name.

    >>> format.format_weekday('E',2)
    u'Sun'

    'e': Local day of week. Same as E, except that one or two letters give a numeric value that depends on the
         locale's first day of the week.

    >>> format.format_weekday('e',2)
    '01'

    'c': Stand-Alone local day of week - Use one letter for the local numeric value (same as 'e'), three for the
         abbreviated day name, four for the full (wide) name, five for the narrow name, or six for the short name.

    >>> format.format_weekday('c',1)
    '1'

    :param char: pattern format character ('e','E','c')
    :param num: count of format character
    """
    if num < 3:
        if char.islower():
            # Number the days 1-7 starting from the locale's first weekday.
            offset = 7 - self.locale.first_week_day + self.value.weekday()
            return self.format(offset % 7 + 1, num)
        # Short upper-case fields are shown as the abbreviated name.
        num = 3
    weekday = self.value.weekday()
    width = {3: 'abbreviated', 4: 'wide', 5: 'narrow', 6: 'short'}[num]
    context = "format" if char != "c" else "stand-alone"
    return get_day_names(width, context, self.locale)[weekday]
def format_day_of_year(self, num: int) -> str:
    """Return the day of the year, zero-padded to *num* digits."""
    day_number = self.get_day_of_year()
    return self.format(day_number, num)
def format_day_of_week_in_month(self) -> str:
    """Return which occurrence of its weekday the value is within its month.

    Days 1-7 give ``'1'``, days 8-14 give ``'2'``, and so on.
    """
    occurrence = (self.value.day - 1) // 7 + 1
    return str(occurrence)
def format_period(self, char: str, num: int) -> str:
    """
    Return the day period name for the value.

    >>> from datetime import datetime, time
    >>> format = DateTimeFormat(time(13, 42), 'fi_FI')
    >>> format.format_period('a', 1)
    u'ip.'
    >>> format.format_period('b', 1)
    u'iltap.'
    >>> format.format_period('b', 4)
    u'iltapäivä'
    >>> format.format_period('B', 4)
    u'iltapäivällä'
    >>> format.format_period('B', 5)
    u'ip.'

    >>> format = DateTimeFormat(datetime(2022, 4, 28, 6, 27), 'zh_Hant')
    >>> format.format_period('a', 1)
    u'上午'
    >>> format.format_period('B', 1)
    u'清晨'

    :param char: pattern format character ('a', 'b', 'B')
    :param num: count of format character
    :raises ValueError: if no width has a name for the period
    """
    # Try the requested width first, then every other width as a fallback.
    requested_width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[max(3, num)]
    widths = [requested_width, 'wide', 'narrow', 'abbreviated']

    if char == 'a':
        # Plain am/pm, decided by the hour alone.
        period = 'am' if self.value.hour < 12 else 'pm'
        context = 'format'
    else:
        period = get_period_id(self.value, locale=self.locale)
        context = 'stand-alone' if char != 'B' else 'format'

    for width in widths:
        period_names = get_period_names(context=context, width=width, locale=self.locale)
        if period in period_names:
            return period_names[period]
    raise ValueError(f"Could not format period {period} in {self.locale}")
def format_frac_seconds(self, num: int) -> str:
    """Return the fractional part of the second as exactly *num* digits.

    The value's microseconds are rounded (half up) to the requested number
    of digits.  The work is done in integers: going through floats loses
    digits (0.29 * 100 is 28.999..., which ``%d`` then truncates to 28).
    A fraction that would round up to a full second is capped at all nines,
    because the seconds field is formatted separately and cannot absorb the
    carry; without the cap the result would be one digit too wide.

    :param num: number of fractional digits to produce
    """
    microseconds = self.value.microsecond
    if num >= 6:
        # Microseconds already carry six digits; wider fields add zeros.
        scaled = microseconds * 10 ** (num - 6)
    else:
        divisor = 10 ** (6 - num)
        scaled = (microseconds + divisor // 2) // divisor
        scaled = min(scaled, 10 ** num - 1)
    return self.format(scaled, num)
def format_milliseconds_in_day(self, num: int) -> str:
    """Return the milliseconds elapsed since midnight, zero-padded.

    Microseconds are truncated to whole milliseconds.

    :param num: minimum number of digits
    """
    msecs = self.value.microsecond // 1000 + self.value.second * 1000 + \
        self.value.minute * 60000 + self.value.hour * 3600000
    return self.format(msecs, num)
def format_timezone(self, char: str, num: int) -> str:
    """Return the timezone text for one timezone pattern field.

    The field character selects the representation and *num* the width.
    Note that the width table only has entries for up to five letters, so a
    longer field raises `KeyError`, and that combinations no branch handles
    (for example ``'O'`` with a count other than four) fall through and
    return `None`.

    :param char: pattern character: 'z', 'Z', 'O', 'v', 'V', 'X' or 'x'
    :param num: count of the pattern character
    """
    width = {3: 'short', 4: 'long', 5: 'iso8601'}[max(3, num)]

    # It could be that we only receive a time to format, but also have a
    # reference date which is important to distinguish between timezone
    # variants (summer/standard time)
    value = self.value
    if self.reference_date:
        value = datetime.datetime.combine(self.reference_date, self.value)

    if char == 'z':
        return get_timezone_name(value, width, locale=self.locale)
    elif char == 'Z':
        if num == 5:
            return get_timezone_gmt(value, width, locale=self.locale, return_z=True)
        return get_timezone_gmt(value, width, locale=self.locale)
    elif char == 'O':
        if num == 4:
            return get_timezone_gmt(value, width, locale=self.locale)
    # TODO: To add support for O:1
    elif char == 'v':
        # Only the tzinfo is passed here, not the value itself.
        return get_timezone_name(value.tzinfo, width,
                                 locale=self.locale)
    elif char == 'V':
        if num == 1:
            return get_timezone_name(value.tzinfo, width,
                                     uncommon=True, locale=self.locale)
        elif num == 2:
            return get_timezone_name(value.tzinfo, locale=self.locale, return_zone=True)
        elif num == 3:
            return get_timezone_location(value.tzinfo, locale=self.locale, return_city=True)
        return get_timezone_location(value.tzinfo, locale=self.locale)
    # Included additional elif condition to add support for 'Xx' in timezone format
    elif char == 'X':
        # Same widths as 'x' below, but with return_z=True.
        if num == 1:
            return get_timezone_gmt(value, width='iso8601_short', locale=self.locale,
                                    return_z=True)
        elif num in (2, 4):
            return get_timezone_gmt(value, width='short', locale=self.locale,
                                    return_z=True)
        elif num in (3, 5):
            return get_timezone_gmt(value, width='iso8601', locale=self.locale,
                                    return_z=True)
    elif char == 'x':
        if num == 1:
            return get_timezone_gmt(value, width='iso8601_short', locale=self.locale)
        elif num in (2, 4):
            return get_timezone_gmt(value, width='short', locale=self.locale)
        elif num in (3, 5):
            return get_timezone_gmt(value, width='iso8601', locale=self.locale)
def format(self, value: SupportsInt, length: int) -> str:
    """Return ``value`` as a decimal integer zero-padded to ``length``.

    :param value: the number to render
    :param length: the minimum width of the resulting string
    """
    zero_padded = '%0*d' % (length, value)
    return zero_padded
def get_day_of_year(self, date: datetime.date | None = None) -> int:
    """Return the 1-based day of the year.

    :param date: the date to inspect; if omitted, ``self.value`` is used
    """
    target = self.value if date is None else date
    year_start = target.replace(month=1, day=1)
    return (target - year_start).days + 1
def get_week_of_year(self) -> int:
    """Return the week of the year."""
    week = self.get_week_number(self.get_day_of_year(self.value))

    if week == 0:
        # The date still belongs to the last week of the previous year, so
        # number it as December 31st of that year would be numbered.
        previous_year_end = datetime.date(self.value.year - 1, 12, 31)
        return self.get_week_number(self.get_day_of_year(previous_year_end),
                                    previous_year_end.weekday())

    if week > 52:
        # The date may already fall into week 1 of the following year.
        next_year_weekday = datetime.date(self.value.year + 1, 1, 1).weekday()
        next_year_starts_week_one = self.get_week_number(1, next_year_weekday) == 1
        if next_year_starts_week_one and \
                32 - (next_year_weekday - self.locale.first_week_day) % 7 <= self.value.day:
            return 1

    return week
def get_week_of_month(self) -> int:
    """Return the week of the month."""
    day_of_month = self.value.day
    return self.get_week_number(day_of_month)
def get_week_number(self, day_of_period: int, day_of_week: int | None = None) -> int:
    """Return the number of the week of a day within a period. This may be
    the week number in a year or the week number in a month.

    Usually this will return a value equal to or greater than 1, but if the
    first week of the period is so short that it actually counts as the last
    week of the previous period, this function will return 0.

    >>> date = datetime.date(2006, 1, 8)
    >>> DateTimeFormat(date, 'de_DE').get_week_number(6)
    1
    >>> DateTimeFormat(date, 'en_US').get_week_number(6)
    2

    :param day_of_period: the number of the day in the period (usually
                          either the day of month or the day of year)
    :param day_of_week: the week day; if omitted, the week day of the
                        current date is assumed
    :return: the week number within the period, or 0 as described above
    """
    if day_of_week is None:
        day_of_week = self.value.weekday()

    # Position of the period's first day within the locale's week
    # (0 = first day of the week). Python's ``%`` already yields a value in
    # the range 0..6 for a positive modulus, so no sign fix-up is needed.
    first_day = (day_of_week - self.locale.first_week_day -
                 day_of_period + 1) % 7

    week_number = (day_of_period + first_day - 1) // 7

    # ``7 - first_day`` is the number of days of the first (partial) week
    # that lie inside the period; it only counts as a week of its own when
    # it has at least ``min_week_days`` days.
    if 7 - first_day >= self.locale.min_week_days:
        week_number += 1
    return week_number
# Maps every recognised pattern character to the list of field lengths that
# are accepted for it; ``None`` means that no length restriction is applied.
PATTERN_CHARS: dict[str, list[int] | None] = {
    # era
    'G': [1, 2, 3, 4, 5],
    # year
    'y': None,
    'Y': None,
    'u': None,
    # quarter
    'Q': [1, 2, 3, 4, 5],
    'q': [1, 2, 3, 4, 5],
    # month
    'M': [1, 2, 3, 4, 5],
    'L': [1, 2, 3, 4, 5],
    # week
    'w': [1, 2],
    'W': [1],
    # day
    'd': [1, 2],
    'D': [1, 2, 3],
    'F': [1],
    'g': None,
    # week day
    'E': [1, 2, 3, 4, 5, 6],
    'e': [1, 2, 3, 4, 5, 6],
    'c': [1, 3, 4, 5, 6],
    # period
    'a': [1, 2, 3, 4, 5],
    'b': [1, 2, 3, 4, 5],
    'B': [1, 2, 3, 4, 5],
    # hour
    'h': [1, 2],
    'H': [1, 2],
    'K': [1, 2],
    'k': [1, 2],
    # minute
    'm': [1, 2],
    # second
    's': [1, 2],
    'S': None,
    'A': None,
    # zone
    'z': [1, 2, 3, 4],
    'Z': [1, 2, 3, 4, 5],
    'O': [1, 4],
    'v': [1, 4],
    'V': [1, 2, 3, 4],
    'x': [1, 2, 3, 4, 5],
    'X': [1, 2, 3, 4, 5],
}
#: The pattern characters declared in the Date Field Symbol Table
#: (https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table)
#: in order of decreasing magnitude.
PATTERN_CHAR_ORDER = (
    "G"          # era
    "yYuU"       # year
    "Qq"         # quarter
    "MLl"        # month
    "wW"         # week
    "dDFg"       # day
    "Eec"        # week day
    "abB"        # period
    "ChHKkjJ"    # hour
    "m"          # minute
    "sSA"        # second
    "zZOvVXx"    # zone
)
def parse_pattern(pattern: str | DateTimePattern) -> DateTimePattern:
    """Parse date, time, and datetime format patterns.

    >>> parse_pattern("MMMMd").format
    '%(MMMM)s%(d)s'
    >>> parse_pattern("MMM d, yyyy").format
    '%(MMM)s %(d)s, %(yyyy)s'

    Pattern can contain literal strings in single quotes:

    >>> parse_pattern("H:mm' Uhr 'z").format
    '%(H)s:%(mm)s Uhr %(z)s'

    An actual single quote can be used by using two adjacent single quote
    characters:

    >>> parse_pattern("hh' o''clock'").format
    "%(hh)s o'clock"

    :param pattern: the formatting pattern to parse; a ``DateTimePattern``
                    instance is returned unchanged
    :return: the parsed pattern (string patterns go through a cached parser)
    """
    already_parsed = isinstance(pattern, DateTimePattern)
    return pattern if already_parsed else _cached_parse_pattern(pattern)
@lru_cache(maxsize=1024)
def _cached_parse_pattern(pattern: str) -> DateTimePattern:
    """Convert a pattern string into a ``DateTimePattern`` (memoized).

    Literal text is kept as-is with ``%`` doubled, and every field becomes a
    ``%(<field>)s`` placeholder, so the result is a ``%``-style template.

    :param pattern: the formatting pattern to parse
    :raise ValueError: if a field is repeated a number of times that
                       ``PATTERN_CHARS`` does not allow for its character
    :raise NotImplementedError: if the tokenizer yields an unknown token type
    """
    pieces = []
    for kind, payload in tokenize_pattern(pattern):
        if kind == "field":
            symbol, count = payload
            field = symbol * count
            allowed_lengths = PATTERN_CHARS[symbol]
            # ``None`` (or an empty list) means any length is acceptable.
            if allowed_lengths and count not in allowed_lengths:
                raise ValueError(f"Invalid length for field: {field!r}")
            pieces.append('%%(%s)s' % field)
        elif kind == "chars":
            pieces.append(payload.replace('%', '%%'))
        else:
            raise NotImplementedError(f"Unknown token type: {kind}")
    return DateTimePattern(pattern, ''.join(pieces))
def tokenize_pattern(pattern: str) -> list[tuple[str, str | tuple[str, int]]]:
    """
    Tokenize date format patterns.

    Returns a list of (token_type, token_value) tuples.

    ``token_type`` may be either "chars" or "field".

    For "chars" tokens, the value is the literal value.

    For "field" tokens, the value is a tuple of (field character, repetition count).

    Text following an opening quote that is never closed is not emitted.

    :param pattern: Pattern string
    :type pattern: str
    :rtype: list[tuple]
    """
    tokens = []
    literal = []      # literal characters collected since the last token
    quoted = None     # characters of the open quote; None while outside quotes
    field_char = ''   # pattern character of the field being collected
    field_len = 0     # how often ``field_char`` has been repeated so far

    def flush_literal():
        # NUL stands in for an escaped quote (see the loop below).
        tokens.append(('chars', ''.join(literal).replace('\0', "'")))
        literal.clear()

    def flush_field():
        nonlocal field_char, field_len
        tokens.append(('field', (field_char, field_len)))
        field_char, field_len = '', 0

    def flush_pending():
        if field_char:
            flush_field()
        elif literal:
            flush_literal()

    # Doubled quotes are swapped for NUL first so that the scan below only
    # ever sees quotes that open or close a quoted section.
    for ch in pattern.replace("''", '\0'):
        if quoted is not None:
            if ch == "'":  # end of quote
                literal.extend(quoted)
                quoted = None
            else:  # inside quote
                quoted.append(ch)
            continue

        if ch == "'":  # quote started
            flush_pending()
            quoted = []
        elif ch in PATTERN_CHARS:
            if literal:
                flush_literal()
            if ch == field_char:
                field_len += 1
            else:
                if field_char:
                    flush_field()
                field_char, field_len = ch, 1
        else:
            if field_char:
                flush_field()
            literal.append(ch)

    flush_pending()
    return tokens
def untokenize_pattern(tokens: Iterable[tuple[str, str | tuple[str, int]]]) -> str:
    """
    Turn a date format pattern token stream back into a string.

    This is the reverse operation of ``tokenize_pattern``.

    "field" tokens are written as their character repeated, "chars" tokens
    as literal text; tokens of any other type are skipped.

    :param tokens: (token_type, token_value) tuples as produced by
                   ``tokenize_pattern``
    :type tokens: Iterable[tuple]
    :return: the pattern string
    :rtype: str
    """
    output = []
    for tok_type, tok_value in tokens:
        if tok_type == "field":
            output.append(tok_value[0] * tok_value[1])
        elif tok_type == "chars":
            # A literal apostrophe has to be doubled both inside and outside
            # of a quoted section; a lone one would be read back as the
            # start of a quote and break the round trip.
            literal = tok_value.replace("'", "''")
            # Quoting is only needed when the text could otherwise be
            # mistaken for pattern fields.
            if any(ch in PATTERN_CHARS for ch in tok_value):
                literal = "'%s'" % literal
            output.append(literal)
    return "".join(output)
def split_interval_pattern(pattern: str) -> list[str]:
    """
    Split an interval-describing datetime pattern into multiple pieces.

    > The pattern is then designed to be broken up into two pieces by determining the first repeating field.
    - https://www.unicode.org/reports/tr35/tr35-dates.html#intervalFormats

    >>> split_interval_pattern('E d.M. \u2013 E d.M.')
    ['E d.M. \u2013 ', 'E d.M.']
    >>> split_interval_pattern("Y 'text' Y 'more text'")
    ["Y 'text '", "Y 'more text'"]
    >>> split_interval_pattern("E, MMM d \u2013 E")
    ['E, MMM d \u2013 ', 'E']
    >>> split_interval_pattern("MMM d")
    ['MMM d']
    >>> split_interval_pattern("y G")
    ['y G']
    >>> split_interval_pattern("MMM d \u2013 d")
    ['MMM d \u2013 ', 'd']

    :param pattern: Interval pattern string
    :return: list of "subpatterns"
    """
    segments = []
    current = []
    seen = set()  # field characters already used in the current segment
    for token in tokenize_pattern(pattern):
        kind, value = token
        if kind == "field":
            symbol = value[0]
            if symbol in seen:  # Repeated field: a new segment starts here
                segments.append(current)
                current = []
                seen.clear()
            seen.add(symbol)
        current.append(token)
    segments.append(current)
    return [untokenize_pattern(segment) for segment in segments]
def match_skeleton(skeleton: str, options: Iterable[str], allow_different_fields: bool = False) -> str | None:
    """
    Find the closest match for the given datetime skeleton among the options given.

    This uses the rules outlined in the TR35 document.

    >>> match_skeleton('yMMd', ('yMd', 'yMMMd'))
    'yMd'

    >>> match_skeleton('yMMd', ('jyMMd',), allow_different_fields=True)
    'jyMMd'

    >>> match_skeleton('yMMd', ('qyMMd',), allow_different_fields=False)

    >>> match_skeleton('hmz', ('hmv',))
    'hmv'

    :param skeleton: The skeleton to match
    :type skeleton: str
    :param options: An iterable of other skeletons to match against
    :type options: Iterable[str]
    :param allow_different_fields: Whether to allow a match that uses different fields
                                   than the skeleton requested.
    :type allow_different_fields: bool

    :return: The closest skeleton match, or if no match was found, None.
    :rtype: str|None
    """
    # TODO: maybe implement pattern expansion?

    # Based on the implementation in
    # https://github.com/unicode-org/icu/blob/main/icu4j/main/core/src/main/java/com/ibm/icu/text/DateIntervalInfo.java

    # Filter out falsy values and sort for stability; when `interval_formats` is passed in, there may be a None key.
    candidates = sorted(option for option in options if option)

    # When no candidate uses a requested field at all, swap it for its
    # substitute (an empty substitute drops the field from the skeleton).
    substitutions = (('z', 'v'), ('k', 'H'), ('K', 'h'), ('a', ''), ('b', ''))
    for requested, substitute in substitutions:
        if requested in skeleton and not any(requested in candidate for candidate in candidates):
            skeleton = skeleton.replace(requested, substitute)

    def field_widths(pattern):
        # Map each field character of the pattern to its repetition count.
        return dict(value for kind, value in tokenize_pattern(pattern) if kind == "field")

    wanted = field_widths(skeleton)

    def distance_to(candidate):
        # Return the weighted distance to ``candidate``, or None when the
        # candidate is ruled out because its set of fields differs.
        offered = field_widths(candidate)
        total = 0
        for field in PATTERN_CHARS:
            input_width = wanted.get(field, 0)
            opt_width = offered.get(field, 0)
            if input_width == opt_width:
                continue
            if not (opt_width and input_width):
                if not allow_different_fields:  # This one is not okay
                    return None
                total += 0x1000  # Magic weight constant for "entirely different fields"
            elif field == 'M' and (input_width > 2) != (opt_width > 2):
                total += 0x100  # Magic weight for "text turns into a number"
            else:
                total += abs(input_width - opt_width)
        return total

    best_skeleton = None
    best_distance = None
    for candidate in candidates:
        distance = distance_to(candidate)
        if distance is None:  # Ruled out (due to "allow_different_fields")
            continue

        if not best_skeleton or distance < best_distance:
            best_skeleton = candidate
            best_distance = distance

        if distance == 0:  # Found a perfect match!
            break

    return best_skeleton
|