123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453 |
- from __future__ import annotations
- import datetime
- import re
- from typing import cast
- from pendulum.constants import HOURS_PER_DAY
- from pendulum.constants import MINUTES_PER_HOUR
- from pendulum.constants import MONTHS_OFFSETS
- from pendulum.constants import SECONDS_PER_MINUTE
- from pendulum.duration import Duration
- from pendulum.helpers import days_in_year
- from pendulum.helpers import is_leap
- from pendulum.helpers import is_long_year
- from pendulum.helpers import week_day
- from pendulum.parsing.exceptions import ParserError
- from pendulum.tz.timezone import UTC
- from pendulum.tz.timezone import FixedTimezone
- from pendulum.tz.timezone import Timezone
# Pattern matching an ISO 8601 date, time, or date followed by time.
# Compiled with re.VERBOSE, so the padding spaces inside the literals below are
# ignored; the only significant space is the escaped one in the ``timesep``
# character class. Both the date and the time sections are optional here:
# separator consistency and the "which parts are present" logic are checked
# by parse_iso8601() using the named groups.
ISO8601_DT = re.compile(
    # Date (optional)  # noqa: ERA001
    "^"
    "(?P<date>"
    " (?P<classic>"  # Classic date (YYYY-MM-DD) or ordinal (YYYY-DDD)
    r" (?P<year>\d{4})"  # Year
    " (?P<monthday>"
    r" (?P<monthsep>-)?(?P<month>\d{2})"  # Month (optional)
    # Day (optional). A single digit with no separator is read by the parser
    # together with the two "month" digits as a three-digit ordinal day.
    r" ((?P<daysep>-)?(?P<day>\d{1,2}))?"
    " )?"
    " )"
    " |"
    " (?P<isocalendar>"  # Calendar date (2016-W05 or 2016-W05-5)
    r" (?P<isoyear>\d{4})"  # Year
    " (?P<weeksep>-)?"  # Separator (optional)
    " W"  # W separator
    r" (?P<isoweek>\d{2})"  # Week number
    " (?P<weekdaysep>-)?"  # Separator (optional)
    r" (?P<isoweekday>\d)?"  # Weekday (optional)
    " )"
    ")?"
    # Time (optional)  # noqa: ERA001
    "(?P<time>" r" (?P<timesep>[T\ ])?"  # Separator (T or space)
    # HH:mm:ss (optional mm and ss); each separator is captured on its own so
    # the parser can reject mixed forms
    r" (?P<hour>\d{1,2})(?P<minsep>:)?(?P<minute>\d{1,2})?(?P<secsep>:)?(?P<second>\d{1,2})?"  # noqa: E501
    # Subsecond part (optional)
    " (?P<subsecondsection>"
    " (?:[.,])"  # Subsecond separator ("." or ",")
    r" (?P<subsecond>\d{1,9})"  # Subsecond (1 to 9 digits)
    " )?"
    # Timezone offset (optional)
    " (?P<tz>"
    r" (?:[-+])\d{2}:?(?:\d{2})?|Z"  # Offset (+HH:mm or +HHmm or +HH or Z)
    " )?"
    ")?"
    "$",
    re.VERBOSE,
)
# Pattern matching an ISO 8601 duration (e.g. P1Y2M3DT4H5M6S or P2W).
# Compiled with re.VERBOSE, so the padding spaces inside the literals are
# ignored. Each component captures its number (with an optional "." or ","
# decimal part) together with its designator letter. The component order is
# fixed by the pattern itself.
ISO8601_DURATION = re.compile(
    "^P"  # Duration P indicator
    # Weeks (optional)
    "(?P<w>"
    r" (?P<weeks>\d+(?:[.,]\d+)?W)"
    ")?"
    # Years, months and days (each optional)  # noqa: ERA001
    # Every member is optional, so this group can match the empty string.
    "(?P<ymd>"
    r" (?P<years>\d+(?:[.,]\d+)?Y)?"
    r" (?P<months>\d+(?:[.,]\d+)?M)?"
    r" (?P<days>\d+(?:[.,]\d+)?D)?"
    ")?"
    # Hours, minutes and seconds (optional, introduced by T)
    "(?P<hms>"
    " (?P<timesep>T)"  # Separator (T)
    r" (?P<hours>\d+(?:[.,]\d+)?H)?"
    r" (?P<minutes>\d+(?:[.,]\d+)?M)?"
    r" (?P<seconds>\d+(?:[.,]\d+)?S)?"
    ")?"
    "$",
    re.VERBOSE,
)
def parse_iso8601(
    text: str,
) -> datetime.datetime | datetime.date | datetime.time | Duration:
    """
    ISO 8601 compliant parser.

    The text is first tried as a duration; if it is not one, it is matched
    against the date/time pattern and returned as a date, a time or a
    datetime depending on which parts are present.

    :param text: The string to parse
    :type text: str

    :rtype: datetime.datetime or datetime.time or datetime.date or Duration

    :raises ParserError: if the text does not match the ISO 8601 pattern,
        mixes separators inconsistently, or holds an out-of-range week or
        ordinal day.
    :raises ValueError: propagated from the ``datetime`` constructors when a
        component such as the month, day or hour is out of range.
    """
    parsed = _parse_iso8601_duration(text)
    if parsed is not None:
        return parsed

    m = ISO8601_DT.match(text)
    if not m:
        raise ParserError("Invalid ISO 8601 string")

    ambiguous_date = False
    is_date = False
    year = 0
    month = 1
    day = 1
    minute = 0
    second = 0
    microsecond = 0
    tzinfo: FixedTimezone | Timezone | None = None

    if m.group("date"):
        # A date has been specified
        is_date = True

        if m.group("isocalendar"):
            # We have a ISO 8601 string defined by week number.
            # Separators must be used consistently: either both or none.
            if (
                m.group("weeksep")
                and not m.group("weekdaysep")
                and m.group("isoweekday")
            ):
                raise ParserError(f"Invalid date string: {text}")

            if not m.group("weeksep") and m.group("weekdaysep"):
                raise ParserError(f"Invalid date string: {text}")

            try:
                week_date = _get_iso_8601_week(
                    m.group("isoyear"), m.group("isoweek"), m.group("isoweekday")
                )
            except ParserError:
                # Listed first so it is re-raised untouched rather than being
                # re-wrapped by the ValueError handler below.
                raise
            except ValueError as exc:
                raise ParserError(f"Invalid date string: {text}") from exc

            year = week_date["year"]
            month = week_date["month"]
            day = week_date["day"]
        else:
            # We have a classic date representation
            year = int(m.group("year"))

            # Without a month/day part the defaults (January 1st) apply.
            if m.group("monthday"):
                if m.group("day"):
                    if not m.group("daysep") and len(m.group("day")) == 1:
                        # Ordinal day: the two "month" digits and the single
                        # "day" digit form a three-digit day of the year.
                        ordinal = int(m.group("month") + m.group("day"))
                        months_offsets = MONTHS_OFFSETS[is_leap(year)]

                        # Day 000 is as invalid as a day past the year's end.
                        if ordinal < 1 or ordinal > months_offsets[13]:
                            raise ParserError("Ordinal day is out of range")

                        for i in range(1, 14):
                            if ordinal <= months_offsets[i]:
                                day = ordinal - months_offsets[i - 1]
                                month = i - 1
                                break
                    else:
                        # Month and day
                        month = int(m.group("month"))
                        day = int(m.group("day"))
                else:
                    # Only month
                    if not m.group("monthsep"):
                        # The date looks like 201207
                        # which is invalid for a date
                        # But it might be a time in the form hhmmss
                        ambiguous_date = True

                    month = int(m.group("month"))
                    day = 1

    if not m.group("time"):
        # No time has been specified
        if ambiguous_date:
            # We can "safely" assume that the ambiguous date
            # was actually a time in the form hhmmss.
            # Use the matched digits, not the parsed integers: converting the
            # year to int drops leading zeros ("091500" would become "91500").
            hhmmss = m.group("year") + m.group("month")

            return datetime.time(int(hhmmss[:2]), int(hhmmss[2:4]), int(hhmmss[4:]))

        if not is_date:
            # Neither a date nor a time: only the empty string gets here.
            raise ParserError("Invalid ISO 8601 string")

        return datetime.date(year, month, day)

    if ambiguous_date:
        raise ParserError(f"Invalid date string: {text}")

    if is_date and not m.group("timesep"):
        # A time that follows a date must be introduced by "T" or a space.
        raise ParserError(f"Invalid date string: {text}")

    # Grabbing hh:mm:ss
    hour = int(m.group("hour"))
    minsep = m.group("minsep")

    if m.group("minute"):
        minute = int(m.group("minute"))
    elif minsep:
        # Trailing hour/minute separator with nothing after it
        raise ParserError("Invalid ISO 8601 time part")

    secsep = m.group("secsep")
    if secsep and not minsep and m.group("minute"):
        # minute/second separator but no hour/minute separator
        raise ParserError("Invalid ISO 8601 time part")

    if m.group("second"):
        if not secsep and minsep:
            # No minute/second separator but hour/minute separator
            raise ParserError("Invalid ISO 8601 time part")

        second = int(m.group("second"))
    elif secsep:
        # Trailing minute/second separator with nothing after it
        raise ParserError("Invalid ISO 8601 time part")

    # Grabbing subseconds, if any
    if m.group("subsecondsection"):
        # Limiting to 6 chars (microseconds), right-padding shorter values
        subsecond = m.group("subsecond")[:6]
        microsecond = int(f"{subsecond:0<6}")

    # Grabbing timezone, if any
    tz = m.group("tz")
    if tz:
        if tz == "Z":
            tzinfo = UTC
        else:
            negative = tz.startswith("-")
            tz = tz[1:]
            if ":" not in tz:
                # +HH or +HHmm
                if len(tz) == 2:
                    tz = f"{tz}00"

                off_hour = tz[0:2]
                off_minute = tz[2:4]
            else:
                off_hour, off_minute = tz.split(":")
                if not off_minute:
                    # The pattern lets "+HH:" through; there are no minutes
                    # to read after the separator.
                    raise ParserError("Invalid ISO 8601 time part")

            # Offset expressed in seconds
            offset = ((int(off_hour) * 60) + int(off_minute)) * 60

            if negative:
                offset = -1 * offset

            tzinfo = FixedTimezone(offset)

    if not is_date:
        return datetime.time(hour, minute, second, microsecond, tzinfo=tzinfo)

    return datetime.datetime(
        year, month, day, hour, minute, second, microsecond, tzinfo=tzinfo
    )
def _split_decimal(component: str, designator: str) -> tuple[int, str]:
    """Split a duration component such as ``"1,25H"`` into ``(1, "25")``.

    The second item holds the digits after the decimal separator, or an empty
    string when the component has no decimal part.
    """
    number = component.replace(",", ".").replace(designator, "")
    whole, _, digits = number.partition(".")

    return int(whole), digits


def _decimal_fraction(digits: str) -> float:
    """Return the value of the decimal digits, e.g. ``"25"`` -> ``0.25``."""
    # Scale by the number of digits: dividing by a flat 10 is only right for
    # a single decimal digit.
    return int(digits) / 10 ** len(digits)


def _parse_iso8601_duration(text: str, **options: str) -> Duration | None:
    """
    Parse an ISO 8601 duration string.

    A decimal part is accepted on weeks, days, hours, minutes and seconds and
    is carried over to the next smaller unit; no component may follow a
    fractional one. Weeks cannot be combined with any other component.

    :param text: The string to parse
    :param options: Accepted for call compatibility; not used.

    :return: The parsed duration, or ``None`` if the text does not match the
        duration pattern at all.

    :raises ParserError: if the text matches the pattern but is not
        supported (weeks mixed with other components, fractional years or
        months, or a component following a fractional one).
    """
    m = ISO8601_DURATION.match(text)
    if not m:
        return None

    years = 0
    months = 0
    weeks = 0
    days: int | float = 0
    hours: int | float = 0
    minutes: int | float = 0
    seconds: int | float = 0
    microseconds = 0

    # Set once a component with a decimal part has been seen.
    fractional = False

    if m.group("w"):
        # Weeks
        if m.group("ymd") or m.group("hms"):
            # Specifying anything more than weeks is not supported
            raise ParserError("Invalid duration string")

        weeks, digits = _split_decimal(m.group("weeks"), "W")
        if digits:
            # Fractional weeks are split into whole days and whole hours;
            # anything below one hour is truncated.
            extra_days = _decimal_fraction(digits) * 7
            days, hours = int(extra_days // 1), int(extra_days % 1 * HOURS_PER_DAY)

    # No explicit ordering checks below: the pattern only matches components
    # in their canonical order.
    if m.group("ymd"):
        # Years, months and/or days
        raw_years = m.group("years")
        raw_months = m.group("months")
        raw_days = m.group("days")

        if raw_years:
            years, digits = _split_decimal(raw_years, "Y")
            if digits:
                raise ParserError("Float years in duration are not supported")

        if raw_months:
            months, digits = _split_decimal(raw_months, "M")
            if digits:
                raise ParserError("Float months in duration are not supported")

        if raw_days:
            days, digits = _split_decimal(raw_days, "D")
            if digits:
                fractional = True
                hours = _decimal_fraction(digits) * HOURS_PER_DAY

    if m.group("hms"):
        # Hours, minutes and/or seconds
        raw_hours = m.group("hours")
        raw_minutes = m.group("minutes")
        raw_seconds = m.group("seconds")

        if raw_hours:
            if fractional:
                raise ParserError("Invalid duration")

            whole, digits = _split_decimal(raw_hours, "H")
            hours += whole
            if digits:
                fractional = True
                minutes += _decimal_fraction(digits) * MINUTES_PER_HOUR

        if raw_minutes:
            if fractional:
                raise ParserError("Invalid duration")

            whole, digits = _split_decimal(raw_minutes, "M")
            minutes += whole
            if digits:
                fractional = True
                seconds += _decimal_fraction(digits) * SECONDS_PER_MINUTE

        if raw_seconds:
            if fractional:
                raise ParserError("Invalid duration")

            whole, digits = _split_decimal(raw_seconds, "S")
            seconds += whole
            if digits:
                # Keep at most 6 digits (microseconds), right-padding
                # shorter values with zeros.
                microseconds += int(f"{digits[:6]:0<6}")

    return Duration(
        years=years,
        months=months,
        weeks=weeks,
        days=days,
        hours=hours,
        minutes=minutes,
        seconds=seconds,
        microseconds=microseconds,
    )
def _get_iso_8601_week(
    year: int | str, week: int | str, weekday: int | str | None
) -> dict[str, int]:
    """
    Convert an ISO 8601 week date into a calendar date.

    :param year: The ISO week-numbering year
    :param week: The ISO week number (1 to 53)
    :param weekday: The ISO weekday (1 to 7); a missing or empty value
        defaults to 1

    :return: A dict with the ``year``, ``month`` and ``day`` of the date

    :raises ParserError: if the week or the weekday is out of range
    :raises ValueError: if the resulting year cannot be represented by
        ``datetime.date``
    """
    weekday = 1 if not weekday else int(weekday)

    year = int(year)
    week = int(week)

    # Weeks run from 01 to 52, or to 53 in a long year.
    if week < 1 or week > 53 or (week > 52 and not is_long_year(year)):
        raise ParserError("Invalid week for week date")

    if weekday < 1 or weekday > 7:
        raise ParserError("Invalid weekday for week date")

    # Day of the year, relative to the week that contains January 4th
    ordinal = week * 7 + weekday - (week_day(year, 1, 4) + 3)

    if ordinal < 1:
        # Previous year
        ordinal += days_in_year(year - 1)
        year -= 1

    if ordinal > days_in_year(year):
        # Next year
        ordinal -= days_in_year(year)
        year += 1

    # Plain date arithmetic instead of strptime("%Y-%j"): "%Y" needs a
    # zero-filled four-digit year, so years before 1000 never parsed.
    result = datetime.date(year, 1, 1) + datetime.timedelta(days=ordinal - 1)

    return {"year": result.year, "month": result.month, "day": result.day}
|