__init__.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. from __future__ import annotations
  2. import contextlib
  3. import copy
  4. import os
  5. import re
  6. import struct
  7. from datetime import date
  8. from datetime import datetime
  9. from datetime import time
  10. from typing import Any
  11. from typing import Optional
  12. from typing import cast
  13. from dateutil import parser
  14. from pendulum.parsing.exceptions import ParserError
# The `pendulum._pendulum` implementations are preferred; setting the
# PENDULUM_EXTENSIONS environment variable to anything other than "1"
# (the default) opts out of them.
with_extensions = os.getenv("PENDULUM_EXTENSIONS", "1") == "1"

try:
    # Raise ImportError on purpose so that the `except` branch below is the
    # single fallback path, both when the extensions are disabled and when
    # pointers are 4 bytes wide (struct.calcsize("P") == 4).
    if not with_extensions or struct.calcsize("P") == 4:
        raise ImportError()

    from pendulum._pendulum import Duration
    from pendulum._pendulum import parse_iso8601
except ImportError:
    # Fall back to the implementations shipped in `pendulum.duration` and
    # `pendulum.parsing.iso8601`.
    from pendulum.duration import Duration  # type: ignore[assignment]
    from pendulum.parsing.iso8601 import parse_iso8601  # type: ignore[assignment]
  24. COMMON = re.compile(
  25. # Date (optional) # noqa: ERA001
  26. "^"
  27. "(?P<date>"
  28. " (?P<classic>" # Classic date (YYYY-MM-DD)
  29. r" (?P<year>\d{4})" # Year
  30. " (?P<monthday>"
  31. r" (?P<monthsep>[/:])?(?P<month>\d{2})" # Month (optional)
  32. r" ((?P<daysep>[/:])?(?P<day>\d{2}))" # Day (optional)
  33. " )?"
  34. " )"
  35. ")?"
  36. # Time (optional) # noqa: ERA001
  37. "(?P<time>" r" (?P<timesep>\ )?" # Separator (space)
  38. # HH:mm:ss (optional mm and ss)
  39. r" (?P<hour>\d{1,2}):(?P<minute>\d{1,2})?(?::(?P<second>\d{1,2}))?"
  40. # Subsecond part (optional)
  41. " (?P<subsecondsection>"
  42. " (?:[.|,])" # Subsecond separator (optional)
  43. r" (?P<subsecond>\d{1,9})" # Subsecond
  44. " )?"
  45. ")?"
  46. "$",
  47. re.VERBOSE,
  48. )
# Baseline options for parse(); keyword options passed by the caller are
# layered on top of a copy of this mapping.
DEFAULT_OPTIONS = {
    # Swaps the month/day fields in _parse_common and is forwarded to the
    # dateutil fallback as `dayfirst`.
    "day_first": False,
    # Forwarded to the dateutil fallback as `yearfirst`.
    "year_first": True,
    # When truthy, _parse raises ParserError instead of falling back to dateutil.
    "strict": True,
    # When truthy, _normalize returns the parsed value unchanged.
    "exact": False,
    # Reference datetime used by _normalize for time-only input; a falsy
    # value means datetime.now() is used instead.
    "now": None,
}
  56. def parse(text: str, **options: Any) -> datetime | date | time | _Interval | Duration:
  57. """
  58. Parses a string with the given options.
  59. :param text: The string to parse.
  60. """
  61. _options: dict[str, Any] = copy.copy(DEFAULT_OPTIONS)
  62. _options.update(options)
  63. return _normalize(_parse(text, **_options), **_options)
  64. def _normalize(
  65. parsed: datetime | date | time | _Interval | Duration, **options: Any
  66. ) -> datetime | date | time | _Interval | Duration:
  67. """
  68. Normalizes the parsed element.
  69. :param parsed: The parsed elements.
  70. """
  71. if options.get("exact"):
  72. return parsed
  73. if isinstance(parsed, time):
  74. now = cast(Optional[datetime], options["now"]) or datetime.now()
  75. return datetime(
  76. now.year,
  77. now.month,
  78. now.day,
  79. parsed.hour,
  80. parsed.minute,
  81. parsed.second,
  82. parsed.microsecond,
  83. )
  84. elif isinstance(parsed, date) and not isinstance(parsed, datetime):
  85. return datetime(parsed.year, parsed.month, parsed.day)
  86. return parsed
  87. def _parse(text: str, **options: Any) -> datetime | date | time | _Interval | Duration:
  88. # Trying to parse ISO8601
  89. with contextlib.suppress(ValueError):
  90. return parse_iso8601(text)
  91. with contextlib.suppress(ValueError):
  92. return _parse_iso8601_interval(text)
  93. with contextlib.suppress(ParserError):
  94. return _parse_common(text, **options)
  95. # We couldn't parse the string
  96. # so we fallback on the dateutil parser
  97. # If not strict
  98. if options.get("strict", True):
  99. raise ParserError(f"Unable to parse string [{text}]")
  100. try:
  101. dt = parser.parse(
  102. text, dayfirst=options["day_first"], yearfirst=options["year_first"]
  103. )
  104. except ValueError:
  105. raise ParserError(f"Invalid date string: {text}")
  106. return dt
  107. def _parse_common(text: str, **options: Any) -> datetime | date | time:
  108. """
  109. Tries to parse the string as a common datetime format.
  110. :param text: The string to parse.
  111. """
  112. m = COMMON.match(text)
  113. has_date = False
  114. year = 0
  115. month = 1
  116. day = 1
  117. if not m:
  118. raise ParserError("Invalid datetime string")
  119. if m.group("date"):
  120. # A date has been specified
  121. has_date = True
  122. year = int(m.group("year"))
  123. if not m.group("monthday"):
  124. # No month and day
  125. month = 1
  126. day = 1
  127. else:
  128. if options["day_first"]:
  129. month = int(m.group("day"))
  130. day = int(m.group("month"))
  131. else:
  132. month = int(m.group("month"))
  133. day = int(m.group("day"))
  134. if not m.group("time"):
  135. return date(year, month, day)
  136. # Grabbing hh:mm:ss
  137. hour = int(m.group("hour"))
  138. minute = int(m.group("minute"))
  139. second = int(m.group("second")) if m.group("second") else 0
  140. # Grabbing subseconds, if any
  141. microsecond = 0
  142. if m.group("subsecondsection"):
  143. # Limiting to 6 chars
  144. subsecond = m.group("subsecond")[:6]
  145. microsecond = int(f"{subsecond:0<6}")
  146. if has_date:
  147. return datetime(year, month, day, hour, minute, second, microsecond)
  148. return time(hour, minute, second, microsecond)
  149. class _Interval:
  150. """
  151. Special class to handle ISO 8601 intervals
  152. """
  153. def __init__(
  154. self,
  155. start: datetime | None = None,
  156. end: datetime | None = None,
  157. duration: Duration | None = None,
  158. ) -> None:
  159. self.start = start
  160. self.end = end
  161. self.duration = duration
  162. def _parse_iso8601_interval(text: str) -> _Interval:
  163. if "/" not in text:
  164. raise ParserError("Invalid interval")
  165. first, last = text.split("/")
  166. start = end = duration = None
  167. if first[0] == "P":
  168. # duration/end
  169. duration = parse_iso8601(first)
  170. end = parse_iso8601(last)
  171. elif last[0] == "P":
  172. # start/duration
  173. start = parse_iso8601(first)
  174. duration = parse_iso8601(last)
  175. else:
  176. # start/end
  177. start = parse_iso8601(first)
  178. end = parse_iso8601(last)
  179. return _Interval(
  180. cast(datetime, start), cast(datetime, end), cast(Duration, duration)
  181. )
# Public names of this module. `parse_iso8601` is not defined here; it is
# re-exported from whichever implementation the import fallback selected.
__all__ = ["parse", "parse_iso8601"]