iso8601.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. from __future__ import annotations
  2. import datetime
  3. import re
  4. from typing import cast
  5. from pendulum.constants import HOURS_PER_DAY
  6. from pendulum.constants import MINUTES_PER_HOUR
  7. from pendulum.constants import MONTHS_OFFSETS
  8. from pendulum.constants import SECONDS_PER_MINUTE
  9. from pendulum.duration import Duration
  10. from pendulum.helpers import days_in_year
  11. from pendulum.helpers import is_leap
  12. from pendulum.helpers import is_long_year
  13. from pendulum.helpers import week_day
  14. from pendulum.parsing.exceptions import ParserError
  15. from pendulum.tz.timezone import UTC
  16. from pendulum.tz.timezone import FixedTimezone
  17. from pendulum.tz.timezone import Timezone
  18. ISO8601_DT = re.compile(
  19. # Date (optional) # noqa: ERA001
  20. "^"
  21. "(?P<date>"
  22. " (?P<classic>" # Classic date (YYYY-MM-DD) or ordinal (YYYY-DDD)
  23. r" (?P<year>\d{4})" # Year
  24. " (?P<monthday>"
  25. r" (?P<monthsep>-)?(?P<month>\d{2})" # Month (optional)
  26. r" ((?P<daysep>-)?(?P<day>\d{1,2}))?" # Day (optional)
  27. " )?"
  28. " )"
  29. " |"
  30. " (?P<isocalendar>" # Calendar date (2016-W05 or 2016-W05-5)
  31. r" (?P<isoyear>\d{4})" # Year
  32. " (?P<weeksep>-)?" # Separator (optional)
  33. " W" # W separator
  34. r" (?P<isoweek>\d{2})" # Week number
  35. " (?P<weekdaysep>-)?" # Separator (optional)
  36. r" (?P<isoweekday>\d)?" # Weekday (optional)
  37. " )"
  38. ")?"
  39. # Time (optional) # noqa: ERA001
  40. "(?P<time>" r" (?P<timesep>[T\ ])?" # Separator (T or space)
  41. # HH:mm:ss (optional mm and ss)
  42. r" (?P<hour>\d{1,2})(?P<minsep>:)?(?P<minute>\d{1,2})?(?P<secsep>:)?(?P<second>\d{1,2})?" # noqa: E501
  43. # Subsecond part (optional)
  44. " (?P<subsecondsection>"
  45. " (?:[.,])" # Subsecond separator (optional)
  46. r" (?P<subsecond>\d{1,9})" # Subsecond
  47. " )?"
  48. # Timezone offset
  49. " (?P<tz>"
  50. r" (?:[-+])\d{2}:?(?:\d{2})?|Z" # Offset (+HH:mm or +HHmm or +HH or Z)
  51. " )?"
  52. ")?"
  53. "$",
  54. re.VERBOSE,
  55. )
# Pattern for ISO 8601 durations (P[n]W or P[n]Y[n]M[n]DT[n]H[n]M[n]S).
#
# Compiled with re.VERBOSE, so the whitespace between tokens inside the string
# fragments is ignored. Every component is optional and each number may carry
# a decimal part introduced by either "." or ",". The component groups capture
# the number together with its designator letter (e.g. "1,5H").
ISO8601_DURATION = re.compile(
    "^P"  # Duration P indicator
    # Weeks (optional)  # noqa: ERA001
    "(?P<w>"
    r" (?P<weeks>\d+(?:[.,]\d+)?W)"
    ")?"
    # Years, months and days (optional)  # noqa: ERA001
    "(?P<ymd>"
    r" (?P<years>\d+(?:[.,]\d+)?Y)?"
    r" (?P<months>\d+(?:[.,]\d+)?M)?"
    r" (?P<days>\d+(?:[.,]\d+)?D)?"
    ")?"
    # Hours, minutes and seconds (optional); only matched after a "T"
    "(?P<hms>"
    " (?P<timesep>T)"  # Separator (T)
    r" (?P<hours>\d+(?:[.,]\d+)?H)?"
    r" (?P<minutes>\d+(?:[.,]\d+)?M)?"
    r" (?P<seconds>\d+(?:[.,]\d+)?S)?"
    ")?"
    "$",
    re.VERBOSE,
)
  76. def parse_iso8601(
  77. text: str,
  78. ) -> datetime.datetime | datetime.date | datetime.time | Duration:
  79. """
  80. ISO 8601 compliant parser.
  81. :param text: The string to parse
  82. :type text: str
  83. :rtype: datetime.datetime or datetime.time or datetime.date
  84. """
  85. parsed = _parse_iso8601_duration(text)
  86. if parsed is not None:
  87. return parsed
  88. m = ISO8601_DT.match(text)
  89. if not m:
  90. raise ParserError("Invalid ISO 8601 string")
  91. ambiguous_date = False
  92. is_date = False
  93. is_time = False
  94. year = 0
  95. month = 1
  96. day = 1
  97. minute = 0
  98. second = 0
  99. microsecond = 0
  100. tzinfo: FixedTimezone | Timezone | None = None
  101. if m.group("date"):
  102. # A date has been specified
  103. is_date = True
  104. if m.group("isocalendar"):
  105. # We have a ISO 8601 string defined
  106. # by week number
  107. if (
  108. m.group("weeksep")
  109. and not m.group("weekdaysep")
  110. and m.group("isoweekday")
  111. ):
  112. raise ParserError(f"Invalid date string: {text}")
  113. if not m.group("weeksep") and m.group("weekdaysep"):
  114. raise ParserError(f"Invalid date string: {text}")
  115. try:
  116. date = _get_iso_8601_week(
  117. m.group("isoyear"), m.group("isoweek"), m.group("isoweekday")
  118. )
  119. except ParserError:
  120. raise
  121. except ValueError:
  122. raise ParserError(f"Invalid date string: {text}")
  123. year = date["year"]
  124. month = date["month"]
  125. day = date["day"]
  126. else:
  127. # We have a classic date representation
  128. year = int(m.group("year"))
  129. if not m.group("monthday"):
  130. # No month and day
  131. month = 1
  132. day = 1
  133. else:
  134. if m.group("month") and m.group("day"):
  135. # Month and day
  136. if not m.group("daysep") and len(m.group("day")) == 1:
  137. # Ordinal day
  138. ordinal = int(m.group("month") + m.group("day"))
  139. leap = is_leap(year)
  140. months_offsets = MONTHS_OFFSETS[leap]
  141. if ordinal > months_offsets[13]:
  142. raise ParserError("Ordinal day is out of range")
  143. for i in range(1, 14):
  144. if ordinal <= months_offsets[i]:
  145. day = ordinal - months_offsets[i - 1]
  146. month = i - 1
  147. break
  148. else:
  149. month = int(m.group("month"))
  150. day = int(m.group("day"))
  151. else:
  152. # Only month
  153. if not m.group("monthsep"):
  154. # The date looks like 201207
  155. # which is invalid for a date
  156. # But it might be a time in the form hhmmss
  157. ambiguous_date = True
  158. month = int(m.group("month"))
  159. day = 1
  160. if not m.group("time"):
  161. # No time has been specified
  162. if ambiguous_date:
  163. # We can "safely" assume that the ambiguous date
  164. # was actually a time in the form hhmmss
  165. hhmmss = f"{year!s}{month!s:0>2}"
  166. return datetime.time(int(hhmmss[:2]), int(hhmmss[2:4]), int(hhmmss[4:]))
  167. return datetime.date(year, month, day)
  168. if ambiguous_date:
  169. raise ParserError(f"Invalid date string: {text}")
  170. if is_date and not m.group("timesep"):
  171. raise ParserError(f"Invalid date string: {text}")
  172. if not is_date:
  173. is_time = True
  174. # Grabbing hh:mm:ss
  175. hour = int(m.group("hour"))
  176. minsep = m.group("minsep")
  177. if m.group("minute"):
  178. minute = int(m.group("minute"))
  179. elif minsep:
  180. raise ParserError("Invalid ISO 8601 time part")
  181. secsep = m.group("secsep")
  182. if secsep and not minsep and m.group("minute"):
  183. # minute/second separator but no hour/minute separator
  184. raise ParserError("Invalid ISO 8601 time part")
  185. if m.group("second"):
  186. if not secsep and minsep:
  187. # No minute/second separator but hour/minute separator
  188. raise ParserError("Invalid ISO 8601 time part")
  189. second = int(m.group("second"))
  190. elif secsep:
  191. raise ParserError("Invalid ISO 8601 time part")
  192. # Grabbing subseconds, if any
  193. if m.group("subsecondsection"):
  194. # Limiting to 6 chars
  195. subsecond = m.group("subsecond")[:6]
  196. microsecond = int(f"{subsecond:0<6}")
  197. # Grabbing timezone, if any
  198. tz = m.group("tz")
  199. if tz:
  200. if tz == "Z":
  201. tzinfo = UTC
  202. else:
  203. negative = bool(tz.startswith("-"))
  204. tz = tz[1:]
  205. if ":" not in tz:
  206. if len(tz) == 2:
  207. tz = f"{tz}00"
  208. off_hour = tz[0:2]
  209. off_minute = tz[2:4]
  210. else:
  211. off_hour, off_minute = tz.split(":")
  212. offset = ((int(off_hour) * 60) + int(off_minute)) * 60
  213. if negative:
  214. offset = -1 * offset
  215. tzinfo = FixedTimezone(offset)
  216. if is_time:
  217. return datetime.time(hour, minute, second, microsecond, tzinfo=tzinfo)
  218. return datetime.datetime(
  219. year, month, day, hour, minute, second, microsecond, tzinfo=tzinfo
  220. )
  221. def _parse_iso8601_duration(text: str, **options: str) -> Duration | None:
  222. m = ISO8601_DURATION.match(text)
  223. if not m:
  224. return None
  225. years = 0
  226. months = 0
  227. weeks = 0
  228. days: int | float = 0
  229. hours: int | float = 0
  230. minutes: int | float = 0
  231. seconds: int | float = 0
  232. microseconds: int | float = 0
  233. fractional = False
  234. _days: str | float
  235. _hour: str | int | None
  236. _minutes: str | int | None
  237. _seconds: str | int | None
  238. if m.group("w"):
  239. # Weeks
  240. if m.group("ymd") or m.group("hms"):
  241. # Specifying anything more than weeks is not supported
  242. raise ParserError("Invalid duration string")
  243. _weeks = m.group("weeks")
  244. if not _weeks:
  245. raise ParserError("Invalid duration string")
  246. _weeks = _weeks.replace(",", ".").replace("W", "")
  247. if "." in _weeks:
  248. _weeks, portion = _weeks.split(".")
  249. weeks = int(_weeks)
  250. _days = int(portion) / 10 * 7
  251. days, hours = int(_days // 1), int(_days % 1 * HOURS_PER_DAY)
  252. else:
  253. weeks = int(_weeks)
  254. if m.group("ymd"):
  255. # Years, months and/or days
  256. _years = m.group("years")
  257. _months = m.group("months")
  258. _days = m.group("days")
  259. # Checking order
  260. years_start = m.start("years") if _years else -3
  261. months_start = m.start("months") if _months else years_start + 1
  262. days_start = m.start("days") if _days else months_start + 1
  263. # Check correct order
  264. if not (years_start < months_start < days_start):
  265. raise ParserError("Invalid duration")
  266. if _years:
  267. _years = _years.replace(",", ".").replace("Y", "")
  268. if "." in _years:
  269. raise ParserError("Float years in duration are not supported")
  270. else:
  271. years = int(_years)
  272. if _months:
  273. if fractional:
  274. raise ParserError("Invalid duration")
  275. _months = _months.replace(",", ".").replace("M", "")
  276. if "." in _months:
  277. raise ParserError("Float months in duration are not supported")
  278. else:
  279. months = int(_months)
  280. if _days:
  281. if fractional:
  282. raise ParserError("Invalid duration")
  283. _days = _days.replace(",", ".").replace("D", "")
  284. if "." in _days:
  285. fractional = True
  286. _days, _hours = _days.split(".")
  287. days = int(_days)
  288. hours = int(_hours) / 10 * HOURS_PER_DAY
  289. else:
  290. days = int(_days)
  291. if m.group("hms"):
  292. # Hours, minutes and/or seconds
  293. _hours = m.group("hours") or 0
  294. _minutes = m.group("minutes") or 0
  295. _seconds = m.group("seconds") or 0
  296. # Checking order
  297. hours_start = m.start("hours") if _hours else -3
  298. minutes_start = m.start("minutes") if _minutes else hours_start + 1
  299. seconds_start = m.start("seconds") if _seconds else minutes_start + 1
  300. # Check correct order
  301. if not (hours_start < minutes_start < seconds_start):
  302. raise ParserError("Invalid duration")
  303. if _hours:
  304. if fractional:
  305. raise ParserError("Invalid duration")
  306. _hours = cast(str, _hours).replace(",", ".").replace("H", "")
  307. if "." in _hours:
  308. fractional = True
  309. _hours, _mins = _hours.split(".")
  310. hours += int(_hours)
  311. minutes += int(_mins) / 10 * MINUTES_PER_HOUR
  312. else:
  313. hours += int(_hours)
  314. if _minutes:
  315. if fractional:
  316. raise ParserError("Invalid duration")
  317. _minutes = cast(str, _minutes).replace(",", ".").replace("M", "")
  318. if "." in _minutes:
  319. fractional = True
  320. _minutes, _secs = _minutes.split(".")
  321. minutes += int(_minutes)
  322. seconds += int(_secs) / 10 * SECONDS_PER_MINUTE
  323. else:
  324. minutes += int(_minutes)
  325. if _seconds:
  326. if fractional:
  327. raise ParserError("Invalid duration")
  328. _seconds = cast(str, _seconds).replace(",", ".").replace("S", "")
  329. if "." in _seconds:
  330. _seconds, _microseconds = _seconds.split(".")
  331. seconds += int(_seconds)
  332. microseconds += int(f"{_microseconds[:6]:0<6}")
  333. else:
  334. seconds += int(_seconds)
  335. return Duration(
  336. years=years,
  337. months=months,
  338. weeks=weeks,
  339. days=days,
  340. hours=hours,
  341. minutes=minutes,
  342. seconds=seconds,
  343. microseconds=microseconds,
  344. )
  345. def _get_iso_8601_week(
  346. year: int | str, week: int | str, weekday: int | str
  347. ) -> dict[str, int]:
  348. weekday = 1 if not weekday else int(weekday)
  349. year = int(year)
  350. week = int(week)
  351. if week > 53 or week > 52 and not is_long_year(year):
  352. raise ParserError("Invalid week for week date")
  353. if weekday > 7:
  354. raise ParserError("Invalid weekday for week date")
  355. # We can't rely on strptime directly here since
  356. # it does not support ISO week date
  357. ordinal = week * 7 + weekday - (week_day(year, 1, 4) + 3)
  358. if ordinal < 1:
  359. # Previous year
  360. ordinal += days_in_year(year - 1)
  361. year -= 1
  362. if ordinal > days_in_year(year):
  363. # Next year
  364. ordinal -= days_in_year(year)
  365. year += 1
  366. fmt = "%Y-%j"
  367. string = f"{year}-{ordinal}"
  368. dt = datetime.datetime.strptime(string, fmt)
  369. return {"year": dt.year, "month": dt.month, "day": dt.day}