_cron.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. # Licensed to the Apache Software Foundation (ASF) under one
  2. # or more contributor license agreements. See the NOTICE file
  3. # distributed with this work for additional information
  4. # regarding copyright ownership. The ASF licenses this file
  5. # to you under the Apache License, Version 2.0 (the
  6. # "License"); you may not use this file except in compliance
  7. # with the License. You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing,
  12. # software distributed under the License is distributed on an
  13. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14. # KIND, either express or implied. See the License for the
  15. # specific language governing permissions and limitations
  16. # under the License.
  17. from __future__ import annotations
  18. import datetime
  19. from typing import TYPE_CHECKING, Any
  20. from cron_descriptor import CasingTypeEnum, ExpressionDescriptor, FormatException, MissingFieldException
  21. from croniter import CroniterBadCronError, CroniterBadDateError, croniter
  22. from airflow.exceptions import AirflowTimetableInvalid
  23. from airflow.utils.dates import cron_presets
  24. from airflow.utils.timezone import convert_to_utc, make_aware, make_naive, parse_timezone
  25. if TYPE_CHECKING:
  26. from pendulum import DateTime
  27. from pendulum.tz.timezone import FixedTimezone, Timezone
  def _covers_every_hour(cron: croniter) -> bool:
      """
      Tell whether the cron expression fires at least once in every hour.

      The answer decides whether callers must work around the "fold hour
      problem". A fold happens when a region moves its clocks backwards
      (typically when a DST period ends), so a stretch of wall clock time
      occurs twice. Python marks the second occurrence with the ``fold`` flag
      on datetime.

      Example: in 2023 Switzerland ended DST at 3am wall clock time (UTC+2) by
      setting the clock back to 2am (UTC+1). For ``30 * * * *``, the run that
      follows 2:30am (UTC+2) must therefore be 2:30am (UTC+1, folded), not
      3:30am.

      Strictly speaking every cron schedule in such a timezone is affected,
      but only schedules producing at least one run per hour are handled here;
      skipping the fold hour is a defensible choice for something like ``*/2``
      (every two hours). To detect the hourly case we *minimally* peek into
      croniter internals.

      The test itself is trivial because croniter normalizes an unrestricted
      field to ``*`` internally. Further edge cases can be added when needed.

      See also: https://github.com/kiorky/croniter/issues/56.

      :param cron: A croniter instance whose ``expanded`` field list is read.
      :return: True if the field at index 1 of ``cron.expanded`` (the hour
          field) is exactly ``["*"]``.
      """
      hour_field = cron.expanded[1]
      return hour_field == ["*"]
  class CronMixin:
      """Mixin exposing a small croniter-based scheduling interface."""

      def __init__(self, cron: str, timezone: str | Timezone | FixedTimezone) -> None:
          """
          Store the cron expression and timezone, and build a description.

          :param cron: A cron expression, or a key of ``cron_presets`` which is
              then replaced by the expression it maps to.
          :param timezone: A timezone object, or a string that is passed
              through ``parse_timezone``.
          """
          self._expression = cron_presets.get(cron, cron)
          self._timezone = parse_timezone(timezone) if isinstance(timezone, str) else timezone

          try:
              descriptor = ExpressionDescriptor(
                  expression=self._expression,
                  casing_type=CasingTypeEnum.Sentence,
                  use_24hour_time_format=True,
              )
              # Expressions with more than 5 fields are deliberately not
              # described for now, because Croniter evaluates them
              # inconsistently with other libraries.
              field_count = len(croniter(self._expression).expanded)
              if field_count > 5:
                  raise FormatException()
              interval_description: str = descriptor.get_description()
          except (CroniterBadCronError, FormatException, MissingFieldException):
              # No description is available; fall back to an empty string.
              interval_description = ""

          self.description: str = interval_description

      def __eq__(self, other: Any) -> bool:
          """
          Compare by expression and timezone; both have to match.

          This exists for testing purposes only and should not be relied on
          otherwise.
          """
          if isinstance(other, type(self)):
              same_expression = self._expression == other._expression
              return same_expression and self._timezone == other._timezone
          return NotImplemented

      @property
      def summary(self) -> str:
          """Return the stored cron expression."""
          return self._expression

      def validate(self) -> None:
          """
          Check that croniter accepts the stored expression.

          :raises AirflowTimetableInvalid: If constructing a croniter from the
              expression raises ``CroniterBadCronError`` or
              ``CroniterBadDateError``; the message is the original error text.
          """
          try:
              croniter(self._expression)
          except (CroniterBadCronError, CroniterBadDateError) as err:
              raise AirflowTimetableInvalid(str(err))

      def _get_next(self, current: DateTime) -> DateTime:
          """Get the first schedule after specified time, with DST fixed."""
          local_naive = make_naive(current, self._timezone)
          schedule = croniter(self._expression, start_time=local_naive)
          upcoming = schedule.get_next(datetime.datetime)
          if _covers_every_hour(schedule):
              # Hourly-or-denser schedule: apply the naive distance to the
              # aware ``current`` instead of re-localizing the naive result.
              elapsed = upcoming - local_naive
              return convert_to_utc(current.in_timezone(self._timezone) + elapsed)
          return convert_to_utc(make_aware(upcoming, self._timezone))

      def _get_prev(self, current: DateTime) -> DateTime:
          """Get the first schedule before specified time, with DST fixed."""
          local_naive = make_naive(current, self._timezone)
          schedule = croniter(self._expression, start_time=local_naive)
          previous = schedule.get_prev(datetime.datetime)
          if _covers_every_hour(schedule):
              # Hourly-or-denser schedule: subtract the naive distance from the
              # aware ``current`` instead of re-localizing the naive result.
              elapsed = local_naive - previous
              return convert_to_utc(current.in_timezone(self._timezone) - elapsed)
          return convert_to_utc(make_aware(previous, self._timezone))

      def _align_to_next(self, current: DateTime) -> DateTime:
          """
          Align ``current`` forward onto the schedule.

          The result is the schedule following ``current``, except when
          ``current`` already sits exactly on a schedule boundary; then
          ``current`` itself is returned.
          """
          candidate = self._get_next(current)
          # If stepping back from the candidate lands on ``current``, then
          # ``current`` is itself a scheduled time.
          return candidate if self._get_prev(candidate) != current else current

      def _align_to_prev(self, current: DateTime) -> DateTime:
          """
          Align ``current`` backward onto the schedule.

          The result is the schedule preceding ``current``, except when
          ``current`` already sits exactly on a schedule boundary; then
          ``current`` itself is returned.
          """
          candidate = self._get_prev(current)
          # If stepping forward from the candidate lands on ``current``, then
          # ``current`` is itself a scheduled time.
          return candidate if self._get_next(candidate) != current else current