context.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522
  1. #
  2. # Licensed to the Apache Software Foundation (ASF) under one
  3. # or more contributor license agreements. See the NOTICE file
  4. # distributed with this work for additional information
  5. # regarding copyright ownership. The ASF licenses this file
  6. # to you under the Apache License, Version 2.0 (the
  7. # "License"); you may not use this file except in compliance
  8. # with the License. You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing,
  13. # software distributed under the License is distributed on an
  14. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15. # KIND, either express or implied. See the License for the
  16. # specific language governing permissions and limitations
  17. # under the License.
  18. """Jinja2 template rendering context helper."""
  19. from __future__ import annotations
  20. import contextlib
  21. import copy
  22. import functools
  23. import warnings
  24. from typing import (
  25. TYPE_CHECKING,
  26. Any,
  27. Container,
  28. ItemsView,
  29. Iterator,
  30. KeysView,
  31. Mapping,
  32. MutableMapping,
  33. SupportsIndex,
  34. ValuesView,
  35. )
  36. import attrs
  37. import lazy_object_proxy
  38. from sqlalchemy import select
  39. from airflow.datasets import (
  40. Dataset,
  41. DatasetAlias,
  42. DatasetAliasEvent,
  43. extract_event_key,
  44. )
  45. from airflow.exceptions import RemovedInAirflow3Warning
  46. from airflow.models.dataset import DatasetAliasModel, DatasetEvent, DatasetModel
  47. from airflow.utils.db import LazySelectSequence
  48. from airflow.utils.types import NOTSET
  49. if TYPE_CHECKING:
  50. from sqlalchemy.engine import Row
  51. from sqlalchemy.orm import Session
  52. from sqlalchemy.sql.expression import Select, TextClause
  53. from airflow.models.baseoperator import BaseOperator
  54. # NOTE: Please keep this in sync with the following:
  55. # * Context in airflow/utils/context.pyi.
  56. # * Table in docs/apache-airflow/templates-ref.rst
  57. KNOWN_CONTEXT_KEYS: set[str] = {
  58. "conf",
  59. "conn",
  60. "dag",
  61. "dag_run",
  62. "data_interval_end",
  63. "data_interval_start",
  64. "ds",
  65. "ds_nodash",
  66. "execution_date",
  67. "expanded_ti_count",
  68. "exception",
  69. "inlets",
  70. "inlet_events",
  71. "logical_date",
  72. "macros",
  73. "map_index_template",
  74. "next_ds",
  75. "next_ds_nodash",
  76. "next_execution_date",
  77. "outlets",
  78. "outlet_events",
  79. "params",
  80. "prev_data_interval_start_success",
  81. "prev_data_interval_end_success",
  82. "prev_ds",
  83. "prev_ds_nodash",
  84. "prev_execution_date",
  85. "prev_execution_date_success",
  86. "prev_start_date_success",
  87. "prev_end_date_success",
  88. "reason",
  89. "run_id",
  90. "task",
  91. "task_instance",
  92. "task_instance_key_str",
  93. "test_mode",
  94. "templates_dict",
  95. "ti",
  96. "tomorrow_ds",
  97. "tomorrow_ds_nodash",
  98. "triggering_dataset_events",
  99. "ts",
  100. "ts_nodash",
  101. "ts_nodash_with_tz",
  102. "try_number",
  103. "var",
  104. "yesterday_ds",
  105. "yesterday_ds_nodash",
  106. }
  107. class VariableAccessor:
  108. """Wrapper to access Variable values in template."""
  109. def __init__(self, *, deserialize_json: bool) -> None:
  110. self._deserialize_json = deserialize_json
  111. self.var: Any = None
  112. def __getattr__(self, key: str) -> Any:
  113. from airflow.models.variable import Variable
  114. self.var = Variable.get(key, deserialize_json=self._deserialize_json)
  115. return self.var
  116. def __repr__(self) -> str:
  117. return str(self.var)
  118. def get(self, key, default: Any = NOTSET) -> Any:
  119. from airflow.models.variable import Variable
  120. if default is NOTSET:
  121. return Variable.get(key, deserialize_json=self._deserialize_json)
  122. return Variable.get(key, default, deserialize_json=self._deserialize_json)
  123. class ConnectionAccessor:
  124. """Wrapper to access Connection entries in template."""
  125. def __init__(self) -> None:
  126. self.var: Any = None
  127. def __getattr__(self, key: str) -> Any:
  128. from airflow.models.connection import Connection
  129. self.var = Connection.get_connection_from_secrets(key)
  130. return self.var
  131. def __repr__(self) -> str:
  132. return str(self.var)
  133. def get(self, key: str, default_conn: Any = None) -> Any:
  134. from airflow.exceptions import AirflowNotFoundException
  135. from airflow.models.connection import Connection
  136. try:
  137. return Connection.get_connection_from_secrets(key)
  138. except AirflowNotFoundException:
  139. return default_conn
  140. @attrs.define()
  141. class OutletEventAccessor:
  142. """
  143. Wrapper to access an outlet dataset event in template.
  144. :meta private:
  145. """
  146. raw_key: str | Dataset | DatasetAlias
  147. extra: dict[str, Any] = attrs.Factory(dict)
  148. dataset_alias_events: list[DatasetAliasEvent] = attrs.field(factory=list)
  149. def add(self, dataset: Dataset | str, extra: dict[str, Any] | None = None) -> None:
  150. """Add a DatasetEvent to an existing Dataset."""
  151. if isinstance(dataset, str):
  152. warnings.warn(
  153. (
  154. "Emitting dataset events using string is deprecated and will be removed in Airflow 3. "
  155. "Please use the Dataset object (renamed as Asset in Airflow 3) directly"
  156. ),
  157. DeprecationWarning,
  158. stacklevel=2,
  159. )
  160. dataset_uri = dataset
  161. elif isinstance(dataset, Dataset):
  162. dataset_uri = dataset.uri
  163. else:
  164. return
  165. if isinstance(self.raw_key, str):
  166. dataset_alias_name = self.raw_key
  167. elif isinstance(self.raw_key, DatasetAlias):
  168. dataset_alias_name = self.raw_key.name
  169. else:
  170. return
  171. event = DatasetAliasEvent(
  172. source_alias_name=dataset_alias_name, dest_dataset_uri=dataset_uri, extra=extra or {}
  173. )
  174. self.dataset_alias_events.append(event)
  175. class OutletEventAccessors(Mapping[str, OutletEventAccessor]):
  176. """
  177. Lazy mapping of outlet dataset event accessors.
  178. :meta private:
  179. """
  180. def __init__(self) -> None:
  181. self._dict: dict[str, OutletEventAccessor] = {}
  182. def __str__(self) -> str:
  183. return f"OutletEventAccessors(_dict={self._dict})"
  184. def __iter__(self) -> Iterator[str]:
  185. return iter(self._dict)
  186. def __len__(self) -> int:
  187. return len(self._dict)
  188. def __getitem__(self, key: str | Dataset | DatasetAlias) -> OutletEventAccessor:
  189. if isinstance(key, str):
  190. warnings.warn(
  191. (
  192. "Accessing outlet_events using string is deprecated and will be removed in Airflow 3. "
  193. "Please use the Dataset or DatasetAlias object (renamed as Asset and AssetAlias in Airflow 3) directly"
  194. ),
  195. DeprecationWarning,
  196. stacklevel=2,
  197. )
  198. event_key = extract_event_key(key)
  199. if event_key not in self._dict:
  200. self._dict[event_key] = OutletEventAccessor(extra={}, raw_key=key)
  201. return self._dict[event_key]
  202. class LazyDatasetEventSelectSequence(LazySelectSequence[DatasetEvent]):
  203. """
  204. List-like interface to lazily access DatasetEvent rows.
  205. :meta private:
  206. """
  207. @staticmethod
  208. def _rebuild_select(stmt: TextClause) -> Select:
  209. return select(DatasetEvent).from_statement(stmt)
  210. @staticmethod
  211. def _process_row(row: Row) -> DatasetEvent:
  212. return row[0]
  213. @attrs.define(init=False)
  214. class InletEventsAccessors(Mapping[str, LazyDatasetEventSelectSequence]):
  215. """
  216. Lazy mapping for inlet dataset events accessors.
  217. :meta private:
  218. """
  219. _inlets: list[Any]
  220. _datasets: dict[str, Dataset]
  221. _dataset_aliases: dict[str, DatasetAlias]
  222. _session: Session
  223. def __init__(self, inlets: list, *, session: Session) -> None:
  224. self._inlets = inlets
  225. self._session = session
  226. self._datasets = {}
  227. self._dataset_aliases = {}
  228. for inlet in inlets:
  229. if isinstance(inlet, Dataset):
  230. self._datasets[inlet.uri] = inlet
  231. elif isinstance(inlet, DatasetAlias):
  232. self._dataset_aliases[inlet.name] = inlet
  233. def __iter__(self) -> Iterator[str]:
  234. return iter(self._inlets)
  235. def __len__(self) -> int:
  236. return len(self._inlets)
  237. def __getitem__(self, key: int | str | Dataset | DatasetAlias) -> LazyDatasetEventSelectSequence:
  238. if isinstance(key, int): # Support index access; it's easier for trivial cases.
  239. obj = self._inlets[key]
  240. if not isinstance(obj, (Dataset, DatasetAlias)):
  241. raise IndexError(key)
  242. else:
  243. obj = key
  244. if isinstance(obj, DatasetAlias):
  245. dataset_alias = self._dataset_aliases[obj.name]
  246. join_clause = DatasetEvent.source_aliases
  247. where_clause = DatasetAliasModel.name == dataset_alias.name
  248. elif isinstance(obj, (Dataset, str)):
  249. if isinstance(obj, str):
  250. warnings.warn(
  251. (
  252. "Accessing inlet_events using string is deprecated and will be removed in Airflow 3. "
  253. "Please use the Dataset object (renamed as Asset in Airflow 3) directly"
  254. ),
  255. DeprecationWarning,
  256. stacklevel=2,
  257. )
  258. dataset = self._datasets[extract_event_key(obj)]
  259. join_clause = DatasetEvent.dataset
  260. where_clause = DatasetModel.uri == dataset.uri
  261. else:
  262. raise ValueError(key)
  263. return LazyDatasetEventSelectSequence.from_select(
  264. select(DatasetEvent).join(join_clause).where(where_clause),
  265. order_by=[DatasetEvent.timestamp],
  266. session=self._session,
  267. )
  268. class AirflowContextDeprecationWarning(RemovedInAirflow3Warning):
  269. """Warn for usage of deprecated context variables in a task."""
  270. def _create_deprecation_warning(key: str, replacements: list[str]) -> RemovedInAirflow3Warning:
  271. message = f"Accessing {key!r} from the template is deprecated and will be removed in a future version."
  272. if not replacements:
  273. return AirflowContextDeprecationWarning(message)
  274. display_except_last = ", ".join(repr(r) for r in replacements[:-1])
  275. if display_except_last:
  276. message += f" Please use {display_except_last} or {replacements[-1]!r} instead."
  277. else:
  278. message += f" Please use {replacements[-1]!r} instead."
  279. return AirflowContextDeprecationWarning(message)
  280. class Context(MutableMapping[str, Any]):
  281. """
  282. Jinja2 template context for task rendering.
  283. This is a mapping (dict-like) class that can lazily emit warnings when
  284. (and only when) deprecated context keys are accessed.
  285. """
  286. _DEPRECATION_REPLACEMENTS: dict[str, list[str]] = {
  287. "execution_date": ["data_interval_start", "logical_date"],
  288. "next_ds": ["{{ data_interval_end | ds }}"],
  289. "next_ds_nodash": ["{{ data_interval_end | ds_nodash }}"],
  290. "next_execution_date": ["data_interval_end"],
  291. "prev_ds": [],
  292. "prev_ds_nodash": [],
  293. "prev_execution_date": [],
  294. "prev_execution_date_success": ["prev_data_interval_start_success"],
  295. "tomorrow_ds": [],
  296. "tomorrow_ds_nodash": [],
  297. "yesterday_ds": [],
  298. "yesterday_ds_nodash": [],
  299. "conf": [],
  300. }
  301. def __init__(self, context: MutableMapping[str, Any] | None = None, **kwargs: Any) -> None:
  302. self._context: MutableMapping[str, Any] = context or {}
  303. if kwargs:
  304. self._context.update(kwargs)
  305. self._deprecation_replacements = self._DEPRECATION_REPLACEMENTS.copy()
  306. def __repr__(self) -> str:
  307. return repr(self._context)
  308. def __reduce_ex__(self, protocol: SupportsIndex) -> tuple[Any, ...]:
  309. """
  310. Pickle the context as a dict.
  311. We are intentionally going through ``__getitem__`` in this function,
  312. instead of using ``items()``, to trigger deprecation warnings.
  313. """
  314. items = [(key, self[key]) for key in self._context]
  315. return dict, (items,)
  316. def __copy__(self) -> Context:
  317. new = type(self)(copy.copy(self._context))
  318. new._deprecation_replacements = self._deprecation_replacements.copy()
  319. return new
  320. def __getitem__(self, key: str) -> Any:
  321. with contextlib.suppress(KeyError):
  322. warnings.warn(
  323. _create_deprecation_warning(key, self._deprecation_replacements[key]),
  324. stacklevel=2,
  325. )
  326. with contextlib.suppress(KeyError):
  327. return self._context[key]
  328. raise KeyError(key)
  329. def __setitem__(self, key: str, value: Any) -> None:
  330. self._deprecation_replacements.pop(key, None)
  331. self._context[key] = value
  332. def __delitem__(self, key: str) -> None:
  333. self._deprecation_replacements.pop(key, None)
  334. del self._context[key]
  335. def __contains__(self, key: object) -> bool:
  336. return key in self._context
  337. def __iter__(self) -> Iterator[str]:
  338. return iter(self._context)
  339. def __len__(self) -> int:
  340. return len(self._context)
  341. def __eq__(self, other: Any) -> bool:
  342. if not isinstance(other, Context):
  343. return NotImplemented
  344. return self._context == other._context
  345. def __ne__(self, other: Any) -> bool:
  346. if not isinstance(other, Context):
  347. return NotImplemented
  348. return self._context != other._context
  349. def keys(self) -> KeysView[str]:
  350. return self._context.keys()
  351. def items(self):
  352. return ItemsView(self._context)
  353. def values(self):
  354. return ValuesView(self._context)
  355. def context_merge(context: Context, *args: Any, **kwargs: Any) -> None:
  356. """
  357. Merge parameters into an existing context.
  358. Like ``dict.update()`` , this take the same parameters, and updates
  359. ``context`` in-place.
  360. This is implemented as a free function because the ``Context`` type is
  361. "faked" as a ``TypedDict`` in ``context.pyi``, which cannot have custom
  362. functions.
  363. :meta private:
  364. """
  365. context.update(*args, **kwargs)
  366. def context_update_for_unmapped(context: Context, task: BaseOperator) -> None:
  367. """
  368. Update context after task unmapping.
  369. Since ``get_template_context()`` is called before unmapping, the context
  370. contains information about the mapped task. We need to do some in-place
  371. updates to ensure the template context reflects the unmapped task instead.
  372. :meta private:
  373. """
  374. from airflow.models.param import process_params
  375. context["task"] = context["ti"].task = task
  376. context["params"] = process_params(context["dag"], task, context["dag_run"], suppress_exception=False)
  377. def context_copy_partial(source: Context, keys: Container[str]) -> Context:
  378. """
  379. Create a context by copying items under selected keys in ``source``.
  380. This is implemented as a free function because the ``Context`` type is
  381. "faked" as a ``TypedDict`` in ``context.pyi``, which cannot have custom
  382. functions.
  383. :meta private:
  384. """
  385. new = Context({k: v for k, v in source._context.items() if k in keys})
  386. new._deprecation_replacements = source._deprecation_replacements.copy()
  387. return new
  388. def lazy_mapping_from_context(source: Context) -> Mapping[str, Any]:
  389. """
  390. Create a mapping that wraps deprecated entries in a lazy object proxy.
  391. This further delays deprecation warning to until when the entry is actually
  392. used, instead of when it's accessed in the context. The result is useful for
  393. passing into a callable with ``**kwargs``, which would unpack the mapping
  394. too eagerly otherwise.
  395. This is implemented as a free function because the ``Context`` type is
  396. "faked" as a ``TypedDict`` in ``context.pyi``, which cannot have custom
  397. functions.
  398. :meta private:
  399. """
  400. if not isinstance(source, Context):
  401. # Sometimes we are passed a plain dict (usually in tests, or in User's
  402. # custom operators) -- be lienent about what we accept so we don't
  403. # break anything for users.
  404. return source
  405. def _deprecated_proxy_factory(k: str, v: Any) -> Any:
  406. replacements = source._deprecation_replacements[k]
  407. warnings.warn(_create_deprecation_warning(k, replacements), stacklevel=2)
  408. return v
  409. def _create_value(k: str, v: Any) -> Any:
  410. if k not in source._deprecation_replacements:
  411. return v
  412. factory = functools.partial(_deprecated_proxy_factory, k, v)
  413. return lazy_object_proxy.Proxy(factory)
  414. return {k: _create_value(k, v) for k, v in source._context.items()}
  415. def context_get_outlet_events(context: Context) -> OutletEventAccessors:
  416. try:
  417. return context["outlet_events"]
  418. except KeyError:
  419. return OutletEventAccessors()