#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

import collections.abc
import functools
import operator
from collections.abc import Sized
from typing import TYPE_CHECKING, Any, Dict, Iterable, Mapping, NamedTuple, Sequence, Union

import attr

from airflow.utils.mixins import ResolveMixin
from airflow.utils.session import NEW_SESSION, provide_session

if TYPE_CHECKING:
    from sqlalchemy.orm import Session

    from airflow.models.operator import Operator
    from airflow.models.xcom_arg import XComArg
    from airflow.serialization.serialized_objects import _ExpandInputRef
    from airflow.typing_compat import TypeGuard
    from airflow.utils.context import Context

ExpandInput = Union["DictOfListsExpandInput", "ListOfDictsExpandInput"]

# Each keyword argument to expand() can be an XComArg, sequence, or dict (not
# any mapping since we need the value to be ordered).
OperatorExpandArgument = Union["MappedArgument", "XComArg", Sequence, Dict[str, Any]]

# The single argument of expand_kwargs() can be an XComArg, or a list with each
# element being either an XComArg or a dict.
OperatorExpandKwargsArgument = Union["XComArg", Sequence[Union["XComArg", Mapping[str, Any]]]]
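
# Illustrative sketch only (the task and value names below are hypothetical): the two
# input shapes correspond to the two mapping APIs called on a task at DAG parse time.
#
#     my_task.expand(x=[1, 2, 3], y=other_task.output)                 # -> DictOfListsExpandInput
#     my_task.expand_kwargs([{"x": 1, "y": "a"}, {"x": 2, "y": "b"}])  # -> ListOfDictsExpandInput
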
@attr.define(kw_only=True)
class MappedArgument(ResolveMixin):
    """
    Stand-in stub for task-group-mapping arguments.

    This is very similar to an XComArg, but resolved differently. Declared here
    (instead of in the task group module) to avoid import cycles.
    """

    _input: ExpandInput
    _key: str

    def get_task_map_length(self, run_id: str, *, session: Session) -> int | None:
        # TODO (AIP-42): Implement run-time task map length inspection. This is
        # needed when we implement task mapping inside a mapped task group.
        raise NotImplementedError()

    def iter_references(self) -> Iterable[tuple[Operator, str]]:
        yield from self._input.iter_references()

    @provide_session
    def resolve(self, context: Context, *, include_xcom: bool = True, session: Session = NEW_SESSION) -> Any:
        data, _ = self._input.resolve(context, session=session, include_xcom=include_xcom)
        return data[self._key]


# To replace tedious isinstance() checks.
def is_mappable(v: Any) -> TypeGuard[OperatorExpandArgument]:
    from airflow.models.xcom_arg import XComArg

    return isinstance(v, (MappedArgument, XComArg, Mapping, Sequence)) and not isinstance(v, str)


# To replace tedious isinstance() checks.
def _is_parse_time_mappable(v: OperatorExpandArgument) -> TypeGuard[Mapping | Sequence]:
    from airflow.models.xcom_arg import XComArg

    return not isinstance(v, (MappedArgument, XComArg))


# To replace tedious isinstance() checks.
def _needs_run_time_resolution(v: OperatorExpandArgument) -> TypeGuard[MappedArgument | XComArg]:
    from airflow.models.xcom_arg import XComArg

    return isinstance(v, (MappedArgument, XComArg))
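
# Illustrative behavior of the helpers above (values are hypothetical; ``some_xcom_arg``
# stands for any XComArg produced by an upstream task):
#
#     is_mappable([1, 2, 3])                     -> True   (literal sequence)
#     is_mappable("abc")                         -> False  (strings are explicitly excluded)
#     _is_parse_time_mappable({"a": 1})          -> True   (length known at parse time)
#     _needs_run_time_resolution(some_xcom_arg)  -> True   (resolved via XCom at run time)
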
class NotFullyPopulated(RuntimeError):
    """
    Raised when ``get_map_lengths`` cannot populate all mapping metadata.

    This generally happens because not all upstream tasks have finished when
    the function is called.
    """

    def __init__(self, missing: set[str]) -> None:
        self.missing = missing

    def __str__(self) -> str:
        keys = ", ".join(repr(k) for k in sorted(self.missing))
        return f"Failed to populate all mapping metadata; missing: {keys}"


class DictOfListsExpandInput(NamedTuple):
    """
    Storage type of a mapped operator's mapped kwargs.

    This is created from ``expand(**kwargs)``.
    """

    value: dict[str, OperatorExpandArgument]

    def _iter_parse_time_resolved_kwargs(self) -> Iterable[tuple[str, Sized]]:
        """Generate kwargs with values available at parse time."""
        return ((k, v) for k, v in self.value.items() if _is_parse_time_mappable(v))

    def get_parse_time_mapped_ti_count(self) -> int:
        if not self.value:
            return 0
        literal_values = [len(v) for _, v in self._iter_parse_time_resolved_kwargs()]
        if len(literal_values) != len(self.value):
            literal_keys = (k for k, _ in self._iter_parse_time_resolved_kwargs())
            raise NotFullyPopulated(set(self.value).difference(literal_keys))
        return functools.reduce(operator.mul, literal_values, 1)
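
    # Illustrative sketch only (hypothetical literal values): the parse-time count is
    # the cross product of all literal argument lengths; an XComArg, whose length is
    # unknown until run time, makes this raise NotFullyPopulated instead.
    #
    #     DictOfListsExpandInput({"a": [1, 2], "b": ["x", "y", "z"]})
    #         .get_parse_time_mapped_ti_count()   -> 2 * 3 == 6
    #     DictOfListsExpandInput({"a": [1, 2], "b": other_task.output})
    #         .get_parse_time_mapped_ti_count()   -> raises NotFullyPopulated({"b"})
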
    def _get_map_lengths(self, run_id: str, *, session: Session) -> dict[str, int]:
        """
        Return dict of argument name to map length.

        If any arguments are not known right now (upstream task not finished),
        they will not be present in the dict.
        """

        # TODO: This initiates one database call for each XComArg. Would it be
        # more efficient to do one single db call and unpack the value here?
        def _get_length(v: OperatorExpandArgument) -> int | None:
            if _needs_run_time_resolution(v):
                return v.get_task_map_length(run_id, session=session)
            # Unfortunately a user-defined TypeGuard cannot apply negative type
            # narrowing. https://github.com/python/typing/discussions/1013
            if TYPE_CHECKING:
                assert isinstance(v, Sized)
            return len(v)

        map_lengths_iterator = ((k, _get_length(v)) for k, v in self.value.items())

        map_lengths = {k: v for k, v in map_lengths_iterator if v is not None}
        if len(map_lengths) < len(self.value):
            raise NotFullyPopulated(set(self.value).difference(map_lengths))
        return map_lengths

    def get_total_map_length(self, run_id: str, *, session: Session) -> int:
        if not self.value:
            return 0
        lengths = self._get_map_lengths(run_id, session=session)
        return functools.reduce(operator.mul, (lengths[name] for name in self.value), 1)

    def _expand_mapped_field(
        self, key: str, value: Any, context: Context, *, session: Session, include_xcom: bool
    ) -> Any:
        if _needs_run_time_resolution(value):
            value = (
                value.resolve(context, session=session, include_xcom=include_xcom)
                if include_xcom
                else str(value)
            )
        map_index = context["ti"].map_index
        if map_index < 0:
            raise RuntimeError("can't resolve task-mapping argument without expanding")
        all_lengths = self._get_map_lengths(context["run_id"], session=session)

        def _find_index_for_this_field(index: int) -> int:
            # Need to use the original user input to retain argument order.
            for mapped_key in reversed(self.value):
                mapped_length = all_lengths[mapped_key]
                if mapped_length < 1:
                    raise RuntimeError(f"cannot expand field mapped to length {mapped_length!r}")
                if mapped_key == key:
                    return index % mapped_length
                index //= mapped_length
            return -1

        found_index = _find_index_for_this_field(map_index)
        if found_index < 0:
            return value
        if isinstance(value, collections.abc.Sequence):
            return value[found_index]
        if not isinstance(value, dict):
            raise TypeError(f"can't map over value of type {type(value)}")
        for i, (k, v) in enumerate(value.items()):
            if i == found_index:
                return k, v
        raise IndexError(f"index {map_index} is over mapped length")
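
    # Worked example of the decomposition above (hypothetical lengths): with
    # self.value ordered as {"a": <length 2>, "b": <length 3>}, the flat map_index
    # enumerates the cross product with the *last* argument varying fastest:
    #
    #     map_index 0 -> a[0], b[0]      map_index 3 -> a[1], b[0]
    #     map_index 1 -> a[0], b[1]      map_index 4 -> a[1], b[1]
    #     map_index 2 -> a[0], b[2]      map_index 5 -> a[1], b[2]
    #
    # _find_index_for_this_field walks the keys in reverse, returning
    # ``index % length`` once it reaches ``key`` and dividing ``index`` by each
    # later argument's length along the way (a mixed-radix decomposition).
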
    def iter_references(self) -> Iterable[tuple[Operator, str]]:
        from airflow.models.xcom_arg import XComArg

        for x in self.value.values():
            if isinstance(x, XComArg):
                yield from x.iter_references()

    def resolve(
        self, context: Context, session: Session, *, include_xcom: bool = True
    ) -> tuple[Mapping[str, Any], set[int]]:
        data = {
            k: self._expand_mapped_field(k, v, context, session=session, include_xcom=include_xcom)
            for k, v in self.value.items()
        }
        literal_keys = {k for k, _ in self._iter_parse_time_resolved_kwargs()}
        resolved_oids = {id(v) for k, v in data.items() if k not in literal_keys}
        return data, resolved_oids


def _describe_type(value: Any) -> str:
    if value is None:
        return "None"
    return type(value).__name__


class ListOfDictsExpandInput(NamedTuple):
    """
    Storage type of a mapped operator's mapped kwargs.

    This is created from ``expand_kwargs(xcom_arg)``.
    """

    value: OperatorExpandKwargsArgument

    def get_parse_time_mapped_ti_count(self) -> int:
        if isinstance(self.value, collections.abc.Sized):
            return len(self.value)
        raise NotFullyPopulated({"expand_kwargs() argument"})

    def get_total_map_length(self, run_id: str, *, session: Session) -> int:
        if isinstance(self.value, collections.abc.Sized):
            return len(self.value)
        length = self.value.get_task_map_length(run_id, session=session)
        if length is None:
            raise NotFullyPopulated({"expand_kwargs() argument"})
        return length
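
    # Illustrative sketch only (hypothetical values): a literal list of dicts has a
    # known length at parse time, while a single XComArg does not until the upstream
    # task has run.
    #
    #     ListOfDictsExpandInput([{"x": 1}, {"x": 2}]).get_parse_time_mapped_ti_count()  -> 2
    #     ListOfDictsExpandInput(other_task.output).get_parse_time_mapped_ti_count()
    #         -> raises NotFullyPopulated({"expand_kwargs() argument"})
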
    def iter_references(self) -> Iterable[tuple[Operator, str]]:
        from airflow.models.xcom_arg import XComArg

        if isinstance(self.value, XComArg):
            yield from self.value.iter_references()
        else:
            for x in self.value:
                if isinstance(x, XComArg):
                    yield from x.iter_references()

    def resolve(
        self, context: Context, session: Session, *, include_xcom: bool = True
    ) -> tuple[Mapping[str, Any], set[int]]:
        map_index = context["ti"].map_index
        if map_index < 0:
            raise RuntimeError("can't resolve task-mapping argument without expanding")

        mapping: Any
        if isinstance(self.value, collections.abc.Sized):
            mapping = self.value[map_index]
            if not isinstance(mapping, collections.abc.Mapping):
                mapping = mapping.resolve(context, session, include_xcom=include_xcom)
        elif include_xcom:
            mappings = self.value.resolve(context, session, include_xcom=include_xcom)
            if not isinstance(mappings, collections.abc.Sequence):
                raise ValueError(f"expand_kwargs() expects a list[dict], not {_describe_type(mappings)}")
            mapping = mappings[map_index]
            if not isinstance(mapping, collections.abc.Mapping):
                raise ValueError(f"expand_kwargs() expects a list[dict], not list[{_describe_type(mapping)}]")

        for key in mapping:
            if not isinstance(key, str):
                raise ValueError(
                    f"expand_kwargs() input dict keys must all be str, "
                    f"but {key!r} is of type {_describe_type(key)}"
                )

        # Filter parse-time resolved values out of the resolved_oids.
        resolved_oids = {id(v) for k, v in mapping.items() if not _is_parse_time_mappable(v)}

        return mapping, resolved_oids
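
    # Illustrative sketch only (hypothetical values and map_index): for
    #     value = [{"x": 1}, {"x": 2}, {"x": 3}]
    # a task instance with map_index == 1 resolves to ({"x": 2}, resolved_oids),
    # where resolved_oids holds the id()s of values that were *not* available at
    # parse time, so callers can tell literals apart from run-time resolved data.
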

EXPAND_INPUT_EMPTY = DictOfListsExpandInput({})  # Sentinel value.

_EXPAND_INPUT_TYPES = {
    "dict-of-lists": DictOfListsExpandInput,
    "list-of-dicts": ListOfDictsExpandInput,
}


def get_map_type_key(expand_input: ExpandInput | _ExpandInputRef) -> str:
    from airflow.serialization.serialized_objects import _ExpandInputRef

    if isinstance(expand_input, _ExpandInputRef):
        return expand_input.key
    return next(k for k, v in _EXPAND_INPUT_TYPES.items() if isinstance(expand_input, v))


def create_expand_input(kind: str, value: Any) -> ExpandInput:
    return _EXPAND_INPUT_TYPES[kind](value)
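
# Illustrative round trip (the literal dict value is hypothetical): serialization code
# stores the key from get_map_type_key() and later rebuilds an equivalent expand input
# with create_expand_input().
#
#     ei = DictOfListsExpandInput({"a": [1, 2]})
#     key = get_map_type_key(ei)            # "dict-of-lists"
#     same = create_expand_input(key, ei.value)
#     assert same == ei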