branch.py 3.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. #
  2. # Licensed to the Apache Software Foundation (ASF) under one
  3. # or more contributor license agreements. See the NOTICE file
  4. # distributed with this work for additional information
  5. # regarding copyright ownership. The ASF licenses this file
  6. # to you under the Apache License, Version 2.0 (the
  7. # "License"); you may not use this file except in compliance
  8. # with the License. You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing,
  13. # software distributed under the License is distributed on an
  14. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15. # KIND, either express or implied. See the License for the
  16. # specific language governing permissions and limitations
  17. # under the License.
  18. """Branching operators."""
  19. from __future__ import annotations
  20. from typing import TYPE_CHECKING, Iterable
  21. from airflow.models.baseoperator import BaseOperator
  22. from airflow.models.skipmixin import SkipMixin
  23. if TYPE_CHECKING:
  24. from airflow.models import TaskInstance
  25. from airflow.serialization.pydantic.taskinstance import TaskInstancePydantic
  26. from airflow.utils.context import Context
  27. class BranchMixIn(SkipMixin):
  28. """Utility helper which handles the branching as one-liner."""
  29. def do_branch(self, context: Context, branches_to_execute: str | Iterable[str]) -> str | Iterable[str]:
  30. """Implement the handling of branching including logging."""
  31. self.log.info("Branch into %s", branches_to_execute)
  32. branch_task_ids = self._expand_task_group_roots(context["ti"], branches_to_execute)
  33. self.skip_all_except(context["ti"], branch_task_ids)
  34. return branches_to_execute
  35. def _expand_task_group_roots(
  36. self, ti: TaskInstance | TaskInstancePydantic, branches_to_execute: str | Iterable[str]
  37. ) -> Iterable[str]:
  38. """Expand any task group into its root task ids."""
  39. if TYPE_CHECKING:
  40. assert ti.task
  41. task = ti.task
  42. dag = task.dag
  43. if TYPE_CHECKING:
  44. assert dag
  45. if branches_to_execute is None:
  46. return
  47. elif isinstance(branches_to_execute, str) or not isinstance(branches_to_execute, Iterable):
  48. branches_to_execute = [branches_to_execute]
  49. for branch in branches_to_execute:
  50. if branch in dag.task_group_dict:
  51. tg = dag.task_group_dict[branch]
  52. root_ids = [root.task_id for root in tg.roots]
  53. self.log.info("Expanding task group %s into %s", tg.group_id, root_ids)
  54. yield from root_ids
  55. else:
  56. yield branch
  57. class BaseBranchOperator(BaseOperator, BranchMixIn):
  58. """
  59. A base class for creating operators with branching functionality, like to BranchPythonOperator.
  60. Users should create a subclass from this operator and implement the function
  61. `choose_branch(self, context)`. This should run whatever business logic
  62. is needed to determine the branch, and return one of the following:
  63. - A single task_id (as a str)
  64. - A single task_group_id (as a str)
  65. - A list containing a combination of task_ids and task_group_ids
  66. The operator will continue with the returned task_id(s) and/or task_group_id(s), and all other
  67. tasks directly downstream of this operator will be skipped.
  68. """
  69. def choose_branch(self, context: Context) -> str | Iterable[str]:
  70. """
  71. Abstract method to choose which branch to run.
  72. Subclasses should implement this, running whatever logic is
  73. necessary to choose a branch and returning a task_id or list of
  74. task_ids.
  75. :param context: Context dictionary as passed to execute()
  76. """
  77. raise NotImplementedError
  78. def execute(self, context: Context):
  79. return self.do_branch(context, self.choose_branch(context))