smtp.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. #
  2. # Licensed to the Apache Software Foundation (ASF) under one
  3. # or more contributor license agreements. See the NOTICE file
  4. # distributed with this work for additional information
  5. # regarding copyright ownership. The ASF licenses this file
  6. # to you under the Apache License, Version 2.0 (the
  7. # "License"); you may not use this file except in compliance
  8. # with the License. You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing,
  13. # software distributed under the License is distributed on an
  14. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15. # KIND, either express or implied. See the License for the
  16. # specific language governing permissions and limitations
  17. # under the License.
  18. """
  19. Send emails, optionally with file attachments, through an SMTP server.
  20. It uses the smtplib library that is already integrated in Python 3.
  21. """
  22. from __future__ import annotations
  23. import collections.abc
  24. import os
  25. import re
  26. import smtplib
  27. import ssl
  28. from collections.abc import Iterable
  29. from email.mime.application import MIMEApplication
  30. from email.mime.multipart import MIMEMultipart
  31. from email.mime.text import MIMEText
  32. from email.utils import formatdate
  33. from pathlib import Path
  34. from typing import TYPE_CHECKING, Any
  35. from airflow.exceptions import AirflowException, AirflowNotFoundException
  36. from airflow.hooks.base import BaseHook
  37. if TYPE_CHECKING:
  38. from airflow.models.connection import Connection
class SmtpHook(BaseHook):
    """
    This hook connects to a mail server by using the smtp protocol.

    The connection named by ``smtp_conn_id`` is only looked up, and the
    ``smtplib`` client only created, when ``get_conn`` runs (directly or by
    entering the hook as a context manager).

    .. note:: Please call this Hook as context manager via `with`
        to automatically open and close the connection to the mail server.

    :param smtp_conn_id: The :ref:`smtp connection id <howto/connection:smtp>`
        that contains the information used to authenticate the client.
    """

    # Hook metadata. ``default_conn_name`` is the default value of the
    # ``smtp_conn_id`` constructor argument; the other three are not read
    # anywhere in this class (presumably consumed by Airflow's hook
    # discovery / connection UI -- confirm against BaseHook).
    conn_name_attr = "smtp_conn_id"
    default_conn_name = "smtp_default"
    conn_type = "smtp"
    hook_name = "SMTP"
  51. def __init__(self, smtp_conn_id: str = default_conn_name) -> None:
  52. super().__init__()
  53. self.smtp_conn_id = smtp_conn_id
  54. self.smtp_connection: Connection | None = None
  55. self.smtp_client: smtplib.SMTP_SSL | smtplib.SMTP | None = None
  56. def __enter__(self) -> SmtpHook:
  57. return self.get_conn()
  58. def __exit__(self, exc_type, exc_val, exc_tb):
  59. self.smtp_client.close()
  60. def get_conn(self) -> SmtpHook:
  61. """
  62. Login to the smtp server.
  63. .. note:: Please call this Hook as context manager via `with`
  64. to automatically open and close the connection to the smtp server.
  65. :return: an authorized SmtpHook object.
  66. """
  67. if not self.smtp_client:
  68. try:
  69. self.smtp_connection = self.get_connection(self.smtp_conn_id)
  70. except AirflowNotFoundException:
  71. raise AirflowException("SMTP connection is not found.")
  72. for attempt in range(1, self.smtp_retry_limit + 1):
  73. try:
  74. self.smtp_client = self._build_client()
  75. except smtplib.SMTPServerDisconnected:
  76. if attempt == self.smtp_retry_limit:
  77. raise AirflowException("Unable to connect to smtp server")
  78. else:
  79. if self.smtp_starttls:
  80. self.smtp_client.starttls()
  81. if self.smtp_user and self.smtp_password:
  82. self.smtp_client.login(self.smtp_user, self.smtp_password)
  83. break
  84. return self
  85. def _build_client(self) -> smtplib.SMTP_SSL | smtplib.SMTP:
  86. SMTP: type[smtplib.SMTP_SSL] | type[smtplib.SMTP]
  87. if self.use_ssl:
  88. SMTP = smtplib.SMTP_SSL
  89. else:
  90. SMTP = smtplib.SMTP
  91. smtp_kwargs: dict[str, Any] = {"host": self.host}
  92. if self.port:
  93. smtp_kwargs["port"] = self.port
  94. smtp_kwargs["timeout"] = self.timeout
  95. if self.use_ssl:
  96. ssl_context_string = self.ssl_context
  97. if ssl_context_string is None or ssl_context_string == "default":
  98. ssl_context = ssl.create_default_context()
  99. elif ssl_context_string == "none":
  100. ssl_context = None
  101. else:
  102. raise RuntimeError(
  103. f"The connection extra field `ssl_context` must "
  104. f"be set to 'default' or 'none' but it is set to '{ssl_context_string}'."
  105. )
  106. smtp_kwargs["context"] = ssl_context
  107. return SMTP(**smtp_kwargs)
  108. @classmethod
  109. def get_connection_form_widgets(cls) -> dict[str, Any]:
  110. """Return connection widgets to add to connection form."""
  111. from flask_appbuilder.fieldwidgets import BS3TextFieldWidget
  112. from flask_babel import lazy_gettext
  113. from wtforms import BooleanField, IntegerField, StringField
  114. from wtforms.validators import NumberRange
  115. return {
  116. "from_email": StringField(lazy_gettext("From email"), widget=BS3TextFieldWidget()),
  117. "timeout": IntegerField(
  118. lazy_gettext("Connection timeout"),
  119. validators=[NumberRange(min=0)],
  120. widget=BS3TextFieldWidget(),
  121. default=30,
  122. ),
  123. "retry_limit": IntegerField(
  124. lazy_gettext("Number of Retries"),
  125. validators=[NumberRange(min=0)],
  126. widget=BS3TextFieldWidget(),
  127. default=5,
  128. ),
  129. "disable_tls": BooleanField(lazy_gettext("Disable TLS"), default=False),
  130. "disable_ssl": BooleanField(lazy_gettext("Disable SSL"), default=False),
  131. "subject_template": StringField(
  132. lazy_gettext("Path to the subject template"), widget=BS3TextFieldWidget()
  133. ),
  134. "html_content_template": StringField(
  135. lazy_gettext("Path to the html content template"), widget=BS3TextFieldWidget()
  136. ),
  137. }
  138. def test_connection(self) -> tuple[bool, str]:
  139. """Test SMTP connectivity from UI."""
  140. try:
  141. smtp_client = self.get_conn().smtp_client
  142. if smtp_client:
  143. status = smtp_client.noop()[0]
  144. if status == 250:
  145. return True, "Connection successfully tested"
  146. except Exception as e:
  147. return False, str(e)
  148. return False, "Failed to establish connection"
  149. def send_email_smtp(
  150. self,
  151. *,
  152. to: str | Iterable[str],
  153. subject: str | None = None,
  154. html_content: str | None = None,
  155. from_email: str | None = None,
  156. files: list[str] | None = None,
  157. dryrun: bool = False,
  158. cc: str | Iterable[str] | None = None,
  159. bcc: str | Iterable[str] | None = None,
  160. mime_subtype: str = "mixed",
  161. mime_charset: str = "utf-8",
  162. custom_headers: dict[str, Any] | None = None,
  163. **kwargs,
  164. ) -> None:
  165. """
  166. Send an email with html content.
  167. :param to: Recipient email address or list of addresses.
  168. :param subject: Email subject. If it's None, the hook will check if there is a path to a subject
  169. file provided in the connection, and raises an exception if not.
  170. :param html_content: Email body in HTML format. If it's None, the hook will check if there is a path
  171. to a html content file provided in the connection, and raises an exception if not.
  172. :param from_email: Sender email address. If it's None, the hook will check if there is an email
  173. provided in the connection, and raises an exception if not.
  174. :param files: List of file paths to attach to the email.
  175. :param dryrun: If True, the email will not be sent, but all other actions will be performed.
  176. :param cc: Carbon copy recipient email address or list of addresses.
  177. :param bcc: Blind carbon copy recipient email address or list of addresses.
  178. :param mime_subtype: MIME subtype of the email.
  179. :param mime_charset: MIME charset of the email.
  180. :param custom_headers: Dictionary of custom headers to include in the email.
  181. :param kwargs: Additional keyword arguments.
  182. >>> send_email_smtp(
  183. 'test@example.com', 'foo', '<b>Foo</b> bar', ['/dev/null'], dryrun=True
  184. )
  185. """
  186. if not self.smtp_client:
  187. raise AirflowException("The 'smtp_client' should be initialized before!")
  188. from_email = from_email or self.from_email
  189. if not from_email:
  190. raise AirflowException("You should provide `from_email` or define it in the connection.")
  191. if not subject:
  192. if self.subject_template is None:
  193. raise AirflowException(
  194. "You should provide `subject` or define `subject_template` in the connection."
  195. )
  196. subject = self._read_template(self.subject_template)
  197. if not html_content:
  198. if self.html_content_template is None:
  199. raise AirflowException(
  200. "You should provide `html_content` or define `html_content_template` in the connection."
  201. )
  202. html_content = self._read_template(self.html_content_template)
  203. mime_msg, recipients = self._build_mime_message(
  204. mail_from=from_email,
  205. to=to,
  206. subject=subject,
  207. html_content=html_content,
  208. files=files,
  209. cc=cc,
  210. bcc=bcc,
  211. mime_subtype=mime_subtype,
  212. mime_charset=mime_charset,
  213. custom_headers=custom_headers,
  214. )
  215. if not dryrun:
  216. for attempt in range(1, self.smtp_retry_limit + 1):
  217. try:
  218. self.smtp_client.sendmail(
  219. from_addr=from_email, to_addrs=recipients, msg=mime_msg.as_string()
  220. )
  221. except smtplib.SMTPServerDisconnected as e:
  222. if attempt == self.smtp_retry_limit:
  223. raise e
  224. else:
  225. break
  226. def _build_mime_message(
  227. self,
  228. mail_from: str | None,
  229. to: str | Iterable[str],
  230. subject: str,
  231. html_content: str,
  232. files: list[str] | None = None,
  233. cc: str | Iterable[str] | None = None,
  234. bcc: str | Iterable[str] | None = None,
  235. mime_subtype: str = "mixed",
  236. mime_charset: str = "utf-8",
  237. custom_headers: dict[str, Any] | None = None,
  238. ) -> tuple[MIMEMultipart, list[str]]:
  239. """
  240. Build a MIME message that can be used to send an email and returns a full list of recipients.
  241. :param mail_from: Email address to set as the email's "From" field.
  242. :param to: A string or iterable of strings containing email addresses
  243. to set as the email's "To" field.
  244. :param subject: The subject of the email.
  245. :param html_content: The content of the email in HTML format.
  246. :param files: A list of paths to files to be attached to the email.
  247. :param cc: A string or iterable of strings containing email addresses
  248. to set as the email's "CC" field.
  249. :param bcc: A string or iterable of strings containing email addresses
  250. to set as the email's "BCC" field.
  251. :param mime_subtype: The subtype of the MIME message. Default: "mixed".
  252. :param mime_charset: The charset of the email. Default: "utf-8".
  253. :param custom_headers: Additional headers to add to the MIME message.
  254. No validations are run on these values, and they should be able to be encoded.
  255. :return: A tuple containing the email as a MIMEMultipart object and
  256. a list of recipient email addresses.
  257. """
  258. to = self._get_email_address_list(to)
  259. msg = MIMEMultipart(mime_subtype)
  260. msg["Subject"] = subject
  261. if mail_from:
  262. msg["From"] = mail_from
  263. msg["To"] = ", ".join(to)
  264. recipients = to
  265. if cc:
  266. cc = self._get_email_address_list(cc)
  267. msg["CC"] = ", ".join(cc)
  268. recipients += cc
  269. if bcc:
  270. # don't add bcc in header
  271. bcc = self._get_email_address_list(bcc)
  272. recipients += bcc
  273. msg["Date"] = formatdate(localtime=True)
  274. mime_text = MIMEText(html_content, "html", mime_charset)
  275. msg.attach(mime_text)
  276. for fname in files or []:
  277. basename = os.path.basename(fname)
  278. with open(fname, "rb") as file:
  279. part = MIMEApplication(file.read(), Name=basename)
  280. part["Content-Disposition"] = f'attachment; filename="{basename}"'
  281. part["Content-ID"] = f"<{basename}>"
  282. msg.attach(part)
  283. if custom_headers:
  284. for header_key, header_value in custom_headers.items():
  285. msg[header_key] = header_value
  286. return msg, recipients
  287. def _get_email_address_list(self, addresses: str | Iterable[str]) -> list[str]:
  288. """
  289. Return a list of email addresses from the provided input.
  290. :param addresses: A string or iterable of strings containing email addresses.
  291. :return: A list of email addresses.
  292. :raises TypeError: If the input is not a string or iterable of strings.
  293. """
  294. if isinstance(addresses, str):
  295. return self._get_email_list_from_str(addresses)
  296. elif isinstance(addresses, collections.abc.Iterable):
  297. if not all(isinstance(item, str) for item in addresses):
  298. raise TypeError("The items in your iterable must be strings.")
  299. return list(addresses)
  300. else:
  301. raise TypeError(f"Unexpected argument type: Received '{type(addresses).__name__}'.")
  302. def _get_email_list_from_str(self, addresses: str) -> list[str]:
  303. """
  304. Extract a list of email addresses from a string.
  305. The string can contain multiple email addresses separated by
  306. any of the following delimiters: ',' or ';'.
  307. :param addresses: A string containing one or more email addresses.
  308. :return: A list of email addresses.
  309. """
  310. pattern = r"\s*[,;]\s*"
  311. return re.split(pattern, addresses)
  312. @property
  313. def conn(self) -> Connection:
  314. if not self.smtp_connection:
  315. raise AirflowException("The smtp connection should be loaded before!")
  316. return self.smtp_connection
  317. @property
  318. def smtp_retry_limit(self) -> int:
  319. return int(self.conn.extra_dejson.get("retry_limit", 5))
  320. @property
  321. def from_email(self) -> str | None:
  322. return self.conn.extra_dejson.get("from_email")
  323. @property
  324. def smtp_user(self) -> str:
  325. return self.conn.login
  326. @property
  327. def smtp_password(self) -> str:
  328. return self.conn.password
  329. @property
  330. def smtp_starttls(self) -> bool:
  331. return not bool(self.conn.extra_dejson.get("disable_tls", False))
  332. @property
  333. def host(self) -> str:
  334. return self.conn.host
  335. @property
  336. def port(self) -> int:
  337. return self.conn.port
  338. @property
  339. def timeout(self) -> int:
  340. return int(self.conn.extra_dejson.get("timeout", 30))
  341. @property
  342. def use_ssl(self) -> bool:
  343. return not bool(self.conn.extra_dejson.get("disable_ssl", False))
  344. @property
  345. def subject_template(self) -> str | None:
  346. return self.conn.extra_dejson.get("subject_template")
  347. @property
  348. def html_content_template(self) -> str | None:
  349. return self.conn.extra_dejson.get("html_content_template")
  350. @property
  351. def ssl_context(self) -> str | None:
  352. return self.conn.extra_dejson.get("ssl_context")
  353. @staticmethod
  354. def _read_template(template_path: str) -> str:
  355. """
  356. Read the content of a template file.
  357. :param template_path: The path to the template file.
  358. :return: The content of the template file.
  359. """
  360. return Path(template_path).read_text()
  361. @classmethod
  362. def get_ui_field_behaviour(cls) -> dict[str, Any]:
  363. """Return custom field behaviour."""
  364. return {
  365. "hidden_fields": ["schema", "extra"],
  366. "relabeling": {},
  367. }