# validate_email.py (8.2 KB)
  1. from typing import Optional, Union, TYPE_CHECKING
  2. import unicodedata
  3. from .exceptions_types import EmailSyntaxError, ValidatedEmail
  4. from .syntax import split_email, validate_email_local_part, validate_email_domain_name, validate_email_domain_literal, validate_email_length
  5. from .rfc_constants import CASE_INSENSITIVE_MAILBOX_NAMES
# `dns.resolver` is imported only while static type checking so that it is
# not loaded when this module is imported at runtime. At runtime the
# `_Resolver` alias used in annotations is just the placeholder `object`.
if TYPE_CHECKING:
    import dns.resolver
    _Resolver = dns.resolver.Resolver
else:
    _Resolver = object
  11. def validate_email(
  12. email: Union[str, bytes],
  13. /, # prior arguments are positional-only
  14. *, # subsequent arguments are keyword-only
  15. allow_smtputf8: Optional[bool] = None,
  16. allow_empty_local: bool = False,
  17. allow_quoted_local: Optional[bool] = None,
  18. allow_domain_literal: Optional[bool] = None,
  19. allow_display_name: Optional[bool] = None,
  20. check_deliverability: Optional[bool] = None,
  21. test_environment: Optional[bool] = None,
  22. globally_deliverable: Optional[bool] = None,
  23. timeout: Optional[int] = None,
  24. dns_resolver: Optional[_Resolver] = None
  25. ) -> ValidatedEmail:
  26. """
  27. Given an email address, and some options, returns a ValidatedEmail instance
  28. with information about the address if it is valid or, if the address is not
  29. valid, raises an EmailNotValidError. This is the main function of the module.
  30. """
  31. # Fill in default values of arguments.
  32. from . import ALLOW_SMTPUTF8, ALLOW_QUOTED_LOCAL, ALLOW_DOMAIN_LITERAL, ALLOW_DISPLAY_NAME, \
  33. GLOBALLY_DELIVERABLE, CHECK_DELIVERABILITY, TEST_ENVIRONMENT, DEFAULT_TIMEOUT
  34. if allow_smtputf8 is None:
  35. allow_smtputf8 = ALLOW_SMTPUTF8
  36. if allow_quoted_local is None:
  37. allow_quoted_local = ALLOW_QUOTED_LOCAL
  38. if allow_domain_literal is None:
  39. allow_domain_literal = ALLOW_DOMAIN_LITERAL
  40. if allow_display_name is None:
  41. allow_display_name = ALLOW_DISPLAY_NAME
  42. if check_deliverability is None:
  43. check_deliverability = CHECK_DELIVERABILITY
  44. if test_environment is None:
  45. test_environment = TEST_ENVIRONMENT
  46. if globally_deliverable is None:
  47. globally_deliverable = GLOBALLY_DELIVERABLE
  48. if timeout is None and dns_resolver is None:
  49. timeout = DEFAULT_TIMEOUT
  50. # Allow email to be a str or bytes instance. If bytes,
  51. # it must be ASCII because that's how the bytes work
  52. # on the wire with SMTP.
  53. if not isinstance(email, str):
  54. try:
  55. email = email.decode("ascii")
  56. except ValueError as e:
  57. raise EmailSyntaxError("The email address is not valid ASCII.") from e
  58. # Split the address into the display name (or None), the local part
  59. # (before the @-sign), and the domain part (after the @-sign).
  60. # Normally, there is only one @-sign. But the awkward "quoted string"
  61. # local part form (RFC 5321 4.1.2) allows @-signs in the local
  62. # part if the local part is quoted.
  63. display_name, local_part, domain_part, is_quoted_local_part \
  64. = split_email(email)
  65. # Collect return values in this instance.
  66. ret = ValidatedEmail()
  67. ret.original = ((local_part if not is_quoted_local_part
  68. else ('"' + local_part + '"'))
  69. + "@" + domain_part) # drop the display name, if any, for email length tests at the end
  70. ret.display_name = display_name
  71. # Validate the email address's local part syntax and get a normalized form.
  72. # If the original address was quoted and the decoded local part is a valid
  73. # unquoted local part, then we'll get back a normalized (unescaped) local
  74. # part.
  75. local_part_info = validate_email_local_part(local_part,
  76. allow_smtputf8=allow_smtputf8,
  77. allow_empty_local=allow_empty_local,
  78. quoted_local_part=is_quoted_local_part)
  79. ret.local_part = local_part_info["local_part"]
  80. ret.ascii_local_part = local_part_info["ascii_local_part"]
  81. ret.smtputf8 = local_part_info["smtputf8"]
  82. # RFC 6532 section 3.1 says that Unicode NFC normalization should be applied,
  83. # so we'll return the NFC-normalized local part. Since the caller may use that
  84. # string in place of the original string, ensure it is also valid.
  85. normalized_local_part = unicodedata.normalize("NFC", ret.local_part)
  86. if normalized_local_part != ret.local_part:
  87. try:
  88. validate_email_local_part(normalized_local_part,
  89. allow_smtputf8=allow_smtputf8,
  90. allow_empty_local=allow_empty_local,
  91. quoted_local_part=is_quoted_local_part)
  92. except EmailSyntaxError as e:
  93. raise EmailSyntaxError("After Unicode normalization: " + str(e)) from e
  94. ret.local_part = normalized_local_part
  95. # If a quoted local part isn't allowed but is present, now raise an exception.
  96. # This is done after any exceptions raised by validate_email_local_part so
  97. # that mandatory checks have highest precedence.
  98. if is_quoted_local_part and not allow_quoted_local:
  99. raise EmailSyntaxError("Quoting the part before the @-sign is not allowed here.")
  100. # Some local parts are required to be case-insensitive, so we should normalize
  101. # to lowercase.
  102. # RFC 2142
  103. if ret.ascii_local_part is not None \
  104. and ret.ascii_local_part.lower() in CASE_INSENSITIVE_MAILBOX_NAMES \
  105. and ret.local_part is not None:
  106. ret.ascii_local_part = ret.ascii_local_part.lower()
  107. ret.local_part = ret.local_part.lower()
  108. # Validate the email address's domain part syntax and get a normalized form.
  109. is_domain_literal = False
  110. if len(domain_part) == 0:
  111. raise EmailSyntaxError("There must be something after the @-sign.")
  112. elif domain_part.startswith("[") and domain_part.endswith("]"):
  113. # Parse the address in the domain literal and get back a normalized domain.
  114. domain_literal_info = validate_email_domain_literal(domain_part[1:-1])
  115. if not allow_domain_literal:
  116. raise EmailSyntaxError("A bracketed IP address after the @-sign is not allowed here.")
  117. ret.domain = domain_literal_info["domain"]
  118. ret.ascii_domain = domain_literal_info["domain"] # Domain literals are always ASCII.
  119. ret.domain_address = domain_literal_info["domain_address"]
  120. is_domain_literal = True # Prevent deliverability checks.
  121. else:
  122. # Check the syntax of the domain and get back a normalized
  123. # internationalized and ASCII form.
  124. domain_name_info = validate_email_domain_name(domain_part, test_environment=test_environment, globally_deliverable=globally_deliverable)
  125. ret.domain = domain_name_info["domain"]
  126. ret.ascii_domain = domain_name_info["ascii_domain"]
  127. # Construct the complete normalized form.
  128. ret.normalized = ret.local_part + "@" + ret.domain
  129. # If the email address has an ASCII form, add it.
  130. if not ret.smtputf8:
  131. if not ret.ascii_domain:
  132. raise Exception("Missing ASCII domain.")
  133. ret.ascii_email = (ret.ascii_local_part or "") + "@" + ret.ascii_domain
  134. else:
  135. ret.ascii_email = None
  136. # Check the length of the address.
  137. validate_email_length(ret)
  138. # Check that a display name is permitted. It's the last syntax check
  139. # because we always check against optional parsing features last.
  140. if display_name is not None and not allow_display_name:
  141. raise EmailSyntaxError("A display name and angle brackets around the email address are not permitted here.")
  142. if check_deliverability and not test_environment:
  143. # Validate the email address's deliverability using DNS
  144. # and update the returned ValidatedEmail object with metadata.
  145. if is_domain_literal:
  146. # There is nothing to check --- skip deliverability checks.
  147. return ret
  148. # Lazy load `deliverability` as it is slow to import (due to dns.resolver)
  149. from .deliverability import validate_email_deliverability
  150. deliverability_info = validate_email_deliverability(
  151. ret.ascii_domain, ret.domain, timeout, dns_resolver
  152. )
  153. mx = deliverability_info.get("mx")
  154. if mx is not None:
  155. ret.mx = mx
  156. ret.mx_fallback_type = deliverability_info.get("mx_fallback_type")
  157. return ret