rfc_constants.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. # These constants are defined by the email specifications.
  2. import re
  3. # Based on RFC 5322 3.2.3, these characters are permitted in email
  4. # addresses (not taking into account internationalization) separated by dots:
  5. ATEXT = r'a-zA-Z0-9_!#\$%&\'\*\+\-/=\?\^`\{\|\}~'
  6. ATEXT_RE = re.compile('[.' + ATEXT + ']') # ATEXT plus dots
  7. DOT_ATOM_TEXT = re.compile('[' + ATEXT + ']+(?:\\.[' + ATEXT + r']+)*\Z')
  8. # RFC 6531 3.3 extends the allowed characters in internationalized
  9. # addresses to also include three specific ranges of UTF8 defined in
  10. # RFC 3629 section 4, which appear to be the Unicode code points from
  11. # U+0080 to U+10FFFF.
  12. ATEXT_INTL = ATEXT + "\u0080-\U0010FFFF"
  13. ATEXT_INTL_DOT_RE = re.compile('[.' + ATEXT_INTL + ']') # ATEXT_INTL plus dots
  14. DOT_ATOM_TEXT_INTL = re.compile('[' + ATEXT_INTL + ']+(?:\\.[' + ATEXT_INTL + r']+)*\Z')
  15. # The domain part of the email address, after IDNA (ASCII) encoding,
  16. # must also satisfy the requirements of RFC 952/RFC 1123 2.1 which
  17. # restrict the allowed characters of hostnames further.
  18. ATEXT_HOSTNAME_INTL = re.compile(r"[a-zA-Z0-9\-\." + "\u0080-\U0010FFFF" + "]")
  19. HOSTNAME_LABEL = r'(?:(?:[a-zA-Z0-9][a-zA-Z0-9\-]*)?[a-zA-Z0-9])'
  20. DOT_ATOM_TEXT_HOSTNAME = re.compile(HOSTNAME_LABEL + r'(?:\.' + HOSTNAME_LABEL + r')*\Z')
  21. DOMAIN_NAME_REGEX = re.compile(r"[A-Za-z]\Z") # all TLDs currently end with a letter
  22. # Domain literal (RFC 5322 3.4.1)
  23. DOMAIN_LITERAL_CHARS = re.compile(r"[\u0021-\u00FA\u005E-\u007E]")
  24. # Quoted-string local part (RFC 5321 4.1.2, internationalized by RFC 6531 3.3)
  25. # The permitted characters in a quoted string are the characters in the range
  26. # 32-126, except that quotes and (literal) backslashes can only appear when escaped
  27. # by a backslash. When internationalized, UTF-8 strings are also permitted except
  28. # the ASCII characters that are not previously permitted (see above).
  29. # QUOTED_LOCAL_PART_ADDR = re.compile(r"^\"((?:[\u0020-\u0021\u0023-\u005B\u005D-\u007E]|\\[\u0020-\u007E])*)\"@(.*)")
  30. QTEXT_INTL = re.compile(r"[\u0020-\u007E\u0080-\U0010FFFF]")
  31. # Length constants
  32. # RFC 3696 + errata 1003 + errata 1690 (https://www.rfc-editor.org/errata_search.php?rfc=3696&eid=1690)
  33. # explains the maximum length of an email address is 254 octets.
  34. EMAIL_MAX_LENGTH = 254
  35. LOCAL_PART_MAX_LENGTH = 64
  36. DNS_LABEL_LENGTH_LIMIT = 63 # in "octets", RFC 1035 2.3.1
  37. DOMAIN_MAX_LENGTH = 253 # in "octets" as transmitted, RFC 1035 2.3.4 and RFC 5321 4.5.3.1.2, and see https://stackoverflow.com/questions/32290167/what-is-the-maximum-length-of-a-dns-name
  38. # RFC 2142
  39. CASE_INSENSITIVE_MAILBOX_NAMES = [
  40. 'info', 'marketing', 'sales', 'support', # section 3
  41. 'abuse', 'noc', 'security', # section 4
  42. 'postmaster', 'hostmaster', 'usenet', 'news', 'webmaster', 'www', 'uucp', 'ftp', # section 5
  43. ]