_punycode.py 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
# Copyright 2014 Mathias Bynens <https://mathiasbynens.be/>
# Copyright 2021 Taneli Hukkinen
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  22. import codecs
  23. import re
  24. from typing import Callable
  25. REGEX_SEPARATORS = re.compile(r"[\x2E\u3002\uFF0E\uFF61]")
  26. REGEX_NON_ASCII = re.compile(r"[^\0-\x7E]")
  27. def encode(uni: str) -> str:
  28. return codecs.encode(uni, encoding="punycode").decode()
  29. def decode(ascii: str) -> str:
  30. return codecs.decode(ascii, encoding="punycode") # type: ignore
  31. def map_domain(string: str, fn: Callable[[str], str]) -> str:
  32. parts = string.split("@")
  33. result = ""
  34. if len(parts) > 1:
  35. # In email addresses, only the domain name should be punycoded. Leave
  36. # the local part (i.e. everything up to `@`) intact.
  37. result = parts[0] + "@"
  38. string = parts[1]
  39. labels = REGEX_SEPARATORS.split(string)
  40. encoded = ".".join(fn(label) for label in labels)
  41. return result + encoded
  42. def to_unicode(obj: str) -> str:
  43. def mapping(obj: str) -> str:
  44. if obj.startswith("xn--"):
  45. return decode(obj[4:].lower())
  46. return obj
  47. return map_domain(obj, mapping)
  48. def to_ascii(obj: str) -> str:
  49. def mapping(obj: str) -> str:
  50. if REGEX_NON_ASCII.search(obj):
  51. return "xn--" + encode(obj)
  52. return obj
  53. return map_domain(obj, mapping)