utils.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. See the LICENSE file in the root of this repository
# for complete details.
  4. from __future__ import annotations
  5. import functools
  6. import re
  7. from typing import NewType, Tuple, Union, cast
  8. from .tags import Tag, parse_tag
  9. from .version import InvalidVersion, Version, _TrimmedRelease
# A wheel build tag: the empty tuple when the filename has no build component,
# otherwise (leading integer, remaining text).
BuildTag = Union[Tuple[()], Tuple[int, str]]
# Distinct static type for names produced by canonicalize_name(); at runtime
# a NormalizedName is a plain str.
NormalizedName = NewType("NormalizedName", str)
  12. class InvalidName(ValueError):
  13. """
  14. An invalid distribution name; users should refer to the packaging user guide.
  15. """
  16. class InvalidWheelFilename(ValueError):
  17. """
  18. An invalid wheel filename was found, users should refer to PEP 427.
  19. """
  20. class InvalidSdistFilename(ValueError):
  21. """
  22. An invalid sdist filename was found, users should refer to the packaging user guide.
  23. """
  24. # Core metadata spec for `Name`
  25. _validate_regex = re.compile(
  26. r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.IGNORECASE
  27. )
  28. _canonicalize_regex = re.compile(r"[-_.]+")
  29. _normalized_regex = re.compile(r"^([a-z0-9]|[a-z0-9]([a-z0-9-](?!--))*[a-z0-9])$")
  30. # PEP 427: The build number must start with a digit.
  31. _build_tag_regex = re.compile(r"(\d+)(.*)")
  32. def canonicalize_name(name: str, *, validate: bool = False) -> NormalizedName:
  33. if validate and not _validate_regex.match(name):
  34. raise InvalidName(f"name is invalid: {name!r}")
  35. # This is taken from PEP 503.
  36. value = _canonicalize_regex.sub("-", name).lower()
  37. return cast(NormalizedName, value)
  38. def is_normalized_name(name: str) -> bool:
  39. return _normalized_regex.match(name) is not None
  40. @functools.singledispatch
  41. def canonicalize_version(
  42. version: Version | str, *, strip_trailing_zero: bool = True
  43. ) -> str:
  44. """
  45. Return a canonical form of a version as a string.
  46. >>> canonicalize_version('1.0.1')
  47. '1.0.1'
  48. Per PEP 625, versions may have multiple canonical forms, differing
  49. only by trailing zeros.
  50. >>> canonicalize_version('1.0.0')
  51. '1'
  52. >>> canonicalize_version('1.0.0', strip_trailing_zero=False)
  53. '1.0.0'
  54. Invalid versions are returned unaltered.
  55. >>> canonicalize_version('foo bar baz')
  56. 'foo bar baz'
  57. """
  58. return str(_TrimmedRelease(str(version)) if strip_trailing_zero else version)
  59. @canonicalize_version.register
  60. def _(version: str, *, strip_trailing_zero: bool = True) -> str:
  61. try:
  62. parsed = Version(version)
  63. except InvalidVersion:
  64. # Legacy versions cannot be normalized
  65. return version
  66. return canonicalize_version(parsed, strip_trailing_zero=strip_trailing_zero)
  67. def parse_wheel_filename(
  68. filename: str,
  69. ) -> tuple[NormalizedName, Version, BuildTag, frozenset[Tag]]:
  70. if not filename.endswith(".whl"):
  71. raise InvalidWheelFilename(
  72. f"Invalid wheel filename (extension must be '.whl'): {filename!r}"
  73. )
  74. filename = filename[:-4]
  75. dashes = filename.count("-")
  76. if dashes not in (4, 5):
  77. raise InvalidWheelFilename(
  78. f"Invalid wheel filename (wrong number of parts): {filename!r}"
  79. )
  80. parts = filename.split("-", dashes - 2)
  81. name_part = parts[0]
  82. # See PEP 427 for the rules on escaping the project name.
  83. if "__" in name_part or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None:
  84. raise InvalidWheelFilename(f"Invalid project name: {filename!r}")
  85. name = canonicalize_name(name_part)
  86. try:
  87. version = Version(parts[1])
  88. except InvalidVersion as e:
  89. raise InvalidWheelFilename(
  90. f"Invalid wheel filename (invalid version): {filename!r}"
  91. ) from e
  92. if dashes == 5:
  93. build_part = parts[2]
  94. build_match = _build_tag_regex.match(build_part)
  95. if build_match is None:
  96. raise InvalidWheelFilename(
  97. f"Invalid build number: {build_part} in {filename!r}"
  98. )
  99. build = cast(BuildTag, (int(build_match.group(1)), build_match.group(2)))
  100. else:
  101. build = ()
  102. tags = parse_tag(parts[-1])
  103. return (name, version, build, tags)
  104. def parse_sdist_filename(filename: str) -> tuple[NormalizedName, Version]:
  105. if filename.endswith(".tar.gz"):
  106. file_stem = filename[: -len(".tar.gz")]
  107. elif filename.endswith(".zip"):
  108. file_stem = filename[: -len(".zip")]
  109. else:
  110. raise InvalidSdistFilename(
  111. f"Invalid sdist filename (extension must be '.tar.gz' or '.zip'):"
  112. f" {filename!r}"
  113. )
  114. # We are requiring a PEP 440 version, which cannot contain dashes,
  115. # so we split on the last dash.
  116. name_part, sep, version_part = file_stem.rpartition("-")
  117. if not sep:
  118. raise InvalidSdistFilename(f"Invalid sdist filename: {filename!r}")
  119. name = canonicalize_name(name_part)
  120. try:
  121. version = Version(version_part)
  122. except InvalidVersion as e:
  123. raise InvalidSdistFilename(
  124. f"Invalid sdist filename (invalid version): {filename!r}"
  125. ) from e
  126. return (name, version)