123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113 |
- import os
- import re
- _default_seps = os.sep + str(os.altsep) * bool(os.altsep)
- class Translator:
- """
- >>> Translator('xyz')
- Traceback (most recent call last):
- ...
- AssertionError: Invalid separators
- >>> Translator('')
- Traceback (most recent call last):
- ...
- AssertionError: Invalid separators
- """
- seps: str
- def __init__(self, seps: str = _default_seps):
- assert seps and set(seps) <= set(_default_seps), "Invalid separators"
- self.seps = seps
- def translate(self, pattern):
- """
- Given a glob pattern, produce a regex that matches it.
- """
- return self.extend(self.match_dirs(self.translate_core(pattern)))
- def extend(self, pattern):
- r"""
- Extend regex for pattern-wide concerns.
- Apply '(?s:)' to create a non-matching group that
- matches newlines (valid on Unix).
- Append '\Z' to imply fullmatch even when match is used.
- """
- return rf'(?s:{pattern})\Z'
- def match_dirs(self, pattern):
- """
- Ensure that zipfile.Path directory names are matched.
- zipfile.Path directory names always end in a slash.
- """
- return rf'{pattern}[/]?'
- def translate_core(self, pattern):
- r"""
- Given a glob pattern, produce a regex that matches it.
- >>> t = Translator()
- >>> t.translate_core('*.txt').replace('\\\\', '')
- '[^/]*\\.txt'
- >>> t.translate_core('a?txt')
- 'a[^/]txt'
- >>> t.translate_core('**/*').replace('\\\\', '')
- '.*/[^/][^/]*'
- """
- self.restrict_rglob(pattern)
- return ''.join(map(self.replace, separate(self.star_not_empty(pattern))))
- def replace(self, match):
- """
- Perform the replacements for a match from :func:`separate`.
- """
- return match.group('set') or (
- re.escape(match.group(0))
- .replace('\\*\\*', r'.*')
- .replace('\\*', rf'[^{re.escape(self.seps)}]*')
- .replace('\\?', r'[^/]')
- )
- def restrict_rglob(self, pattern):
- """
- Raise ValueError if ** appears in anything but a full path segment.
- >>> Translator().translate('**foo')
- Traceback (most recent call last):
- ...
- ValueError: ** must appear alone in a path segment
- """
- seps_pattern = rf'[{re.escape(self.seps)}]+'
- segments = re.split(seps_pattern, pattern)
- if any('**' in segment and segment != '**' for segment in segments):
- raise ValueError("** must appear alone in a path segment")
- def star_not_empty(self, pattern):
- """
- Ensure that * will not match an empty segment.
- """
- def handle_segment(match):
- segment = match.group(0)
- return '?*' if segment == '*' else segment
- not_seps_pattern = rf'[^{re.escape(self.seps)}]+'
- return re.sub(not_seps_pattern, handle_segment, pattern)
- def separate(pattern):
- """
- Separate out character sets to avoid translating their contents.
- >>> [m.group(0) for m in separate('*.txt')]
- ['*.txt']
- >>> [m.group(0) for m in separate('a[?]txt')]
- ['a', '[?]', 'txt']
- """
- return re.finditer(r'([^\[]+)|(?P<set>[\[].*?[\]])|([\[][^\]]*$)', pattern)
|