cache_mapper.py 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. from __future__ import annotations
  2. import abc
  3. import hashlib
  4. from fsspec.implementations.local import make_path_posix
  5. class AbstractCacheMapper(abc.ABC):
  6. """Abstract super-class for mappers from remote URLs to local cached
  7. basenames.
  8. """
  9. @abc.abstractmethod
  10. def __call__(self, path: str) -> str: ...
  11. def __eq__(self, other: object) -> bool:
  12. # Identity only depends on class. When derived classes have attributes
  13. # they will need to be included.
  14. return isinstance(other, type(self))
  15. def __hash__(self) -> int:
  16. # Identity only depends on class. When derived classes have attributes
  17. # they will need to be included.
  18. return hash(type(self))
  19. class BasenameCacheMapper(AbstractCacheMapper):
  20. """Cache mapper that uses the basename of the remote URL and a fixed number
  21. of directory levels above this.
  22. The default is zero directory levels, meaning different paths with the same
  23. basename will have the same cached basename.
  24. """
  25. def __init__(self, directory_levels: int = 0):
  26. if directory_levels < 0:
  27. raise ValueError(
  28. "BasenameCacheMapper requires zero or positive directory_levels"
  29. )
  30. self.directory_levels = directory_levels
  31. # Separator for directories when encoded as strings.
  32. self._separator = "_@_"
  33. def __call__(self, path: str) -> str:
  34. path = make_path_posix(path)
  35. prefix, *bits = path.rsplit("/", self.directory_levels + 1)
  36. if bits:
  37. return self._separator.join(bits)
  38. else:
  39. return prefix # No separator found, simple filename
  40. def __eq__(self, other: object) -> bool:
  41. return super().__eq__(other) and self.directory_levels == other.directory_levels
  42. def __hash__(self) -> int:
  43. return super().__hash__() ^ hash(self.directory_levels)
  44. class HashCacheMapper(AbstractCacheMapper):
  45. """Cache mapper that uses a hash of the remote URL."""
  46. def __call__(self, path: str) -> str:
  47. return hashlib.sha256(path.encode()).hexdigest()
  48. def create_cache_mapper(same_names: bool) -> AbstractCacheMapper:
  49. """Factory method to create cache mapper for backward compatibility with
  50. ``CachingFileSystem`` constructor using ``same_names`` kwarg.
  51. """
  52. if same_names:
  53. return BasenameCacheMapper()
  54. else:
  55. return HashCacheMapper()