mapping.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. import array
  2. import logging
  3. import posixpath
  4. import warnings
  5. from collections.abc import MutableMapping
  6. from functools import cached_property
  7. from fsspec.core import url_to_fs
  8. logger = logging.getLogger("fsspec.mapping")
  9. class FSMap(MutableMapping):
  10. """Wrap a FileSystem instance as a mutable wrapping.
  11. The keys of the mapping become files under the given root, and the
  12. values (which must be bytes) the contents of those files.
  13. Parameters
  14. ----------
  15. root: string
  16. prefix for all the files
  17. fs: FileSystem instance
  18. check: bool (=True)
  19. performs a touch at the location, to check for write access.
  20. Examples
  21. --------
  22. >>> fs = FileSystem(**parameters) # doctest: +SKIP
  23. >>> d = FSMap('my-data/path/', fs) # doctest: +SKIP
  24. or, more likely
  25. >>> d = fs.get_mapper('my-data/path/')
  26. >>> d['loc1'] = b'Hello World' # doctest: +SKIP
  27. >>> list(d.keys()) # doctest: +SKIP
  28. ['loc1']
  29. >>> d['loc1'] # doctest: +SKIP
  30. b'Hello World'
  31. """
  32. def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
  33. self.fs = fs
  34. self.root = fs._strip_protocol(root)
  35. self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1]
  36. if missing_exceptions is None:
  37. missing_exceptions = (
  38. FileNotFoundError,
  39. IsADirectoryError,
  40. NotADirectoryError,
  41. )
  42. self.missing_exceptions = missing_exceptions
  43. self.check = check
  44. self.create = create
  45. if create:
  46. if not self.fs.exists(root):
  47. self.fs.mkdir(root)
  48. if check:
  49. if not self.fs.exists(root):
  50. raise ValueError(
  51. f"Path {root} does not exist. Create "
  52. f" with the ``create=True`` keyword"
  53. )
  54. self.fs.touch(root + "/a")
  55. self.fs.rm(root + "/a")
  56. @cached_property
  57. def dirfs(self):
  58. """dirfs instance that can be used with the same keys as the mapper"""
  59. from .implementations.dirfs import DirFileSystem
  60. return DirFileSystem(path=self._root_key_to_str, fs=self.fs)
  61. def clear(self):
  62. """Remove all keys below root - empties out mapping"""
  63. logger.info("Clear mapping at %s", self.root)
  64. try:
  65. self.fs.rm(self.root, True)
  66. self.fs.mkdir(self.root)
  67. except: # noqa: E722
  68. pass
  69. def getitems(self, keys, on_error="raise"):
  70. """Fetch multiple items from the store
  71. If the backend is async-able, this might proceed concurrently
  72. Parameters
  73. ----------
  74. keys: list(str)
  75. They keys to be fetched
  76. on_error : "raise", "omit", "return"
  77. If raise, an underlying exception will be raised (converted to KeyError
  78. if the type is in self.missing_exceptions); if omit, keys with exception
  79. will simply not be included in the output; if "return", all keys are
  80. included in the output, but the value will be bytes or an exception
  81. instance.
  82. Returns
  83. -------
  84. dict(key, bytes|exception)
  85. """
  86. keys2 = [self._key_to_str(k) for k in keys]
  87. oe = on_error if on_error == "raise" else "return"
  88. try:
  89. out = self.fs.cat(keys2, on_error=oe)
  90. if isinstance(out, bytes):
  91. out = {keys2[0]: out}
  92. except self.missing_exceptions as e:
  93. raise KeyError from e
  94. out = {
  95. k: (KeyError() if isinstance(v, self.missing_exceptions) else v)
  96. for k, v in out.items()
  97. }
  98. return {
  99. key: out[k2] if on_error == "raise" else out.get(k2, KeyError(k2))
  100. for key, k2 in zip(keys, keys2)
  101. if on_error == "return" or not isinstance(out[k2], BaseException)
  102. }
  103. def setitems(self, values_dict):
  104. """Set the values of multiple items in the store
  105. Parameters
  106. ----------
  107. values_dict: dict(str, bytes)
  108. """
  109. values = {self._key_to_str(k): maybe_convert(v) for k, v in values_dict.items()}
  110. self.fs.pipe(values)
  111. def delitems(self, keys):
  112. """Remove multiple keys from the store"""
  113. self.fs.rm([self._key_to_str(k) for k in keys])
  114. def _key_to_str(self, key):
  115. """Generate full path for the key"""
  116. if not isinstance(key, str):
  117. # raise TypeError("key must be of type `str`, got `{type(key).__name__}`"
  118. warnings.warn(
  119. "from fsspec 2023.5 onward FSMap non-str keys will raise TypeError",
  120. DeprecationWarning,
  121. )
  122. if isinstance(key, list):
  123. key = tuple(key)
  124. key = str(key)
  125. return f"{self._root_key_to_str}{key}".rstrip("/")
  126. def _str_to_key(self, s):
  127. """Strip path of to leave key name"""
  128. return s[len(self.root) :].lstrip("/")
  129. def __getitem__(self, key, default=None):
  130. """Retrieve data"""
  131. k = self._key_to_str(key)
  132. try:
  133. result = self.fs.cat(k)
  134. except self.missing_exceptions as exc:
  135. if default is not None:
  136. return default
  137. raise KeyError(key) from exc
  138. return result
  139. def pop(self, key, default=None):
  140. """Pop data"""
  141. result = self.__getitem__(key, default)
  142. try:
  143. del self[key]
  144. except KeyError:
  145. pass
  146. return result
  147. def __setitem__(self, key, value):
  148. """Store value in key"""
  149. key = self._key_to_str(key)
  150. self.fs.mkdirs(self.fs._parent(key), exist_ok=True)
  151. self.fs.pipe_file(key, maybe_convert(value))
  152. def __iter__(self):
  153. return (self._str_to_key(x) for x in self.fs.find(self.root))
  154. def __len__(self):
  155. return len(self.fs.find(self.root))
  156. def __delitem__(self, key):
  157. """Remove key"""
  158. try:
  159. self.fs.rm(self._key_to_str(key))
  160. except Exception as exc:
  161. raise KeyError from exc
  162. def __contains__(self, key):
  163. """Does key exist in mapping?"""
  164. path = self._key_to_str(key)
  165. return self.fs.isfile(path)
  166. def __reduce__(self):
  167. return FSMap, (self.root, self.fs, False, False, self.missing_exceptions)
  168. def maybe_convert(value):
  169. if isinstance(value, array.array) or hasattr(value, "__array__"):
  170. # bytes-like things
  171. if hasattr(value, "dtype") and value.dtype.kind in "Mm":
  172. # The buffer interface doesn't support datetime64/timdelta64 numpy
  173. # arrays
  174. value = value.view("int64")
  175. value = bytes(memoryview(value))
  176. return value
  177. def get_mapper(
  178. url="",
  179. check=False,
  180. create=False,
  181. missing_exceptions=None,
  182. alternate_root=None,
  183. **kwargs,
  184. ):
  185. """Create key-value interface for given URL and options
  186. The URL will be of the form "protocol://location" and point to the root
  187. of the mapper required. All keys will be file-names below this location,
  188. and their values the contents of each key.
  189. Also accepts compound URLs like zip::s3://bucket/file.zip , see ``fsspec.open``.
  190. Parameters
  191. ----------
  192. url: str
  193. Root URL of mapping
  194. check: bool
  195. Whether to attempt to read from the location before instantiation, to
  196. check that the mapping does exist
  197. create: bool
  198. Whether to make the directory corresponding to the root before
  199. instantiating
  200. missing_exceptions: None or tuple
  201. If given, these exception types will be regarded as missing keys and
  202. return KeyError when trying to read data. By default, you get
  203. (FileNotFoundError, IsADirectoryError, NotADirectoryError)
  204. alternate_root: None or str
  205. In cases of complex URLs, the parser may fail to pick the correct part
  206. for the mapper root, so this arg can override
  207. Returns
  208. -------
  209. ``FSMap`` instance, the dict-like key-value store.
  210. """
  211. # Removing protocol here - could defer to each open() on the backend
  212. fs, urlpath = url_to_fs(url, **kwargs)
  213. root = alternate_root if alternate_root is not None else urlpath
  214. return FSMap(root, fs, check, create, missing_exceptions=missing_exceptions)