# memory.py (10 KB)

  1. from __future__ import annotations
  2. import logging
  3. from datetime import datetime, timezone
  4. from errno import ENOTEMPTY
  5. from io import BytesIO
  6. from pathlib import PurePath, PureWindowsPath
  7. from typing import Any, ClassVar
  8. from fsspec import AbstractFileSystem
  9. from fsspec.implementations.local import LocalFileSystem
  10. from fsspec.utils import stringify_path
  11. logger = logging.getLogger("fsspec.memoryfs")
  12. class MemoryFileSystem(AbstractFileSystem):
  13. """A filesystem based on a dict of BytesIO objects
  14. This is a global filesystem so instances of this class all point to the same
  15. in memory filesystem.
  16. """
  17. store: ClassVar[dict[str, Any]] = {} # global, do not overwrite!
  18. pseudo_dirs = [""] # global, do not overwrite!
  19. protocol = "memory"
  20. root_marker = "/"
  21. @classmethod
  22. def _strip_protocol(cls, path):
  23. if isinstance(path, PurePath):
  24. if isinstance(path, PureWindowsPath):
  25. return LocalFileSystem._strip_protocol(path)
  26. else:
  27. path = stringify_path(path)
  28. if path.startswith("memory://"):
  29. path = path[len("memory://") :]
  30. if "::" in path or "://" in path:
  31. return path.rstrip("/")
  32. path = path.lstrip("/").rstrip("/")
  33. return "/" + path if path else ""
  34. def ls(self, path, detail=True, **kwargs):
  35. path = self._strip_protocol(path)
  36. if path in self.store:
  37. # there is a key with this exact name
  38. if not detail:
  39. return [path]
  40. return [
  41. {
  42. "name": path,
  43. "size": self.store[path].size,
  44. "type": "file",
  45. "created": self.store[path].created.timestamp(),
  46. }
  47. ]
  48. paths = set()
  49. starter = path + "/"
  50. out = []
  51. for p2 in tuple(self.store):
  52. if p2.startswith(starter):
  53. if "/" not in p2[len(starter) :]:
  54. # exact child
  55. out.append(
  56. {
  57. "name": p2,
  58. "size": self.store[p2].size,
  59. "type": "file",
  60. "created": self.store[p2].created.timestamp(),
  61. }
  62. )
  63. elif len(p2) > len(starter):
  64. # implied child directory
  65. ppath = starter + p2[len(starter) :].split("/", 1)[0]
  66. if ppath not in paths:
  67. out = out or []
  68. out.append(
  69. {
  70. "name": ppath,
  71. "size": 0,
  72. "type": "directory",
  73. }
  74. )
  75. paths.add(ppath)
  76. for p2 in self.pseudo_dirs:
  77. if p2.startswith(starter):
  78. if "/" not in p2[len(starter) :]:
  79. # exact child pdir
  80. if p2 not in paths:
  81. out.append({"name": p2, "size": 0, "type": "directory"})
  82. paths.add(p2)
  83. else:
  84. # directory implied by deeper pdir
  85. ppath = starter + p2[len(starter) :].split("/", 1)[0]
  86. if ppath not in paths:
  87. out.append({"name": ppath, "size": 0, "type": "directory"})
  88. paths.add(ppath)
  89. if not out:
  90. if path in self.pseudo_dirs:
  91. # empty dir
  92. return []
  93. raise FileNotFoundError(path)
  94. if detail:
  95. return out
  96. return sorted([f["name"] for f in out])
  97. def mkdir(self, path, create_parents=True, **kwargs):
  98. path = self._strip_protocol(path)
  99. if path in self.store or path in self.pseudo_dirs:
  100. raise FileExistsError(path)
  101. if self._parent(path).strip("/") and self.isfile(self._parent(path)):
  102. raise NotADirectoryError(self._parent(path))
  103. if create_parents and self._parent(path).strip("/"):
  104. try:
  105. self.mkdir(self._parent(path), create_parents, **kwargs)
  106. except FileExistsError:
  107. pass
  108. if path and path not in self.pseudo_dirs:
  109. self.pseudo_dirs.append(path)
  110. def makedirs(self, path, exist_ok=False):
  111. try:
  112. self.mkdir(path, create_parents=True)
  113. except FileExistsError:
  114. if not exist_ok:
  115. raise
  116. def pipe_file(self, path, value, mode="overwrite", **kwargs):
  117. """Set the bytes of given file
  118. Avoids copies of the data if possible
  119. """
  120. mode = "xb" if mode == "create" else "wb"
  121. self.open(path, mode=mode, data=value)
  122. def rmdir(self, path):
  123. path = self._strip_protocol(path)
  124. if path == "":
  125. # silently avoid deleting FS root
  126. return
  127. if path in self.pseudo_dirs:
  128. if not self.ls(path):
  129. self.pseudo_dirs.remove(path)
  130. else:
  131. raise OSError(ENOTEMPTY, "Directory not empty", path)
  132. else:
  133. raise FileNotFoundError(path)
  134. def info(self, path, **kwargs):
  135. logger.debug("info: %s", path)
  136. path = self._strip_protocol(path)
  137. if path in self.pseudo_dirs or any(
  138. p.startswith(path + "/") for p in list(self.store) + self.pseudo_dirs
  139. ):
  140. return {
  141. "name": path,
  142. "size": 0,
  143. "type": "directory",
  144. }
  145. elif path in self.store:
  146. filelike = self.store[path]
  147. return {
  148. "name": path,
  149. "size": filelike.size,
  150. "type": "file",
  151. "created": getattr(filelike, "created", None),
  152. }
  153. else:
  154. raise FileNotFoundError(path)
  155. def _open(
  156. self,
  157. path,
  158. mode="rb",
  159. block_size=None,
  160. autocommit=True,
  161. cache_options=None,
  162. **kwargs,
  163. ):
  164. path = self._strip_protocol(path)
  165. if "x" in mode and self.exists(path):
  166. raise FileExistsError
  167. if path in self.pseudo_dirs:
  168. raise IsADirectoryError(path)
  169. parent = path
  170. while len(parent) > 1:
  171. parent = self._parent(parent)
  172. if self.isfile(parent):
  173. raise FileExistsError(parent)
  174. if mode in ["rb", "ab", "r+b"]:
  175. if path in self.store:
  176. f = self.store[path]
  177. if mode == "ab":
  178. # position at the end of file
  179. f.seek(0, 2)
  180. else:
  181. # position at the beginning of file
  182. f.seek(0)
  183. return f
  184. else:
  185. raise FileNotFoundError(path)
  186. elif mode in {"wb", "xb"}:
  187. if mode == "xb" and self.exists(path):
  188. raise FileExistsError
  189. m = MemoryFile(self, path, kwargs.get("data"))
  190. if not self._intrans:
  191. m.commit()
  192. return m
  193. else:
  194. name = self.__class__.__name__
  195. raise ValueError(f"unsupported file mode for {name}: {mode!r}")
  196. def cp_file(self, path1, path2, **kwargs):
  197. path1 = self._strip_protocol(path1)
  198. path2 = self._strip_protocol(path2)
  199. if self.isfile(path1):
  200. self.store[path2] = MemoryFile(
  201. self, path2, self.store[path1].getvalue()
  202. ) # implicit copy
  203. elif self.isdir(path1):
  204. if path2 not in self.pseudo_dirs:
  205. self.pseudo_dirs.append(path2)
  206. else:
  207. raise FileNotFoundError(path1)
  208. def cat_file(self, path, start=None, end=None, **kwargs):
  209. logger.debug("cat: %s", path)
  210. path = self._strip_protocol(path)
  211. try:
  212. return bytes(self.store[path].getbuffer()[start:end])
  213. except KeyError as e:
  214. raise FileNotFoundError(path) from e
  215. def _rm(self, path):
  216. path = self._strip_protocol(path)
  217. try:
  218. del self.store[path]
  219. except KeyError as e:
  220. raise FileNotFoundError(path) from e
  221. def modified(self, path):
  222. path = self._strip_protocol(path)
  223. try:
  224. return self.store[path].modified
  225. except KeyError as e:
  226. raise FileNotFoundError(path) from e
  227. def created(self, path):
  228. path = self._strip_protocol(path)
  229. try:
  230. return self.store[path].created
  231. except KeyError as e:
  232. raise FileNotFoundError(path) from e
  233. def isfile(self, path):
  234. path = self._strip_protocol(path)
  235. return path in self.store
  236. def rm(self, path, recursive=False, maxdepth=None):
  237. if isinstance(path, str):
  238. path = self._strip_protocol(path)
  239. else:
  240. path = [self._strip_protocol(p) for p in path]
  241. paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
  242. for p in reversed(paths):
  243. if self.isfile(p):
  244. self.rm_file(p)
  245. # If the expanded path doesn't exist, it is only because the expanded
  246. # path was a directory that does not exist in self.pseudo_dirs. This
  247. # is possible if you directly create files without making the
  248. # directories first.
  249. elif not self.exists(p):
  250. continue
  251. else:
  252. self.rmdir(p)
  253. class MemoryFile(BytesIO):
  254. """A BytesIO which can't close and works as a context manager
  255. Can initialise with data. Each path should only be active once at any moment.
  256. No need to provide fs, path if auto-committing (default)
  257. """
  258. def __init__(self, fs=None, path=None, data=None):
  259. logger.debug("open file %s", path)
  260. self.fs = fs
  261. self.path = path
  262. self.created = datetime.now(tz=timezone.utc)
  263. self.modified = datetime.now(tz=timezone.utc)
  264. if data:
  265. super().__init__(data)
  266. self.seek(0)
  267. @property
  268. def size(self):
  269. return self.getbuffer().nbytes
  270. def __enter__(self):
  271. return self
  272. def close(self):
  273. pass
  274. def discard(self):
  275. pass
  276. def commit(self):
  277. self.fs.store[self.path] = self
  278. self.modified = datetime.now(tz=timezone.utc)