spec.py 74 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242
  1. from __future__ import annotations
  2. import io
  3. import json
  4. import logging
  5. import os
  6. import threading
  7. import warnings
  8. import weakref
  9. from errno import ESPIPE
  10. from glob import has_magic
  11. from hashlib import sha256
  12. from typing import Any, ClassVar
  13. from .callbacks import DEFAULT_CALLBACK
  14. from .config import apply_config, conf
  15. from .dircache import DirCache
  16. from .transaction import Transaction
  17. from .utils import (
  18. _unstrip_protocol,
  19. glob_translate,
  20. isfilelike,
  21. other_paths,
  22. read_block,
  23. stringify_path,
  24. tokenize,
  25. )
  26. logger = logging.getLogger("fsspec")
  27. def make_instance(cls, args, kwargs):
  28. return cls(*args, **kwargs)
class _Cached(type):
    """
    Metaclass for caching file system instances.

    Notes
    -----
    Instances are cached according to

    * The values of the class attributes listed in `_extra_tokenize_attributes`
    * The arguments passed to ``__init__``.

    This creates an additional reference to the filesystem, which prevents the
    filesystem from being garbage collected when all *user* references go away.
    A call to the :meth:`AbstractFileSystem.clear_instance_cache` must *also*
    be made for a filesystem instance to be garbage collected.
    """

    def __init__(cls, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Note: we intentionally create a reference here, to avoid garbage
        # collecting instances when all other references are gone. To really
        # delete a FileSystem, the cache must be cleared.
        if conf.get("weakref_instance_cache"):  # pragma: no cover
            # debug option for analysing fork/spawn conditions
            cls._cache = weakref.WeakValueDictionary()
        else:
            cls._cache = {}
        # PID recorded at class creation; compared in __call__ to detect forks
        cls._pid = os.getpid()

    def __call__(cls, *args, **kwargs):
        # merge in configured defaults for this filesystem class
        kwargs = apply_config(cls, kwargs)
        extra_tokens = tuple(
            getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes
        )
        # cache key: class, process, thread, and all constructor arguments
        token = tokenize(
            cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs
        )
        skip = kwargs.pop("skip_instance_cache", False)
        if os.getpid() != cls._pid:
            # process has forked: cached instances belong to the parent,
            # so discard them all and adopt the new pid
            cls._cache.clear()
            cls._pid = os.getpid()
        if not skip and cls.cachable and token in cls._cache:
            # cache hit: reuse the existing instance
            cls._latest = token
            return cls._cache[token]
        else:
            obj = super().__call__(*args, **kwargs)
            # Setting _fs_token here causes some static linters to complain.
            obj._fs_token_ = token
            obj.storage_args = args
            obj.storage_options = kwargs
            if obj.async_impl and obj.mirror_sync_methods:
                from .asyn import mirror_sync_methods

                mirror_sync_methods(obj)
            if cls.cachable and not skip:
                cls._latest = token
                cls._cache[token] = obj
            return obj
class AbstractFileSystem(metaclass=_Cached):
    """
    An abstract super-class for pythonic file-systems

    Implementations are expected to be compatible with or, better, subclass
    from here.
    """

    cachable = True  # this class can be cached, instances reused
    _cached = False  # set True on first __init__; lets cached instances skip re-init
    blocksize = 2**22  # default block size, 4 MiB
    sep = "/"  # path separator used by this filesystem
    protocol: ClassVar[str | tuple[str, ...]] = "abstract"
    _latest = None  # token of the most recently created/fetched instance (see _Cached)
    async_impl = False  # True for async implementations
    mirror_sync_methods = False  # whether _Cached should generate sync wrappers
    root_marker = ""  # For some FSs, may require leading '/' or other character
    transaction_type = Transaction  # class instantiated by the .transaction property

    #: Extra *class attributes* that should be considered when hashing.
    _extra_tokenize_attributes = ()

    # Set by _Cached metaclass
    storage_args: tuple[Any, ...]
    storage_options: dict[str, Any]
    def __init__(self, *args, **storage_options):
        """Create and configure file-system instance

        Instances may be cachable, so if similar enough arguments are seen
        a new instance is not required. The token attribute exists to allow
        implementations to cache instances if they wish.

        A reasonable default should be provided if there are no arguments.

        Subclasses should call this method.

        Parameters
        ----------
        use_listings_cache, listings_expiry_time, max_paths:
            passed to ``DirCache``, if the implementation supports
            directory listing caching. Pass use_listings_cache=False
            to disable such caching.
        skip_instance_cache: bool
            If this is a cachable implementation, pass True here to force
            creating a new instance even if a matching instance exists, and prevent
            storing this instance.
        asynchronous: bool
        loop: asyncio-compatible IOLoop or None
        """
        if self._cached:
            # reusing instance, don't change
            return
        self._cached = True
        self._intrans = False  # True while inside a transaction
        self._transaction = None  # lazily created by the .transaction property
        self._invalidated_caches_in_transaction = []
        self.dircache = DirCache(**storage_options)

        # deprecated options: warn but otherwise ignore
        if storage_options.pop("add_docs", None):
            warnings.warn("add_docs is no longer supported.", FutureWarning)

        if storage_options.pop("add_aliases", None):
            warnings.warn("add_aliases has been removed.", FutureWarning)
        # This is set in _Cached
        self._fs_token_ = None
    @property
    def fsid(self):
        """Persistent filesystem id that can be used to compare filesystems
        across sessions.
        """
        raise NotImplementedError

    @property
    def _fs_token(self):
        # token computed by the _Cached metaclass from the constructor arguments
        return self._fs_token_

    def __dask_tokenize__(self):
        # allows dask to deterministically hash filesystem instances
        return self._fs_token

    def __hash__(self):
        # the token is interpreted as a hex string
        return int(self._fs_token, 16)

    def __eq__(self, other):
        # equal iff same class and identical constructor token
        return isinstance(other, type(self)) and self._fs_token == other._fs_token

    def __reduce__(self):
        # pickle support: rebuild by calling the constructor with stored args
        return make_instance, (type(self), self.storage_args, self.storage_options)
  153. @classmethod
  154. def _strip_protocol(cls, path):
  155. """Turn path from fully-qualified to file-system-specific
  156. May require FS-specific handling, e.g., for relative paths or links.
  157. """
  158. if isinstance(path, list):
  159. return [cls._strip_protocol(p) for p in path]
  160. path = stringify_path(path)
  161. protos = (cls.protocol,) if isinstance(cls.protocol, str) else cls.protocol
  162. for protocol in protos:
  163. if path.startswith(protocol + "://"):
  164. path = path[len(protocol) + 3 :]
  165. elif path.startswith(protocol + "::"):
  166. path = path[len(protocol) + 2 :]
  167. path = path.rstrip("/")
  168. # use of root_marker to make minimum required path, e.g., "/"
  169. return path or cls.root_marker
  170. def unstrip_protocol(self, name: str) -> str:
  171. """Format FS-specific path to generic, including protocol"""
  172. protos = (self.protocol,) if isinstance(self.protocol, str) else self.protocol
  173. for protocol in protos:
  174. if name.startswith(f"{protocol}://"):
  175. return name
  176. return f"{protos[0]}://{name}"
  177. @staticmethod
  178. def _get_kwargs_from_urls(path):
  179. """If kwargs can be encoded in the paths, extract them here
  180. This should happen before instantiation of the class; incoming paths
  181. then should be amended to strip the options in methods.
  182. Examples may look like an sftp path "sftp://user@host:/my/path", where
  183. the user and host should become kwargs and later get stripped.
  184. """
  185. # by default, nothing happens
  186. return {}
    @classmethod
    def current(cls):
        """Return the most recently instantiated FileSystem

        If no instance has been created, then create one with defaults
        """
        # _latest and _cache are maintained by the _Cached metaclass
        if cls._latest in cls._cache:
            return cls._cache[cls._latest]
        return cls()
    @property
    def transaction(self):
        """A context within which files are committed together upon exit

        Requires the file class to implement `.commit()` and `.discard()`
        for the normal and exception cases.
        """
        # lazily create a single transaction object per instance
        if self._transaction is None:
            self._transaction = self.transaction_type(self)
        return self._transaction

    def start_transaction(self):
        """Begin write transaction for deferring files, non-context version"""
        self._intrans = True
        # always start with a fresh transaction object
        self._transaction = self.transaction_type(self)
        return self.transaction

    def end_transaction(self):
        """Finish write transaction, non-context version"""
        self.transaction.complete()
        self._transaction = None
        # The invalid cache must be cleared after the transaction is completed.
        for path in self._invalidated_caches_in_transaction:
            self.invalidate_cache(path)
        self._invalidated_caches_in_transaction.clear()
    def invalidate_cache(self, path=None):
        """
        Discard any cached directory information

        Parameters
        ----------
        path: string or None
            If None, clear all listings cached else listings at or under given
            path.
        """
        # Not necessary to implement invalidation mechanism, may have no cache.
        # But if have, you should call this method of parent class from your
        # subclass to ensure expiring caches after transactions correctly.
        # See the implementation of FTPFileSystem in ftp.py
        if self._intrans:
            # defer: end_transaction() replays these invalidations on completion
            self._invalidated_caches_in_transaction.append(path)
    def mkdir(self, path, create_parents=True, **kwargs):
        """
        Create directory entry at path

        For systems that don't have true directories, may create an entry for
        this instance only and not touch the real filesystem

        Parameters
        ----------
        path: str
            location
        create_parents: bool
            if True, this is equivalent to ``makedirs``
        kwargs:
            may be permissions, etc.
        """
        pass  # not necessary to implement, may not have directories

    def makedirs(self, path, exist_ok=False):
        """Recursively make directories

        Creates directory at path and any intervening required directories.
        Raises exception if, for instance, the path already exists but is a
        file.

        Parameters
        ----------
        path: str
            leaf directory name
        exist_ok: bool (False)
            If False, will error if the target already exists
        """
        pass  # not necessary to implement, may not have directories

    def rmdir(self, path):
        """Remove a directory, if empty"""
        pass  # not necessary to implement, may not have directories
    def ls(self, path, detail=True, **kwargs):
        """List objects at path.

        This should include subdirectories and files at that location. The
        difference between a file and a directory must be clear when details
        are requested.

        The specific keys, or perhaps a FileInfo class, or similar, is TBD,
        but must be consistent across implementations.
        Must include:

        - full path to the entry (without protocol)
        - size of the entry, in bytes. If the value cannot be determined, will
          be ``None``.
        - type of entry, "file", "directory" or other

        Additional information
        may be present, appropriate to the file-system, e.g., generation,
        checksum, etc.

        May use refresh=True|False to allow use of self._ls_from_cache to
        check for a saved listing and avoid calling the backend. This would be
        common where listing may be expensive.

        Parameters
        ----------
        path: str
        detail: bool
            if True, gives a list of dictionaries, where each is the same as
            the result of ``info(path)``. If False, gives a list of paths
            (str).
        kwargs: may have additional backend-specific options, such as version
            information

        Returns
        -------
        List of strings if detail is False, or list of directory information
        dicts if detail is True.
        """
        # abstract: every concrete filesystem must provide its own listing
        raise NotImplementedError
    def _ls_from_cache(self, path):
        """Check cache for listing

        Returns listing, if found (may be empty list for a directory that exists
        but contains nothing), None if not in cache.

        Raises FileNotFoundError if the parent's listing is cached but does
        not contain this path.
        """
        parent = self._parent(path)
        # fast path: the listing of `path` itself is cached
        try:
            return self.dircache[path.rstrip("/")]
        except KeyError:
            pass
        # otherwise look for the entry inside the cached parent listing
        try:
            files = [
                f
                for f in self.dircache[parent]
                if f["name"] == path
                or (f["name"] == path.rstrip("/") and f["type"] == "directory")
            ]
            if len(files) == 0:
                # parent dir was listed but did not contain this file
                # NB: FileNotFoundError escapes the `except KeyError` below
                raise FileNotFoundError(path)
            return files
        except KeyError:
            # parent not cached either: fall through, implicitly returning None
            pass
    def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs):
        """Return all files under the given path.

        List all files, recursing into subdirectories; output is iterator-style,
        like ``os.walk()``. For a simple list of files, ``find()`` is available.

        When topdown is True, the caller can modify the dirnames list in-place (perhaps
        using del or slice assignment), and walk() will
        only recurse into the subdirectories whose names remain in dirnames;
        this can be used to prune the search, impose a specific order of visiting,
        or even to inform walk() about directories the caller creates or renames before
        it resumes walk() again.
        Modifying dirnames when topdown is False has no effect. (see os.walk)

        Note that the "files" outputted will include anything that is not
        a directory, such as links.

        Parameters
        ----------
        path: str
            Root to recurse into
        maxdepth: int
            Maximum recursion depth. None means limitless, but not recommended
            on link-based file-systems.
        topdown: bool (True)
            Whether to walk the directory tree from the top downwards or from
            the bottom upwards.
        on_error: "omit", "raise", a callable
            if omit (default), path with exception will simply be empty;
            If raise, an underlying exception will be raised;
            if callable, it will be called with a single OSError instance as argument
        kwargs: passed to ``ls``
        """
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        path = self._strip_protocol(path)
        full_dirs = {}  # basename -> full path, used for recursion below
        dirs = {}  # basename -> info dict for subdirectories
        files = {}  # basename -> info dict for non-directories

        detail = kwargs.pop("detail", False)
        try:
            listing = self.ls(path, detail=True, **kwargs)
        except (FileNotFoundError, OSError) as e:
            if on_error == "raise":
                raise
            if callable(on_error):
                on_error(e)
            # "omit": yield nothing for this path
            return

        for info in listing:
            # each info name must be at least [path]/part , but here
            # we check also for names like [path]/part/
            pathname = info["name"].rstrip("/")
            name = pathname.rsplit("/", 1)[-1]
            if info["type"] == "directory" and pathname != path:
                # do not include "self" path
                full_dirs[name] = pathname
                dirs[name] = info
            elif pathname == path:
                # file-like with same name as given path
                files[""] = info
            else:
                files[name] = info

        if not detail:
            # plain mode: yield lists of names rather than info dicts
            dirs = list(dirs)
            files = list(files)

        if topdown:
            # Yield before recursion if walking top down
            yield path, dirs, files

        if maxdepth is not None:
            maxdepth -= 1
            if maxdepth < 1:
                # depth exhausted: emit (bottom-up only) and stop recursing
                if not topdown:
                    yield path, dirs, files
                return

        for d in dirs:
            yield from self.walk(
                full_dirs[d],
                maxdepth=maxdepth,
                detail=detail,
                topdown=topdown,
                **kwargs,
            )

        if not topdown:
            # Yield after recursion if walking bottom up
            yield path, dirs, files
  400. def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
  401. """List all files below path.
  402. Like posix ``find`` command without conditions
  403. Parameters
  404. ----------
  405. path : str
  406. maxdepth: int or None
  407. If not None, the maximum number of levels to descend
  408. withdirs: bool
  409. Whether to include directory paths in the output. This is True
  410. when used by glob, but users usually only want files.
  411. kwargs are passed to ``ls``.
  412. """
  413. # TODO: allow equivalent of -name parameter
  414. path = self._strip_protocol(path)
  415. out = {}
  416. # Add the root directory if withdirs is requested
  417. # This is needed for posix glob compliance
  418. if withdirs and path != "" and self.isdir(path):
  419. out[path] = self.info(path)
  420. for _, dirs, files in self.walk(path, maxdepth, detail=True, **kwargs):
  421. if withdirs:
  422. files.update(dirs)
  423. out.update({info["name"]: info for name, info in files.items()})
  424. if not out and self.isfile(path):
  425. # walk works on directories, but find should also return [path]
  426. # when path happens to be a file
  427. out[path] = {}
  428. names = sorted(out)
  429. if not detail:
  430. return names
  431. else:
  432. return {name: out[name] for name in names}
  433. def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs):
  434. """Space used by files and optionally directories within a path
  435. Directory size does not include the size of its contents.
  436. Parameters
  437. ----------
  438. path: str
  439. total: bool
  440. Whether to sum all the file sizes
  441. maxdepth: int or None
  442. Maximum number of directory levels to descend, None for unlimited.
  443. withdirs: bool
  444. Whether to include directory paths in the output.
  445. kwargs: passed to ``find``
  446. Returns
  447. -------
  448. Dict of {path: size} if total=False, or int otherwise, where numbers
  449. refer to bytes used.
  450. """
  451. sizes = {}
  452. if withdirs and self.isdir(path):
  453. # Include top-level directory in output
  454. info = self.info(path)
  455. sizes[info["name"]] = info["size"]
  456. for f in self.find(path, maxdepth=maxdepth, withdirs=withdirs, **kwargs):
  457. info = self.info(f)
  458. sizes[info["name"]] = info["size"]
  459. if total:
  460. return sum(sizes.values())
  461. else:
  462. return sizes
    def glob(self, path, maxdepth=None, **kwargs):
        """
        Find files by glob-matching.

        If the path ends with '/', only folders are returned.

        We support ``"**"``,
        ``"?"`` and ``"[..]"``. We do not support ^ for pattern negation.

        The `maxdepth` option is applied on the first `**` found in the path.

        kwargs are passed to ``ls``.
        """
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")
        import re

        seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
        ends_with_sep = path.endswith(seps)  # _strip_protocol strips trailing slash
        path = self._strip_protocol(path)
        # directories should match patterns ending in a separator or "**"
        append_slash_to_dirname = ends_with_sep or path.endswith(
            tuple(sep + "**" for sep in seps)
        )
        # index of the first magic character of each kind, or len(path) if absent
        idx_star = path.find("*") if path.find("*") >= 0 else len(path)
        idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
        idx_brace = path.find("[") if path.find("[") >= 0 else len(path)

        min_idx = min(idx_star, idx_qmark, idx_brace)

        detail = kwargs.pop("detail", False)

        if not has_magic(path):
            # no wildcards at all: reduce to an existence check
            if self.exists(path, **kwargs):
                if not detail:
                    return [path]
                else:
                    return {path: self.info(path, **kwargs)}
            else:
                if not detail:
                    return []  # glob of non-existent returns empty
                else:
                    return {}
        elif "/" in path[:min_idx]:
            # listing root: everything up to the last "/" before the first magic char
            min_idx = path[:min_idx].rindex("/")
            root = path[: min_idx + 1]
            depth = path[min_idx + 1 :].count("/") + 1
        else:
            root = ""
            depth = path[min_idx + 1 :].count("/") + 1

        if "**" in path:
            if maxdepth is not None:
                # apply maxdepth relative to the position of the first "**"
                idx_double_stars = path.find("**")
                depth_double_stars = path[idx_double_stars:].count("/") + 1
                depth = depth - depth_double_stars + maxdepth
            else:
                depth = None

        allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)

        pattern = glob_translate(path + ("/" if ends_with_sep else ""))
        pattern = re.compile(pattern)

        out = {
            p: info
            for p, info in sorted(allpaths.items())
            if pattern.match(
                p + "/"
                if append_slash_to_dirname and info["type"] == "directory"
                else p
            )
        }

        if detail:
            return out
        else:
            return list(out)
  527. def exists(self, path, **kwargs):
  528. """Is there a file at the given path"""
  529. try:
  530. self.info(path, **kwargs)
  531. return True
  532. except: # noqa: E722
  533. # any exception allowed bar FileNotFoundError?
  534. return False
  535. def lexists(self, path, **kwargs):
  536. """If there is a file at the given path (including
  537. broken links)"""
  538. return self.exists(path)
    def info(self, path, **kwargs):
        """Give details of entry at path

        Returns a single dictionary, with exactly the same information as ``ls``
        would with ``detail=True``.

        The default implementation calls ls and could be overridden by a
        shortcut. kwargs are passed on to ``ls()``.

        Some file systems might not be able to measure the file's size, in
        which case, the returned dict will include ``'size': None``.

        Returns
        -------
        dict with keys: name (full path in the FS), size (in bytes), type (file,
        directory, or something else) and other FS-specific keys.
        """
        path = self._strip_protocol(path)
        # first try the parent's listing, which contains this entry if it exists
        out = self.ls(self._parent(path), detail=True, **kwargs)
        out = [o for o in out if o["name"].rstrip("/") == path]
        if out:
            return out[0]
        # otherwise list the path itself (works if it is a directory)
        out = self.ls(path, detail=True, **kwargs)
        path = path.rstrip("/")
        out1 = [o for o in out if o["name"].rstrip("/") == path]
        if len(out1) == 1:
            # single exact match: a file-like entry
            if "size" not in out1[0]:
                out1[0]["size"] = None
            return out1[0]
        elif len(out1) > 1 or out:
            # listing succeeded but is not a single self-entry: a directory
            return {"name": path, "size": 0, "type": "directory"}
        else:
            raise FileNotFoundError(path)
    def checksum(self, path):
        """Unique value for current version of file

        If the checksum is the same from one moment to another, the contents
        are guaranteed to be the same. If the checksum changes, the contents
        *might* have changed.

        This should normally be overridden; default will probably capture
        creation/modification timestamp (which would be good) or maybe
        access timestamp (which would be bad)
        """
        # hash the full info dict; tokenize yields a hex string
        return int(tokenize(self.info(path)), 16)

    def size(self, path):
        """Size in bytes of file"""
        # may be None if the backend cannot determine the size
        return self.info(path).get("size", None)

    def sizes(self, paths):
        """Size in bytes of each file in a list of paths"""
        return [self.size(p) for p in paths]
  584. def isdir(self, path):
  585. """Is this entry directory-like?"""
  586. try:
  587. return self.info(path)["type"] == "directory"
  588. except OSError:
  589. return False
  590. def isfile(self, path):
  591. """Is this entry file-like?"""
  592. try:
  593. return self.info(path)["type"] == "file"
  594. except: # noqa: E722
  595. return False
  596. def read_text(self, path, encoding=None, errors=None, newline=None, **kwargs):
  597. """Get the contents of the file as a string.
  598. Parameters
  599. ----------
  600. path: str
  601. URL of file on this filesystems
  602. encoding, errors, newline: same as `open`.
  603. """
  604. with self.open(
  605. path,
  606. mode="r",
  607. encoding=encoding,
  608. errors=errors,
  609. newline=newline,
  610. **kwargs,
  611. ) as f:
  612. return f.read()
  613. def write_text(
  614. self, path, value, encoding=None, errors=None, newline=None, **kwargs
  615. ):
  616. """Write the text to the given file.
  617. An existing file will be overwritten.
  618. Parameters
  619. ----------
  620. path: str
  621. URL of file on this filesystems
  622. value: str
  623. Text to write.
  624. encoding, errors, newline: same as `open`.
  625. """
  626. with self.open(
  627. path,
  628. mode="w",
  629. encoding=encoding,
  630. errors=errors,
  631. newline=newline,
  632. **kwargs,
  633. ) as f:
  634. return f.write(value)
def cat_file(self, path, start=None, end=None, **kwargs):
    """Get the content of a file

    Parameters
    ----------
    path: URL of file on this filesystems
    start, end: int
        Bytes limits of the read. If negative, backwards from end,
        like usual python slices. Either can be None for start or
        end of file, respectively
    kwargs: passed to ``open()``.
    """
    # explicitly set buffering off?
    with self.open(path, "rb", **kwargs) as f:
        if start is not None:
            if start >= 0:
                f.seek(start)
            else:
                # negative start: offset back from the end, clamped at 0
                f.seek(max(0, f.size + start))
        if end is not None:
            if end < 0:
                # negative end: convert to an absolute end position
                end = f.size + end
            # read from current position (possibly moved above) up to end
            return f.read(end - f.tell())
        return f.read()
  658. def pipe_file(self, path, value, mode="overwrite", **kwargs):
  659. """Set the bytes of given file"""
  660. if mode == "create" and self.exists(path):
  661. # non-atomic but simple way; or could use "xb" in open(), which is likely
  662. # not as well supported
  663. raise FileExistsError
  664. with self.open(path, "wb", **kwargs) as f:
  665. f.write(value)
  666. def pipe(self, path, value=None, **kwargs):
  667. """Put value into path
  668. (counterpart to ``cat``)
  669. Parameters
  670. ----------
  671. path: string or dict(str, bytes)
  672. If a string, a single remote location to put ``value`` bytes; if a dict,
  673. a mapping of {path: bytesvalue}.
  674. value: bytes, optional
  675. If using a single path, these are the bytes to put there. Ignored if
  676. ``path`` is a dict
  677. """
  678. if isinstance(path, str):
  679. self.pipe_file(self._strip_protocol(path), value, **kwargs)
  680. elif isinstance(path, dict):
  681. for k, v in path.items():
  682. self.pipe_file(self._strip_protocol(k), v, **kwargs)
  683. else:
  684. raise ValueError("path must be str or dict")
  685. def cat_ranges(
  686. self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
  687. ):
  688. """Get the contents of byte ranges from one or more files
  689. Parameters
  690. ----------
  691. paths: list
  692. A list of of filepaths on this filesystems
  693. starts, ends: int or list
  694. Bytes limits of the read. If using a single int, the same value will be
  695. used to read all the specified files.
  696. """
  697. if max_gap is not None:
  698. raise NotImplementedError
  699. if not isinstance(paths, list):
  700. raise TypeError
  701. if not isinstance(starts, list):
  702. starts = [starts] * len(paths)
  703. if not isinstance(ends, list):
  704. ends = [ends] * len(paths)
  705. if len(starts) != len(paths) or len(ends) != len(paths):
  706. raise ValueError
  707. out = []
  708. for p, s, e in zip(paths, starts, ends):
  709. try:
  710. out.append(self.cat_file(p, s, e))
  711. except Exception as e:
  712. if on_error == "return":
  713. out.append(e)
  714. else:
  715. raise
  716. return out
def cat(self, path, recursive=False, on_error="raise", **kwargs):
    """Fetch (potentially multiple) paths' contents

    Parameters
    ----------
    recursive: bool
        If True, assume the path(s) are directories, and get all the
        contained files
    on_error : "raise", "omit", "return"
        If raise, an underlying exception will be raised (converted to KeyError
        if the type is in self.missing_exceptions); if omit, keys with exception
        will simply not be included in the output; if "return", all keys are
        included in the output, but the value will be bytes or an exception
        instance.
    kwargs: passed to cat_file

    Returns
    -------
    dict of {path: contents} if there are multiple paths
    or the path has been otherwise expanded
    """
    paths = self.expand_path(path, recursive=recursive)
    # Return a dict when the request expanded to several paths, was given
    # as a list, or expansion changed the single given path; otherwise
    # return the raw bytes of the one file.
    if (
        len(paths) > 1
        or isinstance(path, list)
        or paths[0] != self._strip_protocol(path)
    ):
        out = {}
        for path in paths:
            try:
                out[path] = self.cat_file(path, **kwargs)
            except Exception as e:
                if on_error == "raise":
                    raise
                if on_error == "return":
                    out[path] = e
                # on_error == "omit": the failing key is simply skipped
        return out
    else:
        return self.cat_file(paths[0], **kwargs)
def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, outfile=None, **kwargs):
    """Copy single remote file to local

    Parameters
    ----------
    rpath: str
        Remote source path.
    lpath: str or file-like
        Local destination; a file-like object is written to directly and
        is not closed here.
    callback:
        Progress-reporting hook (set_size/relative_update).
    outfile: file-like, optional
        Already-open local file object to write into; overrides opening
        ``lpath``.
    """
    from .implementations.local import LocalFileSystem

    if isfilelike(lpath):
        outfile = lpath
    elif self.isdir(rpath):
        # remote is a directory: just mirror it locally, nothing to read
        os.makedirs(lpath, exist_ok=True)
        return None

    fs = LocalFileSystem(auto_mkdir=True)
    fs.makedirs(fs._parent(lpath), exist_ok=True)

    with self.open(rpath, "rb", **kwargs) as f1:
        if outfile is None:
            outfile = open(lpath, "wb")

        try:
            callback.set_size(getattr(f1, "size", None))
            data = True
            while data:
                data = f1.read(self.blocksize)
                segment_len = outfile.write(data)
                if segment_len is None:
                    # some file-likes return None from write()
                    segment_len = len(data)
                callback.relative_update(segment_len)
        finally:
            # only close a file object we opened ourselves
            if not isfilelike(lpath):
                outfile.close()
def get(
    self,
    rpath,
    lpath,
    recursive=False,
    callback=DEFAULT_CALLBACK,
    maxdepth=None,
    **kwargs,
):
    """Copy file(s) to local.

    Copies a specific file or tree of files (if recursive=True). If lpath
    ends with a "/", it will be assumed to be a directory, and target files
    will go within. Can submit a list of paths, which may be glob-patterns
    and will be expanded.

    Calls get_file for each source.
    """
    if isinstance(lpath, list) and isinstance(rpath, list):
        # No need to expand paths when both source and destination
        # are provided as lists
        rpaths = rpath
        lpaths = lpath
    else:
        from .implementations.local import (
            LocalFileSystem,
            make_path_posix,
            trailing_sep,
        )

        source_is_str = isinstance(rpath, str)
        rpaths = self.expand_path(rpath, recursive=recursive, maxdepth=maxdepth)
        if source_is_str and (not recursive or maxdepth is not None):
            # Non-recursive glob does not copy directories
            rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))]
            if not rpaths:
                return

        if isinstance(lpath, str):
            lpath = make_path_posix(lpath)

        source_is_file = len(rpaths) == 1
        dest_is_dir = isinstance(lpath, str) and (
            trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
        )

        # controls how other_paths joins source names onto the target root
        exists = source_is_str and (
            (has_magic(rpath) and source_is_file)
            or (not has_magic(rpath) and dest_is_dir and not trailing_sep(rpath))
        )
        lpaths = other_paths(
            rpaths,
            lpath,
            exists=exists,
            flatten=not source_is_str,
        )

    callback.set_size(len(lpaths))
    for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
        # one branched (child) callback per transferred file
        with callback.branched(rpath, lpath) as child:
            self.get_file(rpath, lpath, callback=child, **kwargs)
def put_file(
    self, lpath, rpath, callback=DEFAULT_CALLBACK, mode="overwrite", **kwargs
):
    """Copy single file to remote

    With ``mode="create"``, an existing remote file raises
    FileExistsError. If ``lpath`` is a local directory, only the remote
    directory is created and nothing is copied.
    """
    if mode == "create" and self.exists(rpath):
        raise FileExistsError
    if os.path.isdir(lpath):
        self.makedirs(rpath, exist_ok=True)
        return None

    with open(lpath, "rb") as f1:
        size = f1.seek(0, 2)  # seek to end to learn the local file size
        callback.set_size(size)
        f1.seek(0)

        self.mkdirs(self._parent(os.fspath(rpath)), exist_ok=True)
        with self.open(rpath, "wb", **kwargs) as f2:
            while f1.tell() < size:
                data = f1.read(self.blocksize)
                segment_len = f2.write(data)
                if segment_len is None:
                    # some file-likes return None from write()
                    segment_len = len(data)
                callback.relative_update(segment_len)
def put(
    self,
    lpath,
    rpath,
    recursive=False,
    callback=DEFAULT_CALLBACK,
    maxdepth=None,
    **kwargs,
):
    """Copy file(s) from local.

    Copies a specific file or tree of files (if recursive=True). If rpath
    ends with a "/", it will be assumed to be a directory, and target files
    will go within.

    Calls put_file for each source.
    """
    if isinstance(lpath, list) and isinstance(rpath, list):
        # No need to expand paths when both source and destination
        # are provided as lists
        rpaths = rpath
        lpaths = lpath
    else:
        from .implementations.local import (
            LocalFileSystem,
            make_path_posix,
            trailing_sep,
        )

        source_is_str = isinstance(lpath, str)
        if source_is_str:
            lpath = make_path_posix(lpath)
        fs = LocalFileSystem()
        lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
        if source_is_str and (not recursive or maxdepth is not None):
            # Non-recursive glob does not copy directories
            lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
            if not lpaths:
                return

        source_is_file = len(lpaths) == 1
        dest_is_dir = isinstance(rpath, str) and (
            trailing_sep(rpath) or self.isdir(rpath)
        )

        rpath = (
            self._strip_protocol(rpath)
            if isinstance(rpath, str)
            else [self._strip_protocol(p) for p in rpath]
        )
        # controls how other_paths joins source names onto the target root
        exists = source_is_str and (
            (has_magic(lpath) and source_is_file)
            or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
        )
        rpaths = other_paths(
            lpaths,
            rpath,
            exists=exists,
            flatten=not source_is_str,
        )

    callback.set_size(len(rpaths))
    for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
        # one branched (child) callback per transferred file
        with callback.branched(lpath, rpath) as child:
            self.put_file(lpath, rpath, callback=child, **kwargs)
  913. def head(self, path, size=1024):
  914. """Get the first ``size`` bytes from file"""
  915. with self.open(path, "rb") as f:
  916. return f.read(size)
  917. def tail(self, path, size=1024):
  918. """Get the last ``size`` bytes from file"""
  919. with self.open(path, "rb") as f:
  920. f.seek(max(-size, -f.size), 2)
  921. return f.read()
def cp_file(self, path1, path2, **kwargs):
    """Copy a single file from ``path1`` to ``path2`` on this filesystem.

    Abstract here; backends must override this for ``copy`` to work.
    """
    raise NotImplementedError
def copy(
    self, path1, path2, recursive=False, maxdepth=None, on_error=None, **kwargs
):
    """Copy within two locations in the filesystem

    on_error : "raise", "ignore"
        If raise, any not-found exceptions will be raised; if ignore any
        not-found exceptions will cause the path to be skipped; defaults to
        raise unless recursive is true, where the default is ignore
    """
    # resolve the documented default: ignore when recursive, else raise
    if on_error is None and recursive:
        on_error = "ignore"
    elif on_error is None:
        on_error = "raise"

    if isinstance(path1, list) and isinstance(path2, list):
        # No need to expand paths when both source and destination
        # are provided as lists
        paths1 = path1
        paths2 = path2
    else:
        from .implementations.local import trailing_sep

        source_is_str = isinstance(path1, str)
        paths1 = self.expand_path(path1, recursive=recursive, maxdepth=maxdepth)
        if source_is_str and (not recursive or maxdepth is not None):
            # Non-recursive glob does not copy directories
            paths1 = [p for p in paths1 if not (trailing_sep(p) or self.isdir(p))]
            if not paths1:
                return

        source_is_file = len(paths1) == 1
        dest_is_dir = isinstance(path2, str) and (
            trailing_sep(path2) or self.isdir(path2)
        )

        # controls how other_paths joins source names onto the target root
        exists = source_is_str and (
            (has_magic(path1) and source_is_file)
            or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
        )
        paths2 = other_paths(
            paths1,
            path2,
            exists=exists,
            flatten=not source_is_str,
        )

    for p1, p2 in zip(paths1, paths2):
        try:
            self.cp_file(p1, p2, **kwargs)
        except FileNotFoundError:
            if on_error == "raise":
                raise
def expand_path(self, path, recursive=False, maxdepth=None, **kwargs):
    """Turn one or more globs or directories into a list of all matching paths
    to files or directories.

    kwargs are passed to ``glob`` or ``find``, which may in turn call ``ls``
    """
    if maxdepth is not None and maxdepth < 1:
        raise ValueError("maxdepth must be at least 1")

    if isinstance(path, (str, os.PathLike)):
        # normalize to the list form and recurse once
        out = self.expand_path([path], recursive, maxdepth)
    else:
        out = set()
        path = [self._strip_protocol(p) for p in path]
        for p in path:
            if has_magic(p):
                # glob pattern: expand it (one level of depth)
                bit = set(self.glob(p, maxdepth=maxdepth, **kwargs))
                out |= bit
                if recursive:
                    # glob call above expanded one depth so if maxdepth is defined
                    # then decrement it in expand_path call below. If it is zero
                    # after decrementing then avoid expand_path call.
                    if maxdepth is not None and maxdepth <= 1:
                        continue
                    out |= set(
                        self.expand_path(
                            list(bit),
                            recursive=recursive,
                            maxdepth=maxdepth - 1 if maxdepth is not None else None,
                            **kwargs,
                        )
                    )
                continue
            elif recursive:
                # plain path: pull in everything beneath it
                rec = set(
                    self.find(
                        p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs
                    )
                )
                out |= rec
            if p not in out and (recursive is False or self.exists(p)):
                # should only check once, for the root
                out.add(p)

    if not out:
        raise FileNotFoundError(path)
    return sorted(out)
  1015. def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs):
  1016. """Move file(s) from one location to another"""
  1017. if path1 == path2:
  1018. logger.debug("%s mv: The paths are the same, so no files were moved.", self)
  1019. else:
  1020. # explicitly raise exception to prevent data corruption
  1021. self.copy(
  1022. path1, path2, recursive=recursive, maxdepth=maxdepth, onerror="raise"
  1023. )
  1024. self.rm(path1, recursive=recursive)
def rm_file(self, path):
    """Delete a single file (delegates to the legacy ``_rm``)."""
    self._rm(path)
def _rm(self, path):
    """Delete one file; abstract here, backends override it."""
    # this is the old name for the method, prefer rm_file
    raise NotImplementedError
  1032. def rm(self, path, recursive=False, maxdepth=None):
  1033. """Delete files.
  1034. Parameters
  1035. ----------
  1036. path: str or list of str
  1037. File(s) to delete.
  1038. recursive: bool
  1039. If file(s) are directories, recursively delete contents and then
  1040. also remove the directory
  1041. maxdepth: int or None
  1042. Depth to pass to walk for finding files to delete, if recursive.
  1043. If None, there will be no limit and infinite recursion may be
  1044. possible.
  1045. """
  1046. path = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
  1047. for p in reversed(path):
  1048. self.rm_file(p)
  1049. @classmethod
  1050. def _parent(cls, path):
  1051. path = cls._strip_protocol(path)
  1052. if "/" in path:
  1053. parent = path.rsplit("/", 1)[0].lstrip(cls.root_marker)
  1054. return cls.root_marker + parent
  1055. else:
  1056. return cls.root_marker
def _open(
    self,
    path,
    mode="rb",
    block_size=None,
    autocommit=True,
    cache_options=None,
    **kwargs,
):
    """Return raw bytes-mode file-like from the file-system

    Backends normally override this; the default wraps the path in an
    ``AbstractBufferedFile``.
    """
    return AbstractBufferedFile(
        self,
        path,
        mode,
        block_size,
        autocommit,
        cache_options=cache_options,
        **kwargs,
    )
def open(
    self,
    path,
    mode="rb",
    block_size=None,
    cache_options=None,
    compression=None,
    **kwargs,
):
    """
    Return a file-like object from the filesystem

    The resultant instance must function correctly in a context ``with``
    block.

    Parameters
    ----------
    path: str
        Target file
    mode: str like 'rb', 'w'
        See builtin ``open()``
        Mode "x" (exclusive write) may be implemented by the backend. Even if
        it is, whether it is checked up front or on commit, and whether it is
        atomic is implementation-dependent.
    block_size: int
        Some indication of buffering - this is a value in bytes
    cache_options : dict, optional
        Extra arguments to pass through to the cache.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding, errors, newline: passed on to TextIOWrapper for text mode
    """
    import io

    path = self._strip_protocol(path)
    if "b" not in mode:
        # text mode: open the binary equivalent and wrap in TextIOWrapper
        mode = mode.replace("t", "") + "b"

        text_kwargs = {
            k: kwargs.pop(k)
            for k in ["encoding", "errors", "newline"]
            if k in kwargs
        }
        return io.TextIOWrapper(
            self.open(
                path,
                mode,
                block_size=block_size,
                cache_options=cache_options,
                compression=compression,
                **kwargs,
            ),
            **text_kwargs,
        )
    else:
        # autocommit defaults to False inside a transaction
        ac = kwargs.pop("autocommit", not self._intrans)
        f = self._open(
            path,
            mode=mode,
            block_size=block_size,
            autocommit=ac,
            cache_options=cache_options,
            **kwargs,
        )
        if compression is not None:
            from fsspec.compression import compr
            from fsspec.core import get_compression

            compression = get_compression(path, compression)
            compress = compr[compression]
            f = compress(f, mode=mode[0])

        if not ac and "r" not in mode:
            # writing within a transaction: register file for commit/discard
            self.transaction.files.append(f)
        return f
  1147. def touch(self, path, truncate=True, **kwargs):
  1148. """Create empty file, or update timestamp
  1149. Parameters
  1150. ----------
  1151. path: str
  1152. file location
  1153. truncate: bool
  1154. If True, always set file size to 0; if False, update timestamp and
  1155. leave file unchanged, if backend allows this
  1156. """
  1157. if truncate or not self.exists(path):
  1158. with self.open(path, "wb", **kwargs):
  1159. pass
  1160. else:
  1161. raise NotImplementedError # update timestamp, if possible
  1162. def ukey(self, path):
  1163. """Hash of file properties, to tell if it has changed"""
  1164. return sha256(str(self.info(path)).encode()).hexdigest()
def read_block(self, fn, offset, length, delimiter=None):
    """Read a block of bytes from

    Starting at ``offset`` of the file, read ``length`` bytes. If
    ``delimiter`` is set then we ensure that the read starts and stops at
    delimiter boundaries that follow the locations ``offset`` and ``offset
    + length``. If ``offset`` is zero then we start at zero. The
    bytestring returned WILL include the end delimiter string.

    If offset+length is beyond the eof, reads to eof.

    Parameters
    ----------
    fn: string
        Path to filename
    offset: int
        Byte offset to start read
    length: int
        Number of bytes to read. If None, read to end.
    delimiter: bytes (optional)
        Ensure reading starts and stops at delimiter bytestring

    Examples
    --------
    >>> fs.read_block('data/file.csv', 0, 13)  # doctest: +SKIP
    b'Alice, 100\\nBo'
    >>> fs.read_block('data/file.csv', 0, 13, delimiter=b'\\n')  # doctest: +SKIP
    b'Alice, 100\\nBob, 200\\n'

    Use ``length=None`` to read to the end of the file.
    >>> fs.read_block('data/file.csv', 0, None, delimiter=b'\\n')  # doctest: +SKIP
    b'Alice, 100\\nBob, 200\\nCharlie, 300'

    See Also
    --------
    :func:`fsspec.utils.read_block`
    """
    with self.open(fn, "rb") as f:
        size = f.size
        if length is None:
            length = size
        if size is not None and offset + length > size:
            # clamp the read so it does not run past end-of-file
            length = size - offset
        return read_block(f, offset, length, delimiter)
  1203. def to_json(self, *, include_password: bool = True) -> str:
  1204. """
  1205. JSON representation of this filesystem instance.
  1206. Parameters
  1207. ----------
  1208. include_password: bool, default True
  1209. Whether to include the password (if any) in the output.
  1210. Returns
  1211. -------
  1212. JSON string with keys ``cls`` (the python location of this class),
  1213. protocol (text name of this class's protocol, first one in case of
  1214. multiple), ``args`` (positional args, usually empty), and all other
  1215. keyword arguments as their own keys.
  1216. Warnings
  1217. --------
  1218. Serialized filesystems may contain sensitive information which have been
  1219. passed to the constructor, such as passwords and tokens. Make sure you
  1220. store and send them in a secure environment!
  1221. """
  1222. from .json import FilesystemJSONEncoder
  1223. return json.dumps(
  1224. self,
  1225. cls=type(
  1226. "_FilesystemJSONEncoder",
  1227. (FilesystemJSONEncoder,),
  1228. {"include_password": include_password},
  1229. ),
  1230. )
  1231. @staticmethod
  1232. def from_json(blob: str) -> AbstractFileSystem:
  1233. """
  1234. Recreate a filesystem instance from JSON representation.
  1235. See ``.to_json()`` for the expected structure of the input.
  1236. Parameters
  1237. ----------
  1238. blob: str
  1239. Returns
  1240. -------
  1241. file system instance, not necessarily of this particular class.
  1242. Warnings
  1243. --------
  1244. This can import arbitrary modules (as determined by the ``cls`` key).
  1245. Make sure you haven't installed any modules that may execute malicious code
  1246. at import time.
  1247. """
  1248. from .json import FilesystemJSONDecoder
  1249. return json.loads(blob, cls=FilesystemJSONDecoder)
  1250. def to_dict(self, *, include_password: bool = True) -> dict[str, Any]:
  1251. """
  1252. JSON-serializable dictionary representation of this filesystem instance.
  1253. Parameters
  1254. ----------
  1255. include_password: bool, default True
  1256. Whether to include the password (if any) in the output.
  1257. Returns
  1258. -------
  1259. Dictionary with keys ``cls`` (the python location of this class),
  1260. protocol (text name of this class's protocol, first one in case of
  1261. multiple), ``args`` (positional args, usually empty), and all other
  1262. keyword arguments as their own keys.
  1263. Warnings
  1264. --------
  1265. Serialized filesystems may contain sensitive information which have been
  1266. passed to the constructor, such as passwords and tokens. Make sure you
  1267. store and send them in a secure environment!
  1268. """
  1269. from .json import FilesystemJSONEncoder
  1270. json_encoder = FilesystemJSONEncoder()
  1271. cls = type(self)
  1272. proto = self.protocol
  1273. storage_options = dict(self.storage_options)
  1274. if not include_password:
  1275. storage_options.pop("password", None)
  1276. return dict(
  1277. cls=f"{cls.__module__}:{cls.__name__}",
  1278. protocol=proto[0] if isinstance(proto, (tuple, list)) else proto,
  1279. args=json_encoder.make_serializable(self.storage_args),
  1280. **json_encoder.make_serializable(storage_options),
  1281. )
  1282. @staticmethod
  1283. def from_dict(dct: dict[str, Any]) -> AbstractFileSystem:
  1284. """
  1285. Recreate a filesystem instance from dictionary representation.
  1286. See ``.to_dict()`` for the expected structure of the input.
  1287. Parameters
  1288. ----------
  1289. dct: Dict[str, Any]
  1290. Returns
  1291. -------
  1292. file system instance, not necessarily of this particular class.
  1293. Warnings
  1294. --------
  1295. This can import arbitrary modules (as determined by the ``cls`` key).
  1296. Make sure you haven't installed any modules that may execute malicious code
  1297. at import time.
  1298. """
  1299. from .json import FilesystemJSONDecoder
  1300. json_decoder = FilesystemJSONDecoder()
  1301. dct = dict(dct) # Defensive copy
  1302. cls = FilesystemJSONDecoder.try_resolve_fs_cls(dct)
  1303. if cls is None:
  1304. raise ValueError("Not a serialized AbstractFileSystem")
  1305. dct.pop("cls", None)
  1306. dct.pop("protocol", None)
  1307. return cls(
  1308. *json_decoder.unmake_serializable(dct.pop("args", ())),
  1309. **json_decoder.unmake_serializable(dct),
  1310. )
def _get_pyarrow_filesystem(self):
    """
    Make a version of the FS instance which will be acceptable to pyarrow
    """
    # all instances already also derive from pyarrow
    return self
  1317. def get_mapper(self, root="", check=False, create=False, missing_exceptions=None):
  1318. """Create key/value store based on this file-system
  1319. Makes a MutableMapping interface to the FS at the given root path.
  1320. See ``fsspec.mapping.FSMap`` for further details.
  1321. """
  1322. from .mapping import FSMap
  1323. return FSMap(
  1324. root,
  1325. self,
  1326. check=check,
  1327. create=create,
  1328. missing_exceptions=missing_exceptions,
  1329. )
@classmethod
def clear_instance_cache(cls):
    """
    Clear the cache of filesystem instances.

    Notes
    -----
    Unless overridden by setting the ``cachable`` class attribute to False,
    the filesystem class stores a reference to newly created instances. This
    prevents Python's normal rules around garbage collection from working,
    since the instances refcount will not drop to zero until
    ``clear_instance_cache`` is called.
    """
    cls._cache.clear()
def created(self, path):
    """Return the created timestamp of a file as a datetime.datetime

    Abstract here; backends that know creation times override this.
    """
    raise NotImplementedError
def modified(self, path):
    """Return the modified timestamp of a file as a datetime.datetime

    Abstract here; backends that know modification times override this.
    """
    raise NotImplementedError
def tree(
    self,
    path: str = "/",
    recursion_limit: int = 2,
    max_display: int = 25,
    display_size: bool = False,
    prefix: str = "",
    is_last: bool = True,
    first: bool = True,
    indent_size: int = 4,
) -> str:
    """
    Return a tree-like structure of the filesystem starting from the given path as a string.

    Parameters
    ----------
    path: Root path to start traversal from
    recursion_limit: Maximum depth of directory traversal
    max_display: Maximum number of items to display per directory
    display_size: Whether to display file sizes
    prefix: Current line prefix for visual tree structure
    is_last: Whether current item is last in its level
    first: Whether this is the first call (displays root path)
    indent_size: Number of spaces by indent

    Returns
    -------
    str: A string representing the tree structure.

    Example
    -------
    >>> from fsspec import filesystem

    >>> fs = filesystem('ftp', host='test.rebex.net', user='demo', password='password')
    >>> tree = fs.tree(display_size=True, recursion_limit=3, indent_size=8, max_display=10)
    >>> print(tree)
    """

    def format_bytes(n: int) -> str:
        """Format bytes as text."""
        for prefix, k in (
            ("P", 2**50),
            ("T", 2**40),
            ("G", 2**30),
            ("M", 2**20),
            ("k", 2**10),
        ):
            if n >= 0.9 * k:
                return f"{n / k:.2f} {prefix}b"
        return f"{n}B"

    result = []

    if first:
        # only the top-level call prints the root path itself
        result.append(path)

    if recursion_limit:
        indent = " " * indent_size
        contents = self.ls(path, detail=True)
        # directories first, then alphabetical within each group
        contents.sort(
            key=lambda x: (x.get("type") != "directory", x.get("name", ""))
        )

        if max_display is not None and len(contents) > max_display:
            displayed_contents = contents[:max_display]
            remaining_count = len(contents) - max_display
        else:
            displayed_contents = contents
            remaining_count = 0

        for i, item in enumerate(displayed_contents):
            # last item overall only if nothing is truncated after it
            is_last_item = (i == len(displayed_contents) - 1) and (
                remaining_count == 0
            )

            branch = (
                "└" + ("─" * (indent_size - 2))
                if is_last_item
                else "├" + ("─" * (indent_size - 2))
            )
            branch += " "
            # children of a last item get blank indent; otherwise a pipe
            new_prefix = prefix + (
                indent if is_last_item else "│" + " " * (indent_size - 1)
            )

            name = os.path.basename(item.get("name", ""))

            if display_size and item.get("type") == "directory":
                # summarize directory contents instead of a byte size
                sub_contents = self.ls(item.get("name", ""), detail=True)
                num_files = sum(
                    1 for sub_item in sub_contents if sub_item.get("type") == "file"
                )
                num_folders = sum(
                    1
                    for sub_item in sub_contents
                    if sub_item.get("type") == "directory"
                )

                if num_files == 0 and num_folders == 0:
                    size = " (empty folder)"
                elif num_files == 0:
                    size = f" ({num_folders} subfolder{'s' if num_folders > 1 else ''})"
                elif num_folders == 0:
                    size = f" ({num_files} file{'s' if num_files > 1 else ''})"
                else:
                    size = f" ({num_files} file{'s' if num_files > 1 else ''}, {num_folders} subfolder{'s' if num_folders > 1 else ''})"
            elif display_size and item.get("type") == "file":
                size = f" ({format_bytes(item.get('size', 0))})"
            else:
                size = ""

            result.append(f"{prefix}{branch}{name}{size}")

            if item.get("type") == "directory" and recursion_limit > 0:
                # recurse into subdirectory with one less level of depth
                result.append(
                    self.tree(
                        path=item.get("name", ""),
                        recursion_limit=recursion_limit - 1,
                        max_display=max_display,
                        display_size=display_size,
                        prefix=new_prefix,
                        is_last=is_last_item,
                        first=False,
                        indent_size=indent_size,
                    )
                )

        if remaining_count > 0:
            more_message = f"{remaining_count} more item(s) not displayed."
            result.append(
                f"{prefix}{'└' + ('─' * (indent_size - 2))} {more_message}"
            )

    return "\n".join(_ for _ in result if _)
  1465. # ------------------------------------------------------------------------
  1466. # Aliases
  1467. def read_bytes(self, path, start=None, end=None, **kwargs):
  1468. """Alias of `AbstractFileSystem.cat_file`."""
  1469. return self.cat_file(path, start=start, end=end, **kwargs)
  1470. def write_bytes(self, path, value, **kwargs):
  1471. """Alias of `AbstractFileSystem.pipe_file`."""
  1472. self.pipe_file(path, value, **kwargs)
  1473. def makedir(self, path, create_parents=True, **kwargs):
  1474. """Alias of `AbstractFileSystem.mkdir`."""
  1475. return self.mkdir(path, create_parents=create_parents, **kwargs)
  1476. def mkdirs(self, path, exist_ok=False):
  1477. """Alias of `AbstractFileSystem.makedirs`."""
  1478. return self.makedirs(path, exist_ok=exist_ok)
  1479. def listdir(self, path, detail=True, **kwargs):
  1480. """Alias of `AbstractFileSystem.ls`."""
  1481. return self.ls(path, detail=detail, **kwargs)
  1482. def cp(self, path1, path2, **kwargs):
  1483. """Alias of `AbstractFileSystem.copy`."""
  1484. return self.copy(path1, path2, **kwargs)
  1485. def move(self, path1, path2, **kwargs):
  1486. """Alias of `AbstractFileSystem.mv`."""
  1487. return self.mv(path1, path2, **kwargs)
  1488. def stat(self, path, **kwargs):
  1489. """Alias of `AbstractFileSystem.info`."""
  1490. return self.info(path, **kwargs)
  1491. def disk_usage(self, path, total=True, maxdepth=None, **kwargs):
  1492. """Alias of `AbstractFileSystem.du`."""
  1493. return self.du(path, total=total, maxdepth=maxdepth, **kwargs)
  1494. def rename(self, path1, path2, **kwargs):
  1495. """Alias of `AbstractFileSystem.mv`."""
  1496. return self.mv(path1, path2, **kwargs)
  1497. def delete(self, path, recursive=False, maxdepth=None):
  1498. """Alias of `AbstractFileSystem.rm`."""
  1499. return self.rm(path, recursive=recursive, maxdepth=maxdepth)
  1500. def upload(self, lpath, rpath, recursive=False, **kwargs):
  1501. """Alias of `AbstractFileSystem.put`."""
  1502. return self.put(lpath, rpath, recursive=recursive, **kwargs)
  1503. def download(self, rpath, lpath, recursive=False, **kwargs):
  1504. """Alias of `AbstractFileSystem.get`."""
  1505. return self.get(rpath, lpath, recursive=recursive, **kwargs)
  1506. def sign(self, path, expiration=100, **kwargs):
  1507. """Create a signed URL representing the given path
  1508. Some implementations allow temporary URLs to be generated, as a
  1509. way of delegating credentials.
  1510. Parameters
  1511. ----------
  1512. path : str
  1513. The path on the filesystem
  1514. expiration : int
  1515. Number of seconds to enable the URL for (if supported)
  1516. Returns
  1517. -------
  1518. URL : str
  1519. The signed URL
  1520. Raises
  1521. ------
  1522. NotImplementedError : if method is not implemented for a filesystem
  1523. """
  1524. raise NotImplementedError("Sign is not implemented for this filesystem")
  1525. def _isfilestore(self):
  1526. # Originally inherited from pyarrow DaskFileSystem. Keeping this
  1527. # here for backwards compatibility as long as pyarrow uses its
  1528. # legacy fsspec-compatible filesystems and thus accepts fsspec
  1529. # filesystems as well
  1530. return False
class AbstractBufferedFile(io.IOBase):
    """Convenient class to derive from to provide buffering

    In the case that the backend does not provide a pythonic file-like object
    already, this class contains much of the logic to build one. The only
    methods that need to be overridden are ``_upload_chunk``,
    ``_initiate_upload`` and ``_fetch_range``.
    """

    # 5 MiB; used whenever block_size is "default" or None
    DEFAULT_BLOCK_SIZE = 5 * 2**20
    # lazily-filled cache for the ``details`` property
    _details = None

    def __init__(
        self,
        fs,
        path,
        mode="rb",
        block_size="default",
        autocommit=True,
        cache_type="readahead",
        cache_options=None,
        size=None,
        **kwargs,
    ):
        """
        Template for files with buffered reading and writing

        Parameters
        ----------
        fs: instance of FileSystem
        path: str
            location in file-system
        mode: str
            Normal file modes. Currently only 'wb', 'ab' or 'rb'. Some file
            systems may be read-only, and some may not support append.
        block_size: int
            Buffer size for reading or writing, 'default' for class default
        autocommit: bool
            Whether to write to final destination; may only impact what
            happens when file is being closed.
        cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead"
            Caching policy in read mode. See the definitions in ``core``.
        cache_options : dict
            Additional options passed to the constructor for the cache specified
            by `cache_type`.
        size: int
            If given and in read mode, suppresses having to look up the file size
        kwargs:
            Gets stored as self.kwargs
        """
        # local import to avoid a circular dependency with .core
        from .core import caches

        self.path = path
        self.fs = fs
        self.mode = mode
        self.blocksize = (
            self.DEFAULT_BLOCK_SIZE if block_size in ["default", None] else block_size
        )
        self.loc = 0  # current logical file position
        self.autocommit = autocommit
        self.end = None
        self.start = None
        self.closed = False

        if cache_options is None:
            cache_options = {}

        if "trim" in kwargs:
            # legacy spelling: 'trim' used to be a top-level kwarg; forward it
            # into cache_options and warn
            warnings.warn(
                "Passing 'trim' to control the cache behavior has been deprecated. "
                "Specify it within the 'cache_options' argument instead.",
                FutureWarning,
            )
            cache_options["trim"] = kwargs.pop("trim")

        self.kwargs = kwargs

        if mode not in {"ab", "rb", "wb", "xb"}:
            raise NotImplementedError("File mode not supported")
        if mode == "rb":
            if size is not None:
                self.size = size
            else:
                # size not supplied: look it up via fs.info (may be a request)
                self.size = self.details["size"]
            # read path: a block cache mediates all fetches from the backend
            self.cache = caches[cache_type](
                self.blocksize, self._fetch_range, self.size, **cache_options
            )
        else:
            # write/append path: data is staged in an in-memory buffer until
            # flush() pushes it to the backend
            self.buffer = io.BytesIO()
            self.offset = None  # bytes already uploaded; None = upload not started
            self.forced = False  # True once flush(force=True) has run
            self.location = None

    @property
    def details(self):
        # fs.info() result for this path, fetched at most once
        if self._details is None:
            self._details = self.fs.info(self.path)
        return self._details

    @details.setter
    def details(self, value):
        self._details = value
        self.size = value["size"]

    @property
    def full_name(self):
        # path string with the filesystem's protocol re-attached
        return _unstrip_protocol(self.path, self.fs)

    @property
    def closed(self):
        # get around this attr being read-only in IOBase
        # use getattr here, since this can be called during del
        return getattr(self, "_closed", True)

    @closed.setter
    def closed(self, c):
        self._closed = c

    def __hash__(self):
        """Writable files hash by identity; readable files by their details."""
        if "w" in self.mode:
            return id(self)
        else:
            return int(tokenize(self.details), 16)

    def __eq__(self, other):
        """Files are equal if they have the same checksum, only in read mode"""
        if self is other:
            return True
        return (
            isinstance(other, type(self))
            and self.mode == "rb"
            and other.mode == "rb"
            and hash(self) == hash(other)
        )

    def commit(self):
        """Move from temp to final destination"""

    def discard(self):
        """Throw away temporary file"""

    def info(self):
        """File information about this path"""
        if self.readable():
            return self.details
        else:
            raise ValueError("Info not available while writing")

    def tell(self):
        """Current file location"""
        return self.loc

    def seek(self, loc, whence=0):
        """Set current file location

        Parameters
        ----------
        loc: int
            byte location
        whence: {0, 1, 2}
            from start of file, current location or end of file, resp.
        """
        loc = int(loc)
        if not self.mode == "rb":
            # writing is strictly sequential; signal like a pipe would
            raise OSError(ESPIPE, "Seek only available in read mode")
        if whence == 0:
            nloc = loc
        elif whence == 1:
            nloc = self.loc + loc
        elif whence == 2:
            nloc = self.size + loc
        else:
            raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)")
        if nloc < 0:
            raise ValueError("Seek before start of file")
        # NOTE: seeking beyond EOF is not rejected here
        self.loc = nloc
        return self.loc

    def write(self, data):
        """
        Write data to buffer.

        Buffer only sent on flush() or if buffer is greater than
        or equal to blocksize.

        Parameters
        ----------
        data: bytes
            Set of bytes to be written.
        """
        if not self.writable():
            raise ValueError("File not in write mode")
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if self.forced:
            raise ValueError("This file has been force-flushed, can only close")
        out = self.buffer.write(data)
        self.loc += out
        if self.buffer.tell() >= self.blocksize:
            # buffer is full enough: push a chunk to the backend
            self.flush()
        return out

    def flush(self, force=False):
        """
        Write buffered data to backend store.

        Writes the current buffer, if it is larger than the block-size, or if
        the file is being closed.

        Parameters
        ----------
        force: bool
            When closing, write the last block even if it is smaller than
            blocks are allowed to be. Disallows further writing to this file.
        """
        if self.closed:
            raise ValueError("Flush on closed file")
        if force and self.forced:
            raise ValueError("Force flush cannot be called more than once")
        if force:
            self.forced = True

        if self.readable():
            # no-op to flush on read-mode
            return

        if not force and self.buffer.tell() < self.blocksize:
            # Defer write on small block
            return

        if self.offset is None:
            # Initialize a multipart upload
            self.offset = 0
            try:
                self._initiate_upload()
            except:
                # mark unusable before propagating: the upload never started
                self.closed = True
                raise

        if self._upload_chunk(final=force) is not False:
            # chunk accepted: account for it and start a fresh buffer
            self.offset += self.buffer.seek(0, 2)
            self.buffer = io.BytesIO()

    def _upload_chunk(self, final=False):
        """Write one part of a multi-block file upload

        Parameters
        ----------
        final: bool
            This is the last block, so should complete file, if
            self.autocommit is True.
        """
        # may not yet have been initialized, may need to call _initialize_upload

    def _initiate_upload(self):
        """Create remote file/upload"""
        pass

    def _fetch_range(self, start, end):
        """Get the specified set of bytes from remote"""
        return self.fs.cat_file(self.path, start=start, end=end)

    def read(self, length=-1):
        """
        Return data from cache, or fetch pieces as necessary

        Parameters
        ----------
        length: int (-1)
            Number of bytes to read; if <0, all remaining bytes.
        """
        length = -1 if length is None else int(length)
        if self.mode != "rb":
            raise ValueError("File not in read mode")
        if length < 0:
            # read everything from the current position to EOF
            length = self.size - self.loc
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if length == 0:
            # don't even bother calling fetch
            return b""
        out = self.cache._fetch(self.loc, self.loc + length)

        logger.debug(
            "%s read: %i - %i %s",
            self,
            self.loc,
            self.loc + length,
            self.cache._log_stats(),
        )
        self.loc += len(out)
        return out

    def readinto(self, b):
        """mirrors builtin file's readinto method

        https://docs.python.org/3/library/io.html#io.RawIOBase.readinto
        """
        out = memoryview(b).cast("B")
        data = self.read(out.nbytes)
        out[: len(data)] = data
        return len(data)

    def readuntil(self, char=b"\n", blocks=None):
        """Return data between current position and first occurrence of char

        char is included in the output, except if the end of the file is
        encountered first.

        Parameters
        ----------
        char: bytes
            Thing to find
        blocks: None or int
            How much to read in each go. Defaults to file blocksize - which may
            mean a new read on every call.
        """
        out = []
        while True:
            start = self.tell()
            part = self.read(blocks or self.blocksize)
            if len(part) == 0:
                # EOF reached without finding char
                break
            found = part.find(char)
            if found > -1:
                # keep up to and including the delimiter, rewind past it
                out.append(part[: found + len(char)])
                self.seek(start + found + len(char))
                break
            out.append(part)
        return b"".join(out)

    def readline(self):
        """Read until and including the first occurrence of newline character

        Note that, because of character encoding, this is not necessarily a
        true line ending.
        """
        return self.readuntil(b"\n")

    def __next__(self):
        out = self.readline()
        if out:
            return out
        raise StopIteration

    def __iter__(self):
        return self

    def readlines(self):
        """Return all data, split by the newline character, including the newline character"""
        data = self.read()
        lines = data.split(b"\n")
        out = [l + b"\n" for l in lines[:-1]]
        if data.endswith(b"\n"):
            return out
        else:
            return out + [lines[-1]]
        # return list(self) ???

    def readinto1(self, b):
        # single-read variant; delegates to readinto
        return self.readinto(b)

    def close(self):
        """Close file

        Finalizes writes, discards cache
        """
        if getattr(self, "_unclosable", False):
            return
        if self.closed:
            return
        try:
            if self.mode == "rb":
                self.cache = None
            else:
                if not self.forced:
                    # push any remaining buffered data as the final chunk
                    self.flush(force=True)

                if self.fs is not None:
                    # the write may have changed listings/info for this path
                    self.fs.invalidate_cache(self.path)
                    self.fs.invalidate_cache(self.fs._parent(self.path))
        finally:
            self.closed = True

    def readable(self):
        """Whether opened for reading"""
        return "r" in self.mode and not self.closed

    def seekable(self):
        """Whether is seekable (only in read mode)"""
        return self.readable()

    def writable(self):
        """Whether opened for writing"""
        return self.mode in {"wb", "ab", "xb"} and not self.closed

    def __reduce__(self):
        """Pickle support: read-mode files are recreated via ``reopen``."""
        if self.mode != "rb":
            raise RuntimeError("Pickling a writeable file is not supported")
        return reopen, (
            self.fs,
            self.path,
            self.mode,
            self.blocksize,
            self.loc,
            self.size,
            self.autocommit,
            self.cache.name if self.cache else "none",
            self.kwargs,
        )

    def __del__(self):
        if not self.closed:
            self.close()

    def __str__(self):
        return f"<File-like object {type(self.fs).__name__}, {self.path}>"

    __repr__ = __str__

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()
  1894. def reopen(fs, path, mode, blocksize, loc, size, autocommit, cache_type, kwargs):
  1895. file = fs.open(
  1896. path,
  1897. mode=mode,
  1898. block_size=blocksize,
  1899. autocommit=autocommit,
  1900. cache_type=cache_type,
  1901. size=size,
  1902. **kwargs,
  1903. )
  1904. if loc > 0:
  1905. file.seek(loc)
  1906. return file