# core.py (37 KB)
  1. from __future__ import annotations
  2. import os
  3. import sys
  4. import warnings
  5. from copy import copy
  6. from pathlib import Path
  7. from types import MappingProxyType
  8. from typing import IO
  9. from typing import TYPE_CHECKING
  10. from typing import Any
  11. from typing import BinaryIO
  12. from typing import Generator
  13. from typing import Literal
  14. from typing import Mapping
  15. from typing import Sequence
  16. from typing import TextIO
  17. from typing import TypeVar
  18. from typing import overload
  19. from urllib.parse import urlsplit
  20. from fsspec.registry import get_filesystem_class
  21. from fsspec.spec import AbstractFileSystem
  22. from upath._compat import FSSpecAccessorShim
  23. from upath._compat import PathlibPathShim
  24. from upath._compat import method_and_classmethod
  25. from upath._compat import str_remove_prefix
  26. from upath._compat import str_remove_suffix
  27. from upath._flavour import LazyFlavourDescriptor
  28. from upath._flavour import upath_get_kwargs_from_url
  29. from upath._flavour import upath_urijoin
  30. from upath._protocol import compatible_protocol
  31. from upath._protocol import get_upath_protocol
  32. from upath._stat import UPathStatResult
  33. from upath.registry import get_upath_class
  34. if TYPE_CHECKING:
  35. from urllib.parse import SplitResult
  36. if sys.version_info >= (3, 11):
  37. from typing import Self
  38. else:
  39. from typing_extensions import Self
  40. __all__ = ["UPath"]
  41. def __getattr__(name):
  42. if name == "_UriFlavour":
  43. from upath._flavour import default_flavour
  44. warnings.warn(
  45. "upath.core._UriFlavour should not be used anymore."
  46. " Please follow the universal_pathlib==0.2.0 migration guide at"
  47. " https://github.com/fsspec/universal_pathlib for more"
  48. " information.",
  49. DeprecationWarning,
  50. stacklevel=2,
  51. )
  52. return default_flavour
  53. elif name == "PT":
  54. warnings.warn(
  55. "upath.core.PT should not be used anymore."
  56. " Please follow the universal_pathlib==0.2.0 migration guide at"
  57. " https://github.com/fsspec/universal_pathlib for more"
  58. " information.",
  59. DeprecationWarning,
  60. stacklevel=2,
  61. )
  62. return TypeVar("PT", bound="UPath")
  63. else:
  64. raise AttributeError(name)
  65. _FSSPEC_HAS_WORKING_GLOB = None
  66. def _check_fsspec_has_working_glob():
  67. global _FSSPEC_HAS_WORKING_GLOB
  68. from fsspec.implementations.memory import MemoryFileSystem
  69. m = type("_M", (MemoryFileSystem,), {"store": {}, "pseudo_dirs": [""]})()
  70. m.touch("a.txt")
  71. m.touch("f/b.txt")
  72. g = _FSSPEC_HAS_WORKING_GLOB = len(m.glob("**/*.txt")) == 2
  73. return g
  74. def _make_instance(cls, args, kwargs):
  75. """helper for pickling UPath instances"""
  76. return cls(*args, **kwargs)
  77. _unset: Any = object()
  78. # accessors are deprecated
  79. _FSSpecAccessor = FSSpecAccessorShim
  80. class UPath(PathlibPathShim, Path):
  81. __slots__ = (
  82. "_protocol",
  83. "_storage_options",
  84. "_fs_cached",
  85. *PathlibPathShim.__missing_py312_slots__,
  86. "__drv",
  87. "__root",
  88. "__parts",
  89. )
  90. if TYPE_CHECKING:
  91. # public
  92. anchor: str
  93. drive: str
  94. parent: Self
  95. parents: Sequence[Self]
  96. parts: tuple[str, ...]
  97. root: str
  98. stem: str
  99. suffix: str
  100. suffixes: list[str]
  101. def with_name(self, name: str) -> Self: ...
  102. def with_stem(self, stem: str) -> Self: ...
  103. def with_suffix(self, suffix: str) -> Self: ...
  104. # private attributes
  105. _protocol: str
  106. _storage_options: dict[str, Any]
  107. _fs_cached: AbstractFileSystem
  108. _tail: str
  109. _protocol_dispatch: bool | None = None
  110. _flavour = LazyFlavourDescriptor()
  111. if sys.version_info >= (3, 13):
  112. parser = _flavour
  113. # === upath.UPath constructor =====================================
  114. def __new__(
  115. cls, *args, protocol: str | None = None, **storage_options: Any
  116. ) -> UPath:
  117. # fill empty arguments
  118. if not args:
  119. args = (".",)
  120. # create a copy if UPath class
  121. part0, *parts = args
  122. if not parts and not storage_options and isinstance(part0, cls):
  123. return copy(part0)
  124. # deprecate 'scheme'
  125. if "scheme" in storage_options:
  126. warnings.warn(
  127. "use 'protocol' kwarg instead of 'scheme'",
  128. DeprecationWarning,
  129. stacklevel=2,
  130. )
  131. protocol = storage_options.pop("scheme")
  132. # determine the protocol
  133. pth_protocol = get_upath_protocol(
  134. part0, protocol=protocol, storage_options=storage_options
  135. )
  136. # determine which UPath subclass to dispatch to
  137. if cls._protocol_dispatch or cls._protocol_dispatch is None:
  138. upath_cls = get_upath_class(protocol=pth_protocol)
  139. if upath_cls is None:
  140. raise ValueError(f"Unsupported filesystem: {pth_protocol!r}")
  141. else:
  142. # user subclasses can request to disable protocol dispatch
  143. # by setting MyUPathSubclass._protocol_dispatch to `False`.
  144. # This will effectively ignore the registered UPath
  145. # implementations and return an instance of MyUPathSubclass.
  146. # This can be useful if a subclass wants to extend the UPath
  147. # api, and it is fine to rely on the default implementation
  148. # for all supported user protocols.
  149. upath_cls = cls
  150. # create a new instance
  151. if cls is UPath:
  152. # we called UPath() directly, and want an instance based on the
  153. # provided or detected protocol (i.e. upath_cls)
  154. obj: UPath = object.__new__(upath_cls)
  155. obj._protocol = pth_protocol
  156. elif issubclass(cls, upath_cls):
  157. # we called a sub- or sub-sub-class of UPath, i.e. S3Path() and the
  158. # corresponding upath_cls based on protocol is equal-to or a
  159. # parent-of the cls.
  160. obj = object.__new__(cls)
  161. obj._protocol = pth_protocol
  162. elif issubclass(cls, UPath):
  163. # we called a subclass of UPath directly, i.e. S3Path() but the
  164. # detected protocol would return a non-related UPath subclass, i.e.
  165. # S3Path("file:///abc"). This behavior is going to raise an error
  166. # in future versions
  167. msg_protocol = repr(pth_protocol)
  168. if not pth_protocol:
  169. msg_protocol += " (empty string)"
  170. msg = (
  171. f"{cls.__name__!s}(...) detected protocol {msg_protocol!s} and"
  172. f" returns a {upath_cls.__name__} instance that isn't a direct"
  173. f" subclass of {cls.__name__}. This will raise an exception in"
  174. " future universal_pathlib versions. To prevent the issue, use"
  175. " UPath(...) to create instances of unrelated protocols or you"
  176. f" can instead derive your subclass {cls.__name__!s}(...) from"
  177. f" {upath_cls.__name__} or alternatively override behavior via"
  178. f" registering the {cls.__name__} implementation with protocol"
  179. f" {msg_protocol!s} replacing the default implementation."
  180. )
  181. warnings.warn(msg, DeprecationWarning, stacklevel=2)
  182. obj = object.__new__(upath_cls)
  183. obj._protocol = pth_protocol
  184. upath_cls.__init__(
  185. obj, *args, protocol=pth_protocol, **storage_options
  186. ) # type: ignore
  187. else:
  188. raise RuntimeError("UPath.__new__ expected cls to be subclass of UPath")
  189. return obj
  190. def __init__(
  191. self, *args, protocol: str | None = None, **storage_options: Any
  192. ) -> None:
  193. # allow subclasses to customize __init__ arg parsing
  194. base_options = getattr(self, "_storage_options", {})
  195. args, protocol, storage_options = type(self)._transform_init_args(
  196. args, protocol or self._protocol, {**base_options, **storage_options}
  197. )
  198. if self._protocol != protocol and protocol:
  199. self._protocol = protocol
  200. # retrieve storage_options
  201. if args:
  202. args0 = args[0]
  203. if isinstance(args0, UPath):
  204. self._storage_options = {**args0.storage_options, **storage_options}
  205. else:
  206. if hasattr(args0, "__fspath__"):
  207. _args0 = args0.__fspath__()
  208. else:
  209. _args0 = str(args0)
  210. self._storage_options = type(self)._parse_storage_options(
  211. _args0, protocol, storage_options
  212. )
  213. else:
  214. self._storage_options = storage_options.copy()
  215. # check that UPath subclasses in args are compatible
  216. # TODO:
  217. # Future versions of UPath could verify that storage_options
  218. # can be combined between UPath instances. Not sure if this
  219. # is really necessary though. A warning might be enough...
  220. if not compatible_protocol(self._protocol, *args):
  221. raise ValueError("can't combine incompatible UPath protocols")
  222. # fill ._raw_paths
  223. if hasattr(self, "_raw_paths"):
  224. return
  225. super().__init__(*args)
  226. # === upath.UPath PUBLIC ADDITIONAL API ===========================
  227. @property
  228. def protocol(self) -> str:
  229. """The fsspec protocol for the path."""
  230. return self._protocol
  231. @property
  232. def storage_options(self) -> Mapping[str, Any]:
  233. """The fsspec storage options for the path."""
  234. return MappingProxyType(self._storage_options)
  235. @property
  236. def fs(self) -> AbstractFileSystem:
  237. """The cached fsspec filesystem instance for the path."""
  238. try:
  239. return self._fs_cached
  240. except AttributeError:
  241. fs = self._fs_cached = self._fs_factory(
  242. str(self), self.protocol, self.storage_options
  243. )
  244. return fs
  245. @property
  246. def path(self) -> str:
  247. """The path that a fsspec filesystem can use."""
  248. return super().__str__()
  249. def joinuri(self, uri: str | os.PathLike[str]) -> UPath:
  250. """Join with urljoin behavior for UPath instances"""
  251. # short circuit if the new uri uses a different protocol
  252. other_protocol = get_upath_protocol(uri)
  253. if other_protocol and other_protocol != self._protocol:
  254. return UPath(uri)
  255. return UPath(
  256. upath_urijoin(str(self), str(uri)),
  257. protocol=other_protocol or self._protocol,
  258. **self.storage_options,
  259. )
  260. # === upath.UPath CUSTOMIZABLE API ================================
  261. @classmethod
  262. def _transform_init_args(
  263. cls,
  264. args: tuple[str | os.PathLike, ...],
  265. protocol: str,
  266. storage_options: dict[str, Any],
  267. ) -> tuple[tuple[str | os.PathLike, ...], str, dict[str, Any]]:
  268. """allow customization of init args in subclasses"""
  269. return args, protocol, storage_options
  270. @classmethod
  271. def _parse_storage_options(
  272. cls, urlpath: str, protocol: str, storage_options: Mapping[str, Any]
  273. ) -> dict[str, Any]:
  274. """Parse storage_options from the urlpath"""
  275. pth_storage_options = upath_get_kwargs_from_url(urlpath)
  276. return {**pth_storage_options, **storage_options}
  277. @classmethod
  278. def _fs_factory(
  279. cls, urlpath: str, protocol: str, storage_options: Mapping[str, Any]
  280. ) -> AbstractFileSystem:
  281. """Instantiate the filesystem_spec filesystem class"""
  282. fs_cls = get_filesystem_class(protocol)
  283. so_dct = fs_cls._get_kwargs_from_urls(urlpath)
  284. so_dct.update(storage_options)
  285. return fs_cls(**storage_options)
  286. # === upath.UPath COMPATIBILITY API ===============================
  287. def __init_subclass__(cls, **kwargs):
  288. """provide a clean migration path for custom user subclasses"""
  289. # Check if the user subclass has a custom `__new__` method
  290. has_custom_new_method = (
  291. cls.__new__ is not UPath.__new__
  292. and cls.__name__ not in {"PosixUPath", "WindowsUPath"}
  293. )
  294. if has_custom_new_method and cls._protocol_dispatch is None:
  295. warnings.warn(
  296. "Detected a customized `__new__` method in subclass"
  297. f" {cls.__name__!r}. Protocol dispatch will be disabled"
  298. " for this subclass. Please follow the"
  299. " universal_pathlib==0.2.0 migration guide at"
  300. " https://github.com/fsspec/universal_pathlib for more"
  301. " information.",
  302. DeprecationWarning,
  303. stacklevel=2,
  304. )
  305. cls._protocol_dispatch = False
  306. # Check if the user subclass has defined a custom accessor class
  307. accessor_cls = getattr(cls, "_default_accessor", None)
  308. has_custom_legacy_accessor = (
  309. accessor_cls is not None
  310. and issubclass(accessor_cls, FSSpecAccessorShim)
  311. and accessor_cls is not FSSpecAccessorShim
  312. )
  313. has_customized_fs_instantiation = (
  314. accessor_cls.__init__ is not FSSpecAccessorShim.__init__
  315. or hasattr(accessor_cls, "_fs")
  316. )
  317. if has_custom_legacy_accessor and has_customized_fs_instantiation:
  318. warnings.warn(
  319. "Detected a customized `__init__` method or `_fs` attribute"
  320. f" in the provided `_FSSpecAccessor` subclass of {cls.__name__!r}."
  321. " It is recommended to instead override the `UPath._fs_factory`"
  322. " classmethod to customize filesystem instantiation. Please follow"
  323. " the universal_pathlib==0.2.0 migration guide at"
  324. " https://github.com/fsspec/universal_pathlib for more"
  325. " information.",
  326. DeprecationWarning,
  327. stacklevel=2,
  328. )
  329. def _fs_factory(
  330. cls_, urlpath: str, protocol: str, storage_options: Mapping[str, Any]
  331. ) -> AbstractFileSystem:
  332. url = urlsplit(urlpath)
  333. if protocol:
  334. url = url._replace(scheme=protocol)
  335. inst = cls_._default_accessor(url, **storage_options)
  336. return inst._fs
  337. def _parse_storage_options(
  338. cls_, urlpath: str, protocol: str, storage_options: Mapping[str, Any]
  339. ) -> dict[str, Any]:
  340. url = urlsplit(urlpath)
  341. if protocol:
  342. url = url._replace(scheme=protocol)
  343. inst = cls_._default_accessor(url, **storage_options)
  344. return inst._fs.storage_options
  345. cls._fs_factory = classmethod(_fs_factory)
  346. cls._parse_storage_options = classmethod(_parse_storage_options)
  347. @property
  348. def _path(self):
  349. warnings.warn(
  350. "UPath._path is deprecated and should not be used."
  351. " Please follow the universal_pathlib==0.2.0 migration guide at"
  352. " https://github.com/fsspec/universal_pathlib for more"
  353. " information.",
  354. DeprecationWarning,
  355. stacklevel=2,
  356. )
  357. return self.path
  358. @property
  359. def _kwargs(self):
  360. warnings.warn(
  361. "UPath._kwargs is deprecated. Please use"
  362. " UPath.storage_options instead. Follow the"
  363. " universal_pathlib==0.2.0 migration guide at"
  364. " https://github.com/fsspec/universal_pathlib for more"
  365. " information.",
  366. DeprecationWarning,
  367. stacklevel=2,
  368. )
  369. return self.storage_options
  370. @property
  371. def _url(self) -> SplitResult:
  372. # TODO:
  373. # _url should be deprecated, but for now there is no good way of
  374. # accessing query parameters from urlpaths...
  375. return urlsplit(self.as_posix())
  376. if not TYPE_CHECKING:
  377. # allow mypy to catch missing attributes
  378. def __getattr__(self, item):
  379. if item == "_accessor":
  380. warnings.warn(
  381. "UPath._accessor is deprecated. Please use"
  382. " UPath.fs instead. Follow the"
  383. " universal_pathlib==0.2.0 migration guide at"
  384. " https://github.com/fsspec/universal_pathlib for more"
  385. " information.",
  386. DeprecationWarning,
  387. stacklevel=2,
  388. )
  389. if hasattr(self, "_default_accessor"):
  390. accessor_cls = self._default_accessor
  391. else:
  392. accessor_cls = FSSpecAccessorShim
  393. return accessor_cls.from_path(self)
  394. else:
  395. raise AttributeError(item)
  396. @classmethod
  397. def _from_parts(cls, parts, **kwargs):
  398. warnings.warn(
  399. "UPath._from_parts is deprecated and should not be used."
  400. " Please follow the universal_pathlib==0.2.0 migration guide at"
  401. " https://github.com/fsspec/universal_pathlib for more"
  402. " information.",
  403. DeprecationWarning,
  404. stacklevel=2,
  405. )
  406. parsed_url = kwargs.pop("url", None)
  407. if parsed_url:
  408. if protocol := parsed_url.scheme:
  409. kwargs["protocol"] = protocol
  410. if netloc := parsed_url.netloc:
  411. kwargs["netloc"] = netloc
  412. obj = UPath.__new__(cls, parts, **kwargs)
  413. obj.__init__(*parts, **kwargs)
  414. return obj
  415. @classmethod
  416. def _parse_args(cls, args):
  417. warnings.warn(
  418. "UPath._parse_args is deprecated and should not be used."
  419. " Please follow the universal_pathlib==0.2.0 migration guide at"
  420. " https://github.com/fsspec/universal_pathlib for more"
  421. " information.",
  422. DeprecationWarning,
  423. stacklevel=2,
  424. )
  425. # TODO !!!
  426. pth = cls._flavour.join(*args)
  427. return cls._parse_path(pth)
  428. @property
  429. def _drv(self):
  430. # direct access to ._drv should emit a warning,
  431. # but there is no good way of doing this for now...
  432. try:
  433. return self.__drv
  434. except AttributeError:
  435. self._load_parts()
  436. return self.__drv
  437. @_drv.setter
  438. def _drv(self, value):
  439. self.__drv = value
  440. @property
  441. def _root(self):
  442. # direct access to ._root should emit a warning,
  443. # but there is no good way of doing this for now...
  444. try:
  445. return self.__root
  446. except AttributeError:
  447. self._load_parts()
  448. return self.__root
  449. @_root.setter
  450. def _root(self, value):
  451. self.__root = value
  452. @property
  453. def _parts(self):
  454. # UPath._parts is not used anymore, and not available
  455. # in pathlib.Path for Python 3.12 and later.
  456. # Direct access to ._parts should emit a deprecation warning,
  457. # but there is no good way of doing this for now...
  458. try:
  459. return self.__parts
  460. except AttributeError:
  461. self._load_parts()
  462. self.__parts = super().parts
  463. return list(self.__parts)
  464. @_parts.setter
  465. def _parts(self, value):
  466. self.__parts = value
  467. @property
  468. def _cparts(self):
  469. # required for pathlib.Path.__eq__ compatibility on Python <3.12
  470. return self.parts
  471. # === pathlib.PurePath ============================================
  472. def __reduce__(self):
  473. args = tuple(self._raw_paths)
  474. kwargs = {
  475. "protocol": self._protocol,
  476. **self._storage_options,
  477. }
  478. return _make_instance, (type(self), args, kwargs)
  479. def with_segments(self, *pathsegments: str | os.PathLike[str]) -> Self:
  480. return type(self)(
  481. *pathsegments,
  482. protocol=self._protocol,
  483. **self._storage_options,
  484. )
  485. def joinpath(self, *pathsegments: str | os.PathLike[str]) -> Self:
  486. return self.with_segments(self, *pathsegments)
  487. def __truediv__(self, key: str | os.PathLike[str]) -> Self:
  488. try:
  489. return self.joinpath(key)
  490. except TypeError:
  491. return NotImplemented
  492. def __rtruediv__(self, key: str | os.PathLike[str]) -> Self:
  493. try:
  494. return self.with_segments(key, self)
  495. except TypeError:
  496. return NotImplemented
  497. # === upath.UPath non-standard changes ============================
  498. # NOTE:
  499. # this is a classmethod on the parent class, but we need to
  500. # override it here to make it possible to provide the _flavour
  501. # with the correct protocol...
  502. # pathlib 3.12 never calls this on the class. Only on the instance.
  503. @method_and_classmethod
  504. def _parse_path(self_or_cls, path): # noqa: B902
  505. if isinstance(self_or_cls, type):
  506. warnings.warn(
  507. "UPath._parse_path should not be used as a classmethod."
  508. " Please file an issue on the universal_pathlib issue tracker"
  509. " and describe your use case.",
  510. DeprecationWarning,
  511. stacklevel=2,
  512. )
  513. flavour = self_or_cls._flavour
  514. if flavour.supports_empty_parts:
  515. drv, root, rel = flavour.splitroot(path)
  516. if not root:
  517. parsed = []
  518. else:
  519. parsed = list(map(sys.intern, rel.split(flavour.sep)))
  520. if parsed[-1] == ".":
  521. parsed[-1] = ""
  522. parsed = [x for x in parsed if x != "."]
  523. if not flavour.has_meaningful_trailing_slash and parsed[-1] == "":
  524. parsed.pop()
  525. return drv, root, parsed
  526. if not path:
  527. return "", "", []
  528. sep = flavour.sep
  529. altsep = flavour.altsep
  530. if altsep:
  531. path = path.replace(altsep, sep)
  532. drv, root, rel = flavour.splitroot(path)
  533. if not root and drv.startswith(sep) and not drv.endswith(sep):
  534. drv_parts = drv.split(sep)
  535. if len(drv_parts) == 4 and drv_parts[2] not in "?.":
  536. # e.g. //server/share
  537. root = sep
  538. elif len(drv_parts) == 6:
  539. # e.g. //?/unc/server/share
  540. root = sep
  541. parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != "."]
  542. return drv, root, parsed
  543. @method_and_classmethod
  544. def _format_parsed_parts(self_or_cls, drv, root, tail, **kwargs): # noqa: B902
  545. if isinstance(self_or_cls, type):
  546. warnings.warn(
  547. "UPath._format_parsed_path should not be used as a classmethod."
  548. " Please file an issue on the universal_pathlib issue tracker"
  549. " and describe your use case.",
  550. DeprecationWarning,
  551. stacklevel=2,
  552. )
  553. flavour = self_or_cls._flavour
  554. if kwargs:
  555. warnings.warn(
  556. "UPath._format_parsed_parts should not be used with"
  557. " additional kwargs. Please follow the"
  558. " universal_pathlib==0.2.0 migration guide at"
  559. " https://github.com/fsspec/universal_pathlib for more"
  560. " information.",
  561. DeprecationWarning,
  562. stacklevel=2,
  563. )
  564. if "url" in kwargs and tail[:1] == [f"{drv}{root}"]:
  565. # This was called from code that expected py38-py311 behavior
  566. # of _format_parsed_parts, which takes drv, root and parts
  567. tail = tail[1:]
  568. if drv or root:
  569. return drv + root + flavour.sep.join(tail)
  570. elif tail and flavour.splitdrive(tail[0])[0]:
  571. tail = ["."] + tail
  572. return flavour.sep.join(tail)
  573. # === upath.UPath changes =========================================
  574. def __str__(self):
  575. if self._protocol:
  576. return f"{self._protocol}://{self.path}"
  577. else:
  578. return self.path
  579. def __fspath__(self):
  580. msg = (
  581. "in a future version of UPath this will be set to None"
  582. " unless the filesystem is local (or caches locally)"
  583. )
  584. warnings.warn(msg, PendingDeprecationWarning, stacklevel=2)
  585. return str(self)
  586. def __bytes__(self):
  587. msg = (
  588. "in a future version of UPath this will be set to None"
  589. " unless the filesystem is local (or caches locally)"
  590. )
  591. warnings.warn(msg, PendingDeprecationWarning, stacklevel=2)
  592. return os.fsencode(self)
  593. def as_uri(self) -> str:
  594. return str(self)
  595. def is_reserved(self) -> bool:
  596. return False
  597. def __eq__(self, other: object) -> bool:
  598. """UPaths are considered equal if their protocol, path and
  599. storage_options are equal."""
  600. if not isinstance(other, UPath):
  601. return NotImplemented
  602. return (
  603. self.path == other.path
  604. and self.protocol == other.protocol
  605. and self.storage_options == other.storage_options
  606. )
  607. def __hash__(self) -> int:
  608. """The returned hash is based on the protocol and path only.
  609. Note: in the future, if hash collisions become an issue, we
  610. can add `fsspec.utils.tokenize(storage_options)`
  611. """
  612. return hash((self.protocol, self.path))
  613. def relative_to( # type: ignore[override]
  614. self,
  615. other,
  616. /,
  617. *_deprecated,
  618. walk_up=False,
  619. ) -> Self:
  620. if isinstance(other, UPath) and self.storage_options != other.storage_options:
  621. raise ValueError(
  622. "paths have different storage_options:"
  623. f" {self.storage_options!r} != {other.storage_options!r}"
  624. )
  625. return super().relative_to(other, *_deprecated, walk_up=walk_up)
  626. def is_relative_to(self, other, /, *_deprecated) -> bool: # type: ignore[override]
  627. if isinstance(other, UPath) and self.storage_options != other.storage_options:
  628. return False
  629. return super().is_relative_to(other, *_deprecated)
  630. @property
  631. def name(self) -> str:
  632. tail = self._tail
  633. if not tail:
  634. return ""
  635. name = tail[-1]
  636. if not name and len(tail) >= 2:
  637. return tail[-2]
  638. else:
  639. return name
  640. # === pathlib.Path ================================================
  641. def stat( # type: ignore[override]
  642. self,
  643. *,
  644. follow_symlinks=True,
  645. ) -> UPathStatResult:
  646. if not follow_symlinks:
  647. warnings.warn(
  648. f"{type(self).__name__}.stat(follow_symlinks=False):"
  649. " is currently ignored.",
  650. UserWarning,
  651. stacklevel=2,
  652. )
  653. return UPathStatResult.from_info(self.fs.stat(self.path))
  654. def lstat(self) -> UPathStatResult: # type: ignore[override]
  655. return self.stat(follow_symlinks=False)
  656. def exists(self, *, follow_symlinks=True) -> bool:
  657. return self.fs.exists(self.path)
  658. def is_dir(self) -> bool:
  659. return self.fs.isdir(self.path)
  660. def is_file(self) -> bool:
  661. return self.fs.isfile(self.path)
  662. def is_mount(self) -> bool:
  663. return False
  664. def is_symlink(self) -> bool:
  665. try:
  666. info = self.fs.info(self.path)
  667. if "islink" in info:
  668. return bool(info["islink"])
  669. except FileNotFoundError:
  670. return False
  671. return False
  672. def is_junction(self) -> bool:
  673. return False
  674. def is_block_device(self) -> bool:
  675. return False
  676. def is_char_device(self) -> bool:
  677. return False
  678. def is_fifo(self) -> bool:
  679. return False
  680. def is_socket(self) -> bool:
  681. return False
  682. def samefile(self, other_path) -> bool:
  683. st = self.stat()
  684. if isinstance(other_path, UPath):
  685. other_st = other_path.stat()
  686. else:
  687. other_st = self.with_segments(other_path).stat()
  688. return st == other_st
  689. @overload # type: ignore[override]
  690. def open(
  691. self,
  692. mode: Literal["r", "w", "a"] = "r",
  693. buffering: int = ...,
  694. encoding: str = ...,
  695. errors: str = ...,
  696. newline: str = ...,
  697. **fsspec_kwargs: Any,
  698. ) -> TextIO: ...
  699. @overload
  700. def open( # type: ignore[override]
  701. self,
  702. mode: Literal["rb", "wb", "ab"],
  703. buffering: int = ...,
  704. encoding: str = ...,
  705. errors: str = ...,
  706. newline: str = ...,
  707. **fsspec_kwargs: Any,
  708. ) -> BinaryIO: ...
  709. def open(
  710. self,
  711. mode: str = "r",
  712. *args: Any,
  713. **fsspec_kwargs: Any,
  714. ) -> IO[Any]:
  715. """
  716. Open the file pointed by this path and return a file object, as
  717. the built-in open() function does.
  718. Parameters
  719. ----------
  720. mode:
  721. Opening mode. Default is 'r'.
  722. buffering:
  723. Default is the block size of the underlying fsspec filesystem.
  724. encoding:
  725. Encoding is only used in text mode. Default is None.
  726. errors:
  727. Error handling for encoding. Only used in text mode. Default is None.
  728. newline:
  729. Newline handling. Only used in text mode. Default is None.
  730. **fsspec_kwargs:
  731. Additional options for the fsspec filesystem.
  732. """
  733. # match the signature of pathlib.Path.open()
  734. for key, value in zip(["buffering", "encoding", "errors", "newline"], args):
  735. if key in fsspec_kwargs:
  736. raise TypeError(
  737. f"{type(self).__name__}.open() got multiple values for '{key}'"
  738. )
  739. fsspec_kwargs[key] = value
  740. # translate pathlib buffering to fs block_size
  741. if "buffering" in fsspec_kwargs:
  742. fsspec_kwargs.setdefault("block_size", fsspec_kwargs.pop("buffering"))
  743. return self.fs.open(self.path, mode=mode, **fsspec_kwargs)
  744. def iterdir(self) -> Generator[UPath, None, None]:
  745. for name in self.fs.listdir(self.path):
  746. # fsspec returns dictionaries
  747. if isinstance(name, dict):
  748. name = name.get("name")
  749. if name in {".", ".."}:
  750. # Yielding a path object for these makes little sense
  751. continue
  752. # only want the path name with iterdir
  753. _, _, name = str_remove_suffix(name, "/").rpartition(self._flavour.sep)
  754. yield self.with_segments(*self.parts, name)
  755. def _scandir(self):
  756. raise NotImplementedError # todo
  def _make_child_relpath(self, name):
      """
      Build the child path through the parent class implementation and
      delete the ``_str`` attribute from the result before returning it.
      """
      child = super()._make_child_relpath(name)
      # fix _str = str(self) assignment
      del child._str
      return child
  def glob(
      self, pattern: str, *, case_sensitive=None
  ) -> Generator[UPath, None, None]:
      """
      Yield the paths that ``self.fs.glob`` finds for ``pattern`` joined
      onto this path.

      Each hit has this path's protocol-stripped prefix and one leading
      separator removed, and the remainder is joined back onto this path.
      ``case_sensitive`` is accepted but not used by this implementation.
      """
      full_pattern = self.joinpath(pattern).path
      separator = self._flavour.sep
      root = self.fs._strip_protocol(self.path)
      for match in self.fs.glob(full_pattern):
          relative = str_remove_prefix(match, root)
          relative = str_remove_prefix(relative, separator)
          yield self.joinpath(relative)
  def rglob(
      self, pattern: str, *, case_sensitive=None
  ) -> Generator[UPath, None, None]:
      """
      Yield the paths matching ``pattern`` below this path, recursively.

      When the module flag ``_FSSPEC_HAS_WORKING_GLOB`` is truthy, a single
      ``**/<pattern>`` glob is issued. Otherwise both ``<pattern>`` and
      ``**/<pattern>`` are globbed and names already yielded are skipped.
      ``case_sensitive`` is accepted but not used by this implementation.
      """
      if _FSSPEC_HAS_WORKING_GLOB is None:
          # presumably fills in the module flag; verify against its definition
          _check_fsspec_has_working_glob()

      if not _FSSPEC_HAS_WORKING_GLOB:
          direct_pattern = self.joinpath(pattern).path
          recursive_pattern = self.joinpath("**", pattern).path
          separator = self._flavour.sep
          root = self.fs._strip_protocol(self.path)
          yielded = set()
          for candidate in (direct_pattern, recursive_pattern):
              for match in self.fs.glob(candidate):
                  relative = str_remove_prefix(match, root)
                  relative = str_remove_prefix(relative, separator)
                  if relative not in yielded:
                      yielded.add(relative)
                      yield self.joinpath(relative)
          return

      recursive_pattern = self.joinpath("**", pattern).path
      separator = self._flavour.sep
      root = self.fs._strip_protocol(self.path)
      for match in self.fs.glob(recursive_pattern):
          relative = str_remove_prefix(match, root)
          relative = str_remove_prefix(relative, separator)
          yield self.joinpath(relative)
  @classmethod
  def cwd(cls) -> UPath:
      """
      Return the ``cwd()`` of the class registered for the empty protocol.

      Only available on ``UPath`` itself; every other class raises
      NotImplementedError.
      """
      if cls is not UPath:
          raise NotImplementedError
      return get_upath_class("").cwd()  # type: ignore[union-attr]
  @classmethod
  def home(cls) -> UPath:
      """
      Return the ``home()`` of the class registered for the empty protocol.

      Only available on ``UPath`` itself; every other class raises
      NotImplementedError.
      """
      if cls is not UPath:
          raise NotImplementedError
      return get_upath_class("").home()  # type: ignore[union-attr]
  def absolute(self) -> Self:
      """Return this very path object; no conversion is performed."""
      return self
  def is_absolute(self) -> bool:
      """Return what the flavour's ``isabs`` reports for ``str(self)``."""
      isabs = self._flavour.isabs
      return isabs(str(self))
  def resolve(self, strict: bool = False) -> Self:
      """
      Return a path with "." and ".." parts collapsed lexically.

      The first part is always kept as is. A ".." removes the previously kept
      part, if there is one; a "." is dropped. When no part is a dot
      entry, this same object is returned. ``strict`` is accepted but not
      used by this implementation.
      """
      parts = self.parts
      # Do not attempt to normalize path if no parts are dots
      if not any(segment in (".", "..") for segment in parts):
          return self

      anchor, tail = parts[:1], parts[1:]
      kept: list[str] = []
      for segment in tail:
          if segment == ".":
              continue
          if segment != "..":
              kept.append(segment)
          elif kept:
              kept.pop()
      return self.with_segments(*anchor, *kept)
  def owner(self) -> str:
      """Not supported here; always raises NotImplementedError."""
      raise NotImplementedError
  def group(self) -> str:
      """Not supported here; always raises NotImplementedError."""
      raise NotImplementedError
  def readlink(self) -> Self:
      """Not supported here; always raises NotImplementedError."""
      raise NotImplementedError
  def touch(self, mode=0o666, exist_ok=True) -> None:
      """
      Create the file if it is missing, otherwise ask the filesystem to
      touch it without truncating.

      Raises FileExistsError when the file exists and ``exist_ok`` is false.
      A NotImplementedError or ValueError from touching an existing file is
      ignored. ``mode`` is accepted but not used by this implementation.
      """
      already_there = self.fs.exists(self.path)
      if not already_there:
          self.fs.touch(self.path, truncate=True)
          return
      if not exist_ok:
          raise FileExistsError(str(self))
      try:
          self.fs.touch(self.path, truncate=False)
      except (NotImplementedError, ValueError):
          pass  # unsupported by filesystem
  def mkdir(self, mode=0o777, parents=False, exist_ok=False) -> None:
      """
      Create this directory through ``self.fs.mkdir``.

      ``parents`` is forwarded as ``create_parents`` and ``mode`` as ``mode``.
      Raises FileExistsError when the path already exists and ``exist_ok``
      is false, or when it exists but is not a directory.
      """
      must_be_new = parents and not exist_ok
      if must_be_new and self.exists():
          raise FileExistsError(str(self))
      try:
          self.fs.mkdir(self.path, create_parents=parents, mode=mode)
      except FileExistsError:
          # tolerate the clash only for an existing directory with exist_ok
          if not exist_ok or not self.is_dir():
              raise FileExistsError(str(self))
  def chmod(self, mode: int, *, follow_symlinks: bool = True) -> None:
      """Not supported here; always raises NotImplementedError."""
      raise NotImplementedError
  def lchmod(self, mode: int) -> None:
      """Not supported here; always raises NotImplementedError."""
      raise NotImplementedError
  def unlink(self, missing_ok: bool = False) -> None:
      """
      Remove this path with a non-recursive ``self.fs.rm``.

      A missing path raises FileNotFoundError unless ``missing_ok`` is true,
      in which case nothing happens.
      """
      if self.exists():
          self.fs.rm(self.path, recursive=False)
      elif not missing_ok:
          raise FileNotFoundError(str(self))
  def rmdir(self, recursive: bool = True) -> None:  # fixme: non-standard
      """
      Remove this directory through ``self.fs.rm``.

      Parameters
      ----------
      recursive:
          Forwarded to ``self.fs.rm``. When false, the directory must not
          contain any entry. Default is True.

      Raises
      ------
      NotADirectoryError
          If this path is not a directory.
      OSError
          If ``recursive`` is false and the directory has at least one entry.
      """
      if not self.is_dir():
          raise NotADirectoryError(str(self))
      # next() needs a default here: without one, an empty directory makes it
      # raise StopIteration, which escaped instead of allowing the removal.
      if not recursive and next(self.iterdir(), None) is not None:
          raise OSError(f"Not recursive and directory not empty: {self}")
      self.fs.rm(self.path, recursive=recursive)
  def rename(
      self,
      target: str | os.PathLike[str] | UPath,
      *,  # note: non-standard compared to pathlib
      recursive: bool = _unset,
      maxdepth: int | None = _unset,
      **kwargs: Any,
  ) -> Self:
      """
      Move this path to ``target`` with ``self.fs.mv`` and return the
      destination path.

      A string target is first wrapped in ``UPath`` with this path's storage
      options when those are non-empty. A target that carries a protocol must
      use this path's protocol, otherwise ValueError is raised. A target
      without a protocol is normalized with ``os.path.normpath`` and joined
      onto this path's parent. ``recursive`` and ``maxdepth`` are passed on
      to ``self.fs.mv`` only when they were given explicitly; any other
      keyword arguments are forwarded unchanged.
      """
      if isinstance(target, str) and self.storage_options:
          target = UPath(target, **self.storage_options)

      target_protocol = get_upath_protocol(target)
      if not target_protocol:
          base = self.parent
          # avoid calling .resolve for subclasses of UPath
          if ".." in base.parts or "." in base.parts:
              base = base.resolve()
          target_ = base.joinpath(os.path.normpath(target))
      else:
          if target_protocol != self.protocol:
              raise ValueError(
                  f"expected protocol {self.protocol!r}, got: {target_protocol!r}"
              )
          target_ = (
              target
              if isinstance(target, UPath)
              else UPath(target, **self.storage_options)
          )
          # avoid calling .resolve for subclasses of UPath
          if ".." in target_.parts or "." in target_.parts:
              target_ = target_.resolve()
      assert isinstance(target_, type(self)), "identical protocols enforced above"

      # forward only the options the caller actually supplied
      for option, value in (("recursive", recursive), ("maxdepth", maxdepth)):
          if value is not _unset:
              kwargs[option] = value
      self.fs.mv(self.path, target_.path, **kwargs)
      return target_
  def replace(self, target: str | os.PathLike[str] | UPath) -> UPath:
      """Not implemented yet; always raises NotImplementedError."""
      raise NotImplementedError  # todo
  def symlink_to(  # type: ignore[override]
      self,
      target: str | os.PathLike[str] | UPath,
      target_is_directory: bool = False,
  ) -> None:
      """Not supported here; always raises NotImplementedError."""
      raise NotImplementedError
  def hardlink_to(  # type: ignore[override]
      self,
      target: str | os.PathLike[str] | UPath,
  ) -> None:
      """Not supported here; always raises NotImplementedError."""
      raise NotImplementedError
  def expanduser(self) -> Self:
      """Return this very path object; no user expansion is performed."""
      return self