_flavour_sources.py 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966
  1. """ upath._flavour_sources
  2. <experimental!>
  3. Warning
  4. -------
  5. Do not modify this file manually!
  6. It is generated by `dev/generate_flavours.py`
  7. To be able to parse the different filesystem uri schemes, we need
  8. the string parsing functionality of each of the filesystem implementations.
  9. In an attempt to support parsing uris without having to import the
  10. specific filesystems, we extract the necessary subset of the
  11. AbstractFileSystem classes and generate a new "flavour" class for
  12. each of the known filesystems. This will allow us to provide a
  13. `PurePath` equivalent `PureUPath` for each protocol in the future
  14. without a direct dependency on the underlying filesystem package.
  15. """
  16. #
  17. # skipping protocols:
  18. # - blockcache
  19. # - cached
  20. # - dir
  21. # - filecache
  22. # - simplecache
  23. # protocol import errors:
  24. # - gdrive (Please install gdrivefs for access to Google Drive)
  25. # - generic (GenericFileSystem: '_strip_protocol' not a classmethod)
  26. #
  27. from __future__ import annotations
  28. import logging
  29. import os
  30. import re
  31. from pathlib import PurePath
  32. from pathlib import PureWindowsPath
  33. from typing import Any
  34. from typing import Literal
  35. from typing import cast
  36. from urllib.parse import parse_qs
  37. from urllib.parse import urlsplit
  38. from fsspec.implementations.local import make_path_posix
  39. from fsspec.utils import infer_storage_options
  40. from fsspec.utils import stringify_path
  41. __all__ = [
  42. "AbstractFileSystemFlavour",
  43. "FileSystemFlavourBase",
  44. "flavour_registry",
  45. ]
  46. logger = logging.getLogger(__name__)
  47. flavour_registry: dict[str, type[FileSystemFlavourBase]] = {}
  48. class FileSystemFlavourBase:
  49. """base class for the fsspec flavours"""
  50. protocol: str | tuple[str, ...]
  51. root_marker: Literal["/", ""]
  52. sep: Literal["/"]
  53. @classmethod
  54. def _strip_protocol(cls, path):
  55. raise NotImplementedError
  56. @staticmethod
  57. def _get_kwargs_from_urls(path):
  58. raise NotImplementedError
  59. @classmethod
  60. def _parent(cls, path):
  61. raise NotImplementedError
  62. def __init_subclass__(cls: Any, **kwargs):
  63. if isinstance(cls.protocol, str):
  64. protocols = (cls.protocol,)
  65. else:
  66. protocols = tuple(cls.protocol)
  67. for protocol in protocols:
  68. if protocol in flavour_registry:
  69. raise ValueError(f"protocol {protocol!r} already registered")
  70. flavour_registry[protocol] = cls
  71. class AbstractFileSystemFlavour(FileSystemFlavourBase):
  72. __orig_class__ = 'fsspec.spec.AbstractFileSystem'
  73. __orig_version__ = '2024.10.0'
  74. protocol: str | tuple[str, ...] = 'abstract'
  75. root_marker: Literal['', '/'] = ''
  76. sep: Literal['/'] = '/'
  77. @classmethod
  78. def _strip_protocol(cls, path):
  79. """Turn path from fully-qualified to file-system-specific
  80. May require FS-specific handling, e.g., for relative paths or links.
  81. """
  82. if isinstance(path, list):
  83. return [cls._strip_protocol(p) for p in path]
  84. path = stringify_path(path)
  85. protos = (cls.protocol,) if isinstance(cls.protocol, str) else cls.protocol
  86. for protocol in protos:
  87. if path.startswith(protocol + "://"):
  88. path = path[len(protocol) + 3 :]
  89. elif path.startswith(protocol + "::"):
  90. path = path[len(protocol) + 2 :]
  91. path = path.rstrip("/")
  92. # use of root_marker to make minimum required path, e.g., "/"
  93. return path or cls.root_marker
  94. @staticmethod
  95. def _get_kwargs_from_urls(path):
  96. """If kwargs can be encoded in the paths, extract them here
  97. This should happen before instantiation of the class; incoming paths
  98. then should be amended to strip the options in methods.
  99. Examples may look like an sftp path "sftp://user@host:/my/path", where
  100. the user and host should become kwargs and later get stripped.
  101. """
  102. # by default, nothing happens
  103. return {}
  104. @classmethod
  105. def _parent(cls, path):
  106. path = cls._strip_protocol(path)
  107. if "/" in path:
  108. parent = path.rsplit("/", 1)[0].lstrip(cls.root_marker)
  109. return cls.root_marker + parent
  110. else:
  111. return cls.root_marker
  112. class AsyncLocalFileSystemFlavour(AbstractFileSystemFlavour):
  113. __orig_class__ = 'morefs.asyn_local.AsyncLocalFileSystem'
  114. __orig_version__ = '0.2.2'
  115. protocol = ()
  116. root_marker = '/'
  117. sep = '/'
  118. local_file = True
  119. @classmethod
  120. def _strip_protocol(cls, path):
  121. path = stringify_path(path)
  122. if path.startswith("file://"):
  123. path = path[7:]
  124. elif path.startswith("file:"):
  125. path = path[5:]
  126. elif path.startswith("local://"):
  127. path = path[8:]
  128. elif path.startswith("local:"):
  129. path = path[6:]
  130. path = make_path_posix(path)
  131. if os.sep != "/":
  132. # This code-path is a stripped down version of
  133. # > drive, path = ntpath.splitdrive(path)
  134. if path[1:2] == ":":
  135. # Absolute drive-letter path, e.g. X:\Windows
  136. # Relative path with drive, e.g. X:Windows
  137. drive, path = path[:2], path[2:]
  138. elif path[:2] == "//":
  139. # UNC drives, e.g. \\server\share or \\?\UNC\server\share
  140. # Device drives, e.g. \\.\device or \\?\device
  141. if (index1 := path.find("/", 2)) == -1 or (
  142. index2 := path.find("/", index1 + 1)
  143. ) == -1:
  144. drive, path = path, ""
  145. else:
  146. drive, path = path[:index2], path[index2:]
  147. else:
  148. # Relative path, e.g. Windows
  149. drive = ""
  150. path = path.rstrip("/") or cls.root_marker
  151. return drive + path
  152. else:
  153. return path.rstrip("/") or cls.root_marker
  154. @classmethod
  155. def _parent(cls, path):
  156. path = cls._strip_protocol(path)
  157. if os.sep == "/":
  158. # posix native
  159. return path.rsplit("/", 1)[0] or "/"
  160. else:
  161. # NT
  162. path_ = path.rsplit("/", 1)[0]
  163. if len(path_) <= 3:
  164. if path_[1:2] == ":":
  165. # nt root (something like c:/)
  166. return path_[0] + ":/"
  167. # More cases may be required here
  168. return path_
  169. class AzureBlobFileSystemFlavour(AbstractFileSystemFlavour):
  170. __orig_class__ = 'adlfs.spec.AzureBlobFileSystem'
  171. __orig_version__ = '2024.7.0'
  172. protocol = ('abfs', 'az', 'abfss')
  173. root_marker = ''
  174. sep = '/'
  175. @classmethod
  176. def _strip_protocol(cls, path: str):
  177. """
  178. Remove the protocol from the input path
  179. Parameters
  180. ----------
  181. path: str
  182. Path to remove the protocol from
  183. Returns
  184. -------
  185. str
  186. Returns a path without the protocol
  187. """
  188. if isinstance(path, list): # type: ignore[unreachable]
  189. return [cls._strip_protocol(p) for p in path] # type: ignore[unreachable]
  190. STORE_SUFFIX = ".dfs.core.windows.net"
  191. logger.debug(f"_strip_protocol for {path}")
  192. if not path.startswith(("abfs://", "az://", "abfss://")):
  193. path = path.lstrip("/")
  194. path = "abfs://" + path
  195. ops = infer_storage_options(path)
  196. if "username" in ops:
  197. if ops.get("username", None):
  198. ops["path"] = ops["username"] + ops["path"]
  199. # we need to make sure that the path retains
  200. # the format {host}/{path}
  201. # here host is the container_name
  202. elif ops.get("host", None):
  203. if (
  204. ops["host"].count(STORE_SUFFIX) == 0
  205. ): # no store-suffix, so this is container-name
  206. ops["path"] = ops["host"] + ops["path"]
  207. url_query = ops.get("url_query")
  208. if url_query is not None:
  209. ops["path"] = f"{ops['path']}?{url_query}"
  210. logger.debug(f"_strip_protocol({path}) = {ops}")
  211. stripped_path = ops["path"].lstrip("/")
  212. return stripped_path
  213. @staticmethod
  214. def _get_kwargs_from_urls(urlpath):
  215. """Get the account_name from the urlpath and pass to storage_options"""
  216. ops = infer_storage_options(urlpath)
  217. out = {}
  218. host = ops.get("host", None)
  219. if host:
  220. match = re.match(
  221. r"(?P<account_name>.+)\.(dfs|blob)\.core\.windows\.net", host
  222. )
  223. if match:
  224. account_name = match.groupdict()["account_name"]
  225. out["account_name"] = account_name
  226. url_query = ops.get("url_query")
  227. if url_query is not None:
  228. from urllib.parse import parse_qs
  229. parsed = parse_qs(url_query)
  230. if "versionid" in parsed:
  231. out["version_aware"] = True
  232. return out
  233. class AzureDatalakeFileSystemFlavour(AbstractFileSystemFlavour):
  234. __orig_class__ = 'adlfs.gen1.AzureDatalakeFileSystem'
  235. __orig_version__ = '2024.7.0'
  236. protocol = ('adl',)
  237. root_marker = ''
  238. sep = '/'
  239. @classmethod
  240. def _strip_protocol(cls, path):
  241. ops = infer_storage_options(path)
  242. return ops["path"]
  243. @staticmethod
  244. def _get_kwargs_from_urls(paths):
  245. """Get the store_name from the urlpath and pass to storage_options"""
  246. ops = infer_storage_options(paths)
  247. out = {}
  248. if ops.get("host", None):
  249. out["store_name"] = ops["host"]
  250. return out
  251. class BoxFileSystemFlavour(AbstractFileSystemFlavour):
  252. __orig_class__ = 'boxfs.boxfs.BoxFileSystem'
  253. __orig_version__ = '0.3.0'
  254. protocol = ('box',)
  255. root_marker = '/'
  256. sep = '/'
  257. @classmethod
  258. def _strip_protocol(cls, path) -> str:
  259. path = super()._strip_protocol(path)
  260. path = path.replace("\\", "/")
  261. # Make all paths start with root marker
  262. if not path.startswith(cls.root_marker):
  263. path = cls.root_marker + path
  264. return path
  265. class DaskWorkerFileSystemFlavour(AbstractFileSystemFlavour):
  266. __orig_class__ = 'fsspec.implementations.dask.DaskWorkerFileSystem'
  267. __orig_version__ = '2024.10.0'
  268. protocol = ('dask',)
  269. root_marker = ''
  270. sep = '/'
  271. @staticmethod
  272. def _get_kwargs_from_urls(path):
  273. so = infer_storage_options(path)
  274. if "host" in so and "port" in so:
  275. return {"client": f"{so['host']}:{so['port']}"}
  276. else:
  277. return {}
  278. class DataFileSystemFlavour(AbstractFileSystemFlavour):
  279. __orig_class__ = 'fsspec.implementations.data.DataFileSystem'
  280. __orig_version__ = '2024.10.0'
  281. protocol = ('data',)
  282. root_marker = ''
  283. sep = '/'
  284. class DatabricksFileSystemFlavour(AbstractFileSystemFlavour):
  285. __orig_class__ = 'fsspec.implementations.dbfs.DatabricksFileSystem'
  286. __orig_version__ = '2024.10.0'
  287. protocol = ('dbfs',)
  288. root_marker = ''
  289. sep = '/'
  290. class DictFSFlavour(AbstractFileSystemFlavour):
  291. __orig_class__ = 'morefs.dict.DictFS'
  292. __orig_version__ = '0.2.2'
  293. protocol = ('dictfs',)
  294. root_marker = ''
  295. sep = '/'
  296. @classmethod
  297. def _strip_protocol(cls, path: str) -> str:
  298. if path.startswith("dictfs://"):
  299. path = path[len("dictfs://") :]
  300. if "::" in path or "://" in path:
  301. return path.rstrip("/")
  302. path = path.lstrip("/").rstrip("/")
  303. return "/" + path if path else cls.root_marker
  304. class DropboxDriveFileSystemFlavour(AbstractFileSystemFlavour):
  305. __orig_class__ = 'dropboxdrivefs.core.DropboxDriveFileSystem'
  306. __orig_version__ = '1.4.1'
  307. protocol = ('dropbox',)
  308. root_marker = ''
  309. sep = '/'
  310. class FTPFileSystemFlavour(AbstractFileSystemFlavour):
  311. __orig_class__ = 'fsspec.implementations.ftp.FTPFileSystem'
  312. __orig_version__ = '2024.10.0'
  313. protocol = ('ftp',)
  314. root_marker = '/'
  315. sep = '/'
  316. @classmethod
  317. def _strip_protocol(cls, path):
  318. return "/" + infer_storage_options(path)["path"].lstrip("/").rstrip("/")
  319. @staticmethod
  320. def _get_kwargs_from_urls(urlpath):
  321. out = infer_storage_options(urlpath)
  322. out.pop("path", None)
  323. out.pop("protocol", None)
  324. return out
  325. class GCSFileSystemFlavour(AbstractFileSystemFlavour):
  326. __orig_class__ = 'gcsfs.core.GCSFileSystem'
  327. __orig_version__ = '2024.10.0'
  328. protocol = ('gs', 'gcs')
  329. root_marker = ''
  330. sep = '/'
  331. @classmethod
  332. def _strip_protocol(cls, path):
  333. if isinstance(path, list):
  334. return [cls._strip_protocol(p) for p in path]
  335. path = stringify_path(path)
  336. protos = (cls.protocol,) if isinstance(cls.protocol, str) else cls.protocol
  337. for protocol in protos:
  338. if path.startswith(protocol + "://"):
  339. path = path[len(protocol) + 3 :]
  340. elif path.startswith(protocol + "::"):
  341. path = path[len(protocol) + 2 :]
  342. # use of root_marker to make minimum required path, e.g., "/"
  343. return path or cls.root_marker
  344. @classmethod
  345. def _get_kwargs_from_urls(cls, path):
  346. _, _, generation = cls._split_path(path, version_aware=True)
  347. if generation is not None:
  348. return {"version_aware": True}
  349. return {}
  350. @classmethod
  351. def _split_path(cls, path, version_aware=False):
  352. """
  353. Normalise GCS path string into bucket and key.
  354. Parameters
  355. ----------
  356. path : string
  357. Input path, like `gcs://mybucket/path/to/file`.
  358. Path is of the form: '[gs|gcs://]bucket[/key][?querystring][#fragment]'
  359. GCS allows object generation (object version) to be specified in either
  360. the URL fragment or the `generation` query parameter. When provided,
  361. the fragment will take priority over the `generation` query paramenter.
  362. Returns
  363. -------
  364. (bucket, key, generation) tuple
  365. """
  366. path = cls._strip_protocol(path).lstrip("/")
  367. if "/" not in path:
  368. return path, "", None
  369. bucket, keypart = path.split("/", 1)
  370. key = keypart
  371. generation = None
  372. if version_aware:
  373. parts = urlsplit(keypart)
  374. try:
  375. if parts.fragment:
  376. generation = parts.fragment
  377. elif parts.query:
  378. parsed = parse_qs(parts.query)
  379. if "generation" in parsed:
  380. generation = parsed["generation"][0]
  381. # Sanity check whether this could be a valid generation ID. If
  382. # it is not, assume that # or ? characters are supposed to be
  383. # part of the object name.
  384. if generation is not None:
  385. int(generation)
  386. key = parts.path
  387. except ValueError:
  388. generation = None
  389. return (
  390. bucket,
  391. key,
  392. generation,
  393. )
  394. class GitFileSystemFlavour(AbstractFileSystemFlavour):
  395. __orig_class__ = 'fsspec.implementations.git.GitFileSystem'
  396. __orig_version__ = '2024.10.0'
  397. protocol = ('git',)
  398. root_marker = ''
  399. sep = '/'
  400. @classmethod
  401. def _strip_protocol(cls, path):
  402. path = super()._strip_protocol(path).lstrip("/")
  403. if ":" in path:
  404. path = path.split(":", 1)[1]
  405. if "@" in path:
  406. path = path.split("@", 1)[1]
  407. return path.lstrip("/")
  408. @staticmethod
  409. def _get_kwargs_from_urls(path):
  410. if path.startswith("git://"):
  411. path = path[6:]
  412. out = {}
  413. if ":" in path:
  414. out["path"], path = path.split(":", 1)
  415. if "@" in path:
  416. out["ref"], path = path.split("@", 1)
  417. return out
  418. class GithubFileSystemFlavour(AbstractFileSystemFlavour):
  419. __orig_class__ = 'fsspec.implementations.github.GithubFileSystem'
  420. __orig_version__ = '2024.10.0'
  421. protocol = ('github',)
  422. root_marker = ''
  423. sep = '/'
  424. @classmethod
  425. def _strip_protocol(cls, path):
  426. opts = infer_storage_options(path)
  427. if "username" not in opts:
  428. return super()._strip_protocol(path)
  429. return opts["path"].lstrip("/")
  430. @staticmethod
  431. def _get_kwargs_from_urls(path):
  432. opts = infer_storage_options(path)
  433. if "username" not in opts:
  434. return {}
  435. out = {"org": opts["username"], "repo": opts["password"]}
  436. if opts["host"]:
  437. out["sha"] = opts["host"]
  438. return out
  439. class HTTPFileSystemFlavour(AbstractFileSystemFlavour):
  440. __orig_class__ = 'fsspec.implementations.http.HTTPFileSystem'
  441. __orig_version__ = '2024.10.0'
  442. protocol = ('http', 'https')
  443. root_marker = ''
  444. sep = '/'
  445. @classmethod
  446. def _strip_protocol(cls, path):
  447. """For HTTP, we always want to keep the full URL"""
  448. return path
  449. @classmethod
  450. def _parent(cls, path):
  451. # override, since _strip_protocol is different for URLs
  452. par = super()._parent(path)
  453. if len(par) > 7: # "http://..."
  454. return par
  455. return ""
  456. class HadoopFileSystemFlavour(AbstractFileSystemFlavour):
  457. __orig_class__ = 'fsspec.implementations.arrow.HadoopFileSystem'
  458. __orig_version__ = '2024.10.0'
  459. protocol = ('hdfs', 'arrow_hdfs')
  460. root_marker = '/'
  461. sep = '/'
  462. @classmethod
  463. def _strip_protocol(cls, path):
  464. ops = infer_storage_options(path)
  465. path = ops["path"]
  466. if path.startswith("//"):
  467. # special case for "hdfs://path" (without the triple slash)
  468. path = path[1:]
  469. return path
  470. @staticmethod
  471. def _get_kwargs_from_urls(path):
  472. ops = infer_storage_options(path)
  473. out = {}
  474. if ops.get("host", None):
  475. out["host"] = ops["host"]
  476. if ops.get("username", None):
  477. out["user"] = ops["username"]
  478. if ops.get("port", None):
  479. out["port"] = ops["port"]
  480. if ops.get("url_query", None):
  481. queries = parse_qs(ops["url_query"])
  482. if queries.get("replication", None):
  483. out["replication"] = int(queries["replication"][0])
  484. return out
  485. class HfFileSystemFlavour(AbstractFileSystemFlavour):
  486. __orig_class__ = 'huggingface_hub.hf_file_system.HfFileSystem'
  487. __orig_version__ = '0.26.5'
  488. protocol = ('hf',)
  489. root_marker = ''
  490. sep = '/'
  491. class JupyterFileSystemFlavour(AbstractFileSystemFlavour):
  492. __orig_class__ = 'fsspec.implementations.jupyter.JupyterFileSystem'
  493. __orig_version__ = '2024.10.0'
  494. protocol = ('jupyter', 'jlab')
  495. root_marker = ''
  496. sep = '/'
  497. class LakeFSFileSystemFlavour(AbstractFileSystemFlavour):
  498. __orig_class__ = 'lakefs_spec.spec.LakeFSFileSystem'
  499. __orig_version__ = '0.11.0'
  500. protocol = ('lakefs',)
  501. root_marker = ''
  502. sep = '/'
  503. @classmethod
  504. def _strip_protocol(cls, path):
  505. """Copied verbatim from the base class, save for the slash rstrip."""
  506. if isinstance(path, list):
  507. return [cls._strip_protocol(p) for p in path]
  508. spath = super()._strip_protocol(path)
  509. if stringify_path(path).endswith("/"):
  510. return spath + "/"
  511. return spath
  512. class LibArchiveFileSystemFlavour(AbstractFileSystemFlavour):
  513. __orig_class__ = 'fsspec.implementations.libarchive.LibArchiveFileSystem'
  514. __orig_version__ = '2024.10.0'
  515. protocol = ('libarchive',)
  516. root_marker = ''
  517. sep = '/'
  518. @classmethod
  519. def _strip_protocol(cls, path):
  520. # file paths are always relative to the archive root
  521. return super()._strip_protocol(path).lstrip("/")
  522. class LocalFileSystemFlavour(AbstractFileSystemFlavour):
  523. __orig_class__ = 'fsspec.implementations.local.LocalFileSystem'
  524. __orig_version__ = '2024.10.0'
  525. protocol = ('file', 'local')
  526. root_marker = '/'
  527. sep = '/'
  528. local_file = True
  529. @classmethod
  530. def _strip_protocol(cls, path):
  531. path = stringify_path(path)
  532. if path.startswith("file://"):
  533. path = path[7:]
  534. elif path.startswith("file:"):
  535. path = path[5:]
  536. elif path.startswith("local://"):
  537. path = path[8:]
  538. elif path.startswith("local:"):
  539. path = path[6:]
  540. path = make_path_posix(path)
  541. if os.sep != "/":
  542. # This code-path is a stripped down version of
  543. # > drive, path = ntpath.splitdrive(path)
  544. if path[1:2] == ":":
  545. # Absolute drive-letter path, e.g. X:\Windows
  546. # Relative path with drive, e.g. X:Windows
  547. drive, path = path[:2], path[2:]
  548. elif path[:2] == "//":
  549. # UNC drives, e.g. \\server\share or \\?\UNC\server\share
  550. # Device drives, e.g. \\.\device or \\?\device
  551. if (index1 := path.find("/", 2)) == -1 or (
  552. index2 := path.find("/", index1 + 1)
  553. ) == -1:
  554. drive, path = path, ""
  555. else:
  556. drive, path = path[:index2], path[index2:]
  557. else:
  558. # Relative path, e.g. Windows
  559. drive = ""
  560. path = path.rstrip("/") or cls.root_marker
  561. return drive + path
  562. else:
  563. return path.rstrip("/") or cls.root_marker
  564. @classmethod
  565. def _parent(cls, path):
  566. path = cls._strip_protocol(path)
  567. if os.sep == "/":
  568. # posix native
  569. return path.rsplit("/", 1)[0] or "/"
  570. else:
  571. # NT
  572. path_ = path.rsplit("/", 1)[0]
  573. if len(path_) <= 3:
  574. if path_[1:2] == ":":
  575. # nt root (something like c:/)
  576. return path_[0] + ":/"
  577. # More cases may be required here
  578. return path_
  579. class MemFSFlavour(AbstractFileSystemFlavour):
  580. __orig_class__ = 'morefs.memory.MemFS'
  581. __orig_version__ = '0.2.2'
  582. protocol = ('memfs',)
  583. root_marker = ''
  584. sep = '/'
  585. @classmethod
  586. def _strip_protocol(cls, path):
  587. if path.startswith("memfs://"):
  588. path = path[len("memfs://") :]
  589. return MemoryFileSystemFlavour._strip_protocol(path) # pylint: disable=protected-access
  590. class MemoryFileSystemFlavour(AbstractFileSystemFlavour):
  591. __orig_class__ = 'fsspec.implementations.memory.MemoryFileSystem'
  592. __orig_version__ = '2024.10.0'
  593. protocol = ('memory',)
  594. root_marker = '/'
  595. sep = '/'
  596. @classmethod
  597. def _strip_protocol(cls, path):
  598. if isinstance(path, PurePath):
  599. if isinstance(path, PureWindowsPath):
  600. return LocalFileSystemFlavour._strip_protocol(path)
  601. else:
  602. path = stringify_path(path)
  603. if path.startswith("memory://"):
  604. path = path[len("memory://") :]
  605. if "::" in path or "://" in path:
  606. return path.rstrip("/")
  607. path = path.lstrip("/").rstrip("/")
  608. return "/" + path if path else ""
  609. class OCIFileSystemFlavour(AbstractFileSystemFlavour):
  610. __orig_class__ = 'ocifs.core.OCIFileSystem'
  611. __orig_version__ = '1.3.1'
  612. protocol = ('oci', 'ocilake')
  613. root_marker = ''
  614. sep = '/'
  615. @classmethod
  616. def _strip_protocol(cls, path):
  617. if isinstance(path, list):
  618. return [cls._strip_protocol(p) for p in path]
  619. path = stringify_path(path)
  620. stripped_path = super()._strip_protocol(path)
  621. if stripped_path == cls.root_marker and "@" in path:
  622. return "@" + path.rstrip("/").split("@", 1)[1]
  623. return stripped_path
  624. @classmethod
  625. def _parent(cls, path):
  626. path = cls._strip_protocol(path.rstrip("/"))
  627. if "/" in path:
  628. return cls.root_marker + path.rsplit("/", 1)[0]
  629. elif "@" in path:
  630. return cls.root_marker + "@" + path.split("@", 1)[1]
  631. else:
  632. raise ValueError(f"the following path does not specify a namespace: {path}")
  633. class OSSFileSystemFlavour(AbstractFileSystemFlavour):
  634. __orig_class__ = 'ossfs.core.OSSFileSystem'
  635. __orig_version__ = '2023.12.0'
  636. protocol = ('oss',)
  637. root_marker = ''
  638. sep = '/'
  639. @classmethod
  640. def _strip_protocol(cls, path):
  641. """Turn path from fully-qualified to file-system-specifi
  642. Parameters
  643. ----------
  644. path : Union[str, List[str]]
  645. Input path, like
  646. `http://oss-cn-hangzhou.aliyuncs.com/mybucket/myobject`
  647. `oss://mybucket/myobject`
  648. Examples
  649. --------
  650. >>> _strip_protocol(
  651. "http://oss-cn-hangzhou.aliyuncs.com/mybucket/myobject"
  652. )
  653. ('/mybucket/myobject')
  654. >>> _strip_protocol(
  655. "oss://mybucket/myobject"
  656. )
  657. ('/mybucket/myobject')
  658. """
  659. if isinstance(path, list):
  660. return [cls._strip_protocol(p) for p in path]
  661. path_string = stringify_path(path)
  662. if path_string.startswith("oss://"):
  663. path_string = path_string[5:]
  664. parser_re = r"https?://(?P<endpoint>oss.+aliyuncs\.com)(?P<path>/.+)"
  665. matcher = re.compile(parser_re).match(path_string)
  666. if matcher:
  667. path_string = matcher["path"]
  668. return path_string or cls.root_marker
  669. class OverlayFileSystemFlavour(AbstractFileSystemFlavour):
  670. __orig_class__ = 'morefs.overlay.OverlayFileSystem'
  671. __orig_version__ = '0.2.2'
  672. protocol = ('overlayfs',)
  673. root_marker = ''
  674. sep = '/'
  675. class ReferenceFileSystemFlavour(AbstractFileSystemFlavour):
  676. __orig_class__ = 'fsspec.implementations.reference.ReferenceFileSystem'
  677. __orig_version__ = '2024.10.0'
  678. protocol = ('reference',)
  679. root_marker = ''
  680. sep = '/'
  681. class S3FileSystemFlavour(AbstractFileSystemFlavour):
  682. __orig_class__ = 's3fs.core.S3FileSystem'
  683. __orig_version__ = '2024.10.0'
  684. protocol = ('s3', 's3a')
  685. root_marker = ''
  686. sep = '/'
  687. @staticmethod
  688. def _get_kwargs_from_urls(urlpath):
  689. """
  690. When we have a urlpath that contains a ?versionId=
  691. Assume that we want to use version_aware mode for
  692. the filesystem.
  693. """
  694. url_storage_opts = infer_storage_options(urlpath)
  695. url_query = url_storage_opts.get("url_query")
  696. out = {}
  697. if url_query is not None:
  698. from urllib.parse import parse_qs
  699. parsed = parse_qs(url_query)
  700. if "versionId" in parsed:
  701. out["version_aware"] = True
  702. return out
  703. class SFTPFileSystemFlavour(AbstractFileSystemFlavour):
  704. __orig_class__ = 'fsspec.implementations.sftp.SFTPFileSystem'
  705. __orig_version__ = '2024.10.0'
  706. protocol = ('sftp', 'ssh')
  707. root_marker = ''
  708. sep = '/'
  709. @classmethod
  710. def _strip_protocol(cls, path):
  711. return infer_storage_options(path)["path"]
  712. @staticmethod
  713. def _get_kwargs_from_urls(urlpath):
  714. out = infer_storage_options(urlpath)
  715. out.pop("path", None)
  716. out.pop("protocol", None)
  717. return out
  718. class SMBFileSystemFlavour(AbstractFileSystemFlavour):
  719. __orig_class__ = 'fsspec.implementations.smb.SMBFileSystem'
  720. __orig_version__ = '2024.10.0'
  721. protocol = ('smb',)
  722. root_marker = ''
  723. sep = '/'
  724. @classmethod
  725. def _strip_protocol(cls, path):
  726. return infer_storage_options(path)["path"]
  727. @staticmethod
  728. def _get_kwargs_from_urls(path):
  729. # smb://workgroup;user:password@host:port/share/folder/file.csv
  730. out = infer_storage_options(path)
  731. out.pop("path", None)
  732. out.pop("protocol", None)
  733. return out
  734. class TarFileSystemFlavour(AbstractFileSystemFlavour):
  735. __orig_class__ = 'fsspec.implementations.tar.TarFileSystem'
  736. __orig_version__ = '2024.10.0'
  737. protocol = ('tar',)
  738. root_marker = ''
  739. sep = '/'
  740. class WandbFSFlavour(AbstractFileSystemFlavour):
  741. __orig_class__ = 'wandbfs._wandbfs.WandbFS'
  742. __orig_version__ = '0.0.2'
  743. protocol = ('wandb',)
  744. root_marker = ''
  745. sep = '/'
  746. class WebHDFSFlavour(AbstractFileSystemFlavour):
  747. __orig_class__ = 'fsspec.implementations.webhdfs.WebHDFS'
  748. __orig_version__ = '2024.10.0'
  749. protocol = ('webhdfs', 'webHDFS')
  750. root_marker = ''
  751. sep = '/'
  752. @classmethod
  753. def _strip_protocol(cls, path):
  754. return infer_storage_options(path)["path"]
  755. @staticmethod
  756. def _get_kwargs_from_urls(urlpath):
  757. out = infer_storage_options(urlpath)
  758. out.pop("path", None)
  759. out.pop("protocol", None)
  760. if "username" in out:
  761. out["user"] = out.pop("username")
  762. return out
  763. class WebdavFileSystemFlavour(AbstractFileSystemFlavour):
  764. __orig_class__ = 'webdav4.fsspec.WebdavFileSystem'
  765. __orig_version__ = '0.10.0'
  766. protocol = ('webdav', 'dav')
  767. root_marker = ''
  768. sep = '/'
  769. @classmethod
  770. def _strip_protocol(cls, path: str) -> str:
  771. """Strips protocol from the given path, overriding for type-casting."""
  772. stripped = super()._strip_protocol(path)
  773. return cast(str, stripped)
  774. class XRootDFileSystemFlavour(AbstractFileSystemFlavour):
  775. __orig_class__ = 'fsspec_xrootd.xrootd.XRootDFileSystem'
  776. __orig_version__ = '0.4.0'
  777. protocol = ('root',)
  778. root_marker = '/'
  779. sep = '/'
  780. @classmethod
  781. def _strip_protocol(cls, path: str | list[str]) -> Any:
  782. if isinstance(path, str):
  783. if path.startswith(cls.protocol):
  784. x = urlsplit(path); return (x.path + f'?{x.query}' if x.query else '').rstrip("/") or cls.root_marker
  785. # assume already stripped
  786. return path.rstrip("/") or cls.root_marker
  787. elif isinstance(path, list):
  788. return [cls._strip_protocol(item) for item in path]
  789. else:
  790. raise ValueError("Strip protocol not given string or list")
  791. @staticmethod
  792. def _get_kwargs_from_urls(u: str) -> dict[Any, Any]:
  793. url = urlsplit(u)
  794. # The hostid encapsulates user,pass,host,port in one string
  795. return {"hostid": url.netloc}
  796. class ZipFileSystemFlavour(AbstractFileSystemFlavour):
  797. __orig_class__ = 'fsspec.implementations.zip.ZipFileSystem'
  798. __orig_version__ = '2024.10.0'
  799. protocol = ('zip',)
  800. root_marker = ''
  801. sep = '/'
  802. @classmethod
  803. def _strip_protocol(cls, path):
  804. # zip file paths are always relative to the archive root
  805. return super()._strip_protocol(path).lstrip("/")
  806. class _DVCFileSystemFlavour(AbstractFileSystemFlavour):
  807. __orig_class__ = 'dvc.fs.dvc._DVCFileSystem'
  808. __orig_version__ = '3.58.0'
  809. protocol = ('dvc',)
  810. root_marker = '/'
  811. sep = '/'