git.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. import os
  2. import pygit2
  3. from fsspec.spec import AbstractFileSystem
  4. from .memory import MemoryFile
  5. class GitFileSystem(AbstractFileSystem):
  6. """Browse the files of a local git repo at any hash/tag/branch
  7. (experimental backend)
  8. """
  9. root_marker = ""
  10. cachable = True
  11. def __init__(self, path=None, fo=None, ref=None, **kwargs):
  12. """
  13. Parameters
  14. ----------
  15. path: str (optional)
  16. Local location of the repo (uses current directory if not given).
  17. May be deprecated in favour of ``fo``. When used with a higher
  18. level function such as fsspec.open(), may be of the form
  19. "git://[path-to-repo[:]][ref@]path/to/file" (but the actual
  20. file path should not contain "@" or ":").
  21. fo: str (optional)
  22. Same as ``path``, but passed as part of a chained URL. This one
  23. takes precedence if both are given.
  24. ref: str (optional)
  25. Reference to work with, could be a hash, tag or branch name. Defaults
  26. to current working tree. Note that ``ls`` and ``open`` also take hash,
  27. so this becomes the default for those operations
  28. kwargs
  29. """
  30. super().__init__(**kwargs)
  31. self.repo = pygit2.Repository(fo or path or os.getcwd())
  32. self.ref = ref or "master"
  33. @classmethod
  34. def _strip_protocol(cls, path):
  35. path = super()._strip_protocol(path).lstrip("/")
  36. if ":" in path:
  37. path = path.split(":", 1)[1]
  38. if "@" in path:
  39. path = path.split("@", 1)[1]
  40. return path.lstrip("/")
  41. def _path_to_object(self, path, ref):
  42. comm, ref = self.repo.resolve_refish(ref or self.ref)
  43. parts = path.split("/")
  44. tree = comm.tree
  45. for part in parts:
  46. if part and isinstance(tree, pygit2.Tree):
  47. if part not in tree:
  48. raise FileNotFoundError(path)
  49. tree = tree[part]
  50. return tree
  51. @staticmethod
  52. def _get_kwargs_from_urls(path):
  53. if path.startswith("git://"):
  54. path = path[6:]
  55. out = {}
  56. if ":" in path:
  57. out["path"], path = path.split(":", 1)
  58. if "@" in path:
  59. out["ref"], path = path.split("@", 1)
  60. return out
  61. @staticmethod
  62. def _object_to_info(obj, path=None):
  63. # obj.name and obj.filemode are None for the root tree!
  64. is_dir = isinstance(obj, pygit2.Tree)
  65. return {
  66. "type": "directory" if is_dir else "file",
  67. "name": (
  68. "/".join([path, obj.name or ""]).lstrip("/") if path else obj.name
  69. ),
  70. "hex": str(obj.id),
  71. "mode": "100644" if obj.filemode is None else f"{obj.filemode:o}",
  72. "size": 0 if is_dir else obj.size,
  73. }
  74. def ls(self, path, detail=True, ref=None, **kwargs):
  75. tree = self._path_to_object(self._strip_protocol(path), ref)
  76. return [
  77. GitFileSystem._object_to_info(obj, path)
  78. if detail
  79. else GitFileSystem._object_to_info(obj, path)["name"]
  80. for obj in (tree if isinstance(tree, pygit2.Tree) else [tree])
  81. ]
  82. def info(self, path, ref=None, **kwargs):
  83. tree = self._path_to_object(self._strip_protocol(path), ref)
  84. return GitFileSystem._object_to_info(tree, path)
  85. def ukey(self, path, ref=None):
  86. return self.info(path, ref=ref)["hex"]
  87. def _open(
  88. self,
  89. path,
  90. mode="rb",
  91. block_size=None,
  92. autocommit=True,
  93. cache_options=None,
  94. ref=None,
  95. **kwargs,
  96. ):
  97. obj = self._path_to_object(path, ref or self.ref)
  98. return MemoryFile(data=obj.data)