get.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587
  1. from hashlib import md5
  2. from itertools import product
  3. import pytest
  4. from fsspec.implementations.local import make_path_posix
  5. from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
  6. class AbstractGetTests:
  def test_get_file_to_existing_directory(
      self,
      fs,
      fs_join,
      fs_bulk_operations_scenario_0,
      local_fs,
      local_join,
      local_target,
  ):
      """Copy scenario 1a: get single files into a directory that already exists.

      Each file is expected to appear directly inside the target directory under
      its own name, both when the target path is given as-is and when it is
      given with a trailing slash.
      """
      source = fs_bulk_operations_scenario_0
      target = local_target
      local_fs.mkdir(target)
      assert local_fs.isdir(target)

      remote_file2 = fs_join(source, "file2")
      remote_subfile1 = fs_join(source, "subdir", "subfile1")
      copied_file2 = local_join(target, "file2")
      copied_subfile1 = local_join(target, "subfile1")

      # File that lives directly in the source directory
      fs.get(remote_file2, target)
      assert local_fs.isfile(copied_file2)

      # File that lives in a sub directory of the source
      fs.get(remote_subfile1, target)
      assert local_fs.isfile(copied_subfile1)

      # Remove both copies before repeating the exercise
      local_fs.rm([copied_file2, copied_subfile1])
      assert not local_fs.exists(copied_file2)
      assert not local_fs.exists(copied_subfile1)

      # Same two copies, this time with a trailing slash on the target
      target_with_slash = target + "/"
      fs.get(remote_file2, target_with_slash)
      assert local_fs.isdir(target)
      assert local_fs.isfile(copied_file2)

      fs.get(remote_subfile1, target_with_slash)
      assert local_fs.isfile(copied_subfile1)
  def test_get_file_to_new_directory(
      self,
      fs,
      fs_join,
      fs_bulk_operations_scenario_0,
      local_fs,
      local_join,
      local_target,
  ):
      """Copy scenario 1b: get a single file into a directory not yet created.

      The target path ends with a slash; the test asserts that ``newdir`` is
      created and that the file is placed inside it under its original name.
      """
      source = fs_bulk_operations_scenario_0
      target = local_target
      local_fs.mkdir(target)

      remote_file = fs_join(source, "subdir", "subfile1")
      new_dir_with_slash = local_join(target, "newdir/")  # Note trailing slash
      fs.get(remote_file, new_dir_with_slash)

      assert local_fs.isdir(target)
      assert local_fs.isdir(local_join(target, "newdir"))
      assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
  def test_get_file_to_file_in_existing_directory(
      self,
      fs,
      fs_join,
      fs_bulk_operations_scenario_0,
      local_fs,
      local_join,
      local_target,
  ):
      """Copy scenario 1c: get a single file to an explicit file path.

      The parent directory of the target path is created first; the test then
      asserts that a file exists under the new name given in the target path.
      """
      source = fs_bulk_operations_scenario_0
      target = local_target
      local_fs.mkdir(target)

      new_file = local_join(target, "newfile")
      fs.get(fs_join(source, "subdir", "subfile1"), new_file)

      assert local_fs.isfile(new_file)
  def test_get_file_to_file_in_new_directory(
      self,
      fs,
      fs_join,
      fs_bulk_operations_scenario_0,
      local_fs,
      local_join,
      local_target,
  ):
      """Copy scenario 1d: get a single file to a file path in a new directory.

      Only the top-level target directory is created up front; the test asserts
      that ``newdir`` appears and contains the file under its new name.
      """
      source = fs_bulk_operations_scenario_0
      target = local_target
      local_fs.mkdir(target)

      remote_file = fs_join(source, "subdir", "subfile1")
      new_file = local_join(target, "newdir", "newfile")
      fs.get(remote_file, new_file)

      assert local_fs.isdir(local_join(target, "newdir"))
      assert local_fs.isfile(new_file)
  def test_get_directory_to_existing_directory(
      self,
      fs,
      fs_join,
      fs_bulk_operations_scenario_0,
      local_fs,
      local_join,
      local_target,
  ):
      """Copy scenario 1e: get a directory into a directory that already exists.

      For each slash combination the test checks three things in turn: a
      non-recursive get leaves the target empty, a recursive get copies the
      whole tree, and ``maxdepth=1`` stops before ``nesteddir``. With a trailing
      slash on the source the contents are expected directly in the target;
      without it they are expected under ``target/subdir``.
      """
      source = fs_bulk_operations_scenario_0
      target = local_target
      local_fs.mkdir(target)
      assert local_fs.isdir(target)

      copied_dir = local_join(target, "subdir")
      nested_dir = local_join(target, "nesteddir")
      top_level_files = (
          local_join(target, "subfile1"),
          local_join(target, "subfile2"),
      )

      # NOTE(review): zip() only yields (False, False) and (True, True), so the
      # mixed slash combinations are not exercised here - confirm whether
      # itertools.product was intended.
      for source_slash, target_slash in zip([False, True], [False, True]):
          src = fs_join(source, "subdir")
          if source_slash:
              src += "/"
          dst = target
          if target_slash:
              dst += "/"

          # Without recursive does nothing
          fs.get(src, dst)
          assert local_fs.ls(target) == []

          # With recursive
          fs.get(src, dst, recursive=True)
          if not source_slash:
              assert local_fs.isdir(copied_dir)
              assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
              assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
              assert local_fs.isdir(local_join(target, "subdir", "nesteddir"))
              assert local_fs.isfile(
                  local_join(target, "subdir", "nesteddir", "nestedfile")
              )
              local_fs.rm(copied_dir, recursive=True)
          else:
              for copied_file in top_level_files:
                  assert local_fs.isfile(copied_file)
              assert local_fs.isdir(nested_dir)
              assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile"))
              assert not local_fs.exists(copied_dir)
              local_fs.rm([*top_level_files, nested_dir], recursive=True)
          assert local_fs.ls(target) == []

          # Limit recursive by maxdepth
          fs.get(src, dst, recursive=True, maxdepth=1)
          if not source_slash:
              assert local_fs.isdir(copied_dir)
              assert local_fs.isfile(local_join(target, "subdir", "subfile1"))
              assert local_fs.isfile(local_join(target, "subdir", "subfile2"))
              assert not local_fs.exists(local_join(target, "subdir", "nesteddir"))
              local_fs.rm(copied_dir, recursive=True)
          else:
              for copied_file in top_level_files:
                  assert local_fs.isfile(copied_file)
              assert not local_fs.exists(nested_dir)
              assert not local_fs.exists(copied_dir)
              local_fs.rm([*top_level_files], recursive=True)
          assert local_fs.ls(target) == []
  def test_get_directory_to_new_directory(
      self,
      fs,
      fs_join,
      fs_bulk_operations_scenario_0,
      local_fs,
      local_join,
      local_target,
  ):
      """Copy scenario 1f: get a directory into a directory not yet created.

      For each slash combination the test checks that a non-recursive get
      copies nothing, that a recursive get places the contents of ``subdir``
      directly inside ``newdir``, and that ``maxdepth=1`` leaves out
      ``nesteddir``.
      """
      source = fs_bulk_operations_scenario_0
      target = local_target
      local_fs.mkdir(target)

      new_dir = local_join(target, "newdir")
      stray_subdir = local_join(target, "subdir")

      # NOTE(review): zip() only yields (False, False) and (True, True), so the
      # mixed slash combinations are not exercised here - confirm whether
      # itertools.product was intended.
      for source_slash, target_slash in zip([False, True], [False, True]):
          src = fs_join(source, "subdir")
          if source_slash:
              src += "/"
          dst = local_join(target, "newdir")
          if target_slash:
              dst += "/"

          # Without recursive does nothing
          fs.get(src, dst)
          assert local_fs.ls(target) == []

          # With recursive
          fs.get(src, dst, recursive=True)
          assert local_fs.isdir(new_dir)
          for name in ("subfile1", "subfile2"):
              assert local_fs.isfile(local_join(target, "newdir", name))
          assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
          assert local_fs.isfile(
              local_join(target, "newdir", "nesteddir", "nestedfile")
          )
          assert not local_fs.exists(stray_subdir)

          local_fs.rm(new_dir, recursive=True)
          assert local_fs.ls(target) == []

          # Limit recursive by maxdepth
          fs.get(src, dst, recursive=True, maxdepth=1)
          assert local_fs.isdir(new_dir)
          for name in ("subfile1", "subfile2"):
              assert local_fs.isfile(local_join(target, "newdir", name))
          assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
          assert not local_fs.exists(stray_subdir)

          local_fs.rm(new_dir, recursive=True)
          assert not local_fs.exists(new_dir)
  def test_get_glob_to_existing_directory(
      self,
      fs,
      fs_join,
      fs_bulk_operations_scenario_0,
      local_fs,
      local_join,
      local_target,
  ):
      """Copy scenario 1g: get a glob into a directory that already exists.

      Runs with and without a trailing slash on the target. A plain ``*`` glob
      is expected to copy only the two files; ``*`` with ``recursive=True`` and
      ``**`` with ``recursive=False`` are both expected to copy the nested
      directory as well, unless ``maxdepth=1`` is given.
      """
      source = fs_bulk_operations_scenario_0
      target = local_target
      local_fs.mkdir(target)

      nested_dir = local_join(target, "nesteddir")
      nested_file = local_join(target, "nesteddir", "nestedfile")
      stray_subdir = local_join(target, "subdir")
      top_level_files = (
          local_join(target, "subfile1"),
          local_join(target, "subfile2"),
      )

      for target_slash in [False, True]:
          dst = target
          if target_slash:
              dst += "/"

          # Without recursive
          fs.get(fs_join(source, "subdir", "*"), dst)
          for copied_file in top_level_files:
              assert local_fs.isfile(copied_file)
          assert not local_fs.isdir(nested_dir)
          assert not local_fs.exists(nested_file)
          assert not local_fs.exists(stray_subdir)

          local_fs.rm([*top_level_files], recursive=True)
          assert local_fs.ls(target) == []

          # With recursive: "*" is paired with recursive=True and "**" with
          # recursive=False; the same assertions apply to both pairs.
          for glob, recursive in zip(["*", "**"], [True, False]):
              pattern = fs_join(source, "subdir", glob)
              fs.get(pattern, dst, recursive=recursive)
              for copied_file in top_level_files:
                  assert local_fs.isfile(copied_file)
              assert local_fs.isdir(nested_dir)
              assert local_fs.isfile(nested_file)
              assert not local_fs.exists(stray_subdir)

              local_fs.rm([*top_level_files, nested_dir], recursive=True)
              assert local_fs.ls(target) == []

              # Limit recursive by maxdepth
              fs.get(
                  fs_join(source, "subdir", glob),
                  dst,
                  recursive=recursive,
                  maxdepth=1,
              )
              for copied_file in top_level_files:
                  assert local_fs.isfile(copied_file)
              assert not local_fs.exists(nested_dir)
              assert not local_fs.exists(stray_subdir)

              local_fs.rm([*top_level_files], recursive=True)
              assert local_fs.ls(target) == []
  def test_get_glob_to_new_directory(
      self,
      fs,
      fs_join,
      fs_bulk_operations_scenario_0,
      local_fs,
      local_join,
      local_target,
  ):
      """Copy scenario 1h: get a glob into a directory not yet created.

      Runs with and without a trailing slash on the target. A plain ``*`` glob
      is expected to create ``newdir`` holding only the two files; ``*`` with
      ``recursive=True`` and ``**`` with ``recursive=False`` are both expected
      to copy ``nesteddir`` as well, unless ``maxdepth=1`` is given.
      """
      source = fs_bulk_operations_scenario_0

      target = local_target
      local_fs.mkdir(target)

      for target_slash in [False, True]:
          # The target is a local path, so it is built with local_join; fs_join
          # is reserved for paths on the filesystem under test.
          t = local_join(target, "newdir")
          if target_slash:
              t += "/"

          # Without recursive
          fs.get(fs_join(source, "subdir", "*"), t)
          assert local_fs.isdir(local_join(target, "newdir"))
          assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
          assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
          assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
          assert not local_fs.exists(
              local_join(target, "newdir", "nesteddir", "nestedfile")
          )
          assert not local_fs.exists(local_join(target, "subdir"))
          assert not local_fs.exists(local_join(target, "newdir", "subdir"))

          local_fs.rm(local_join(target, "newdir"), recursive=True)
          assert local_fs.ls(target) == []

          # With recursive: "*" is paired with recursive=True and "**" with
          # recursive=False; the same assertions apply to both pairs.
          for glob, recursive in zip(["*", "**"], [True, False]):
              fs.get(fs_join(source, "subdir", glob), t, recursive=recursive)
              assert local_fs.isdir(local_join(target, "newdir"))
              assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
              assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
              assert local_fs.isdir(local_join(target, "newdir", "nesteddir"))
              assert local_fs.isfile(
                  local_join(target, "newdir", "nesteddir", "nestedfile")
              )
              assert not local_fs.exists(local_join(target, "subdir"))
              assert not local_fs.exists(local_join(target, "newdir", "subdir"))

              local_fs.rm(local_join(target, "newdir"), recursive=True)
              assert not local_fs.exists(local_join(target, "newdir"))

              # Limit recursive by maxdepth
              fs.get(
                  fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
              )
              assert local_fs.isdir(local_join(target, "newdir"))
              assert local_fs.isfile(local_join(target, "newdir", "subfile1"))
              assert local_fs.isfile(local_join(target, "newdir", "subfile2"))
              assert not local_fs.exists(local_join(target, "newdir", "nesteddir"))
              assert not local_fs.exists(local_join(target, "subdir"))
              assert not local_fs.exists(local_join(target, "newdir", "subdir"))

              # Remove everything listed in the target before the next pass
              local_fs.rm(local_fs.ls(target, detail=False), recursive=True)
              assert not local_fs.exists(local_join(target, "newdir"))
  @pytest.mark.parametrize(
      GLOB_EDGE_CASES_TESTS["argnames"],
      GLOB_EDGE_CASES_TESTS["argvalues"],
  )
  def test_get_glob_edge_cases(
      self,
      path,
      recursive,
      maxdepth,
      expected,
      fs,
      fs_join,
      fs_glob_edge_cases_files,
      local_fs,
      local_join,
      local_target,
  ):
      """Check one parametrized glob pattern against its expected file list.

      The get is repeated for every combination of "target is a new
      sub-directory or the existing directory" and "target has a trailing slash
      or not". After each get, the files found under the target must equal
      ``expected`` once prefixed with the directory they were copied into.
      """
      # Copy scenario 1g
      source = fs_glob_edge_cases_files
      target = local_target

      for new_dir, target_slash in product([True, False], [True, False]):
          local_fs.mkdir(target)

          # Components of the directory the files are expected to end up in
          destination_parts = (target, "newdir") if new_dir else (target,)

          t = local_join(target, "newdir") if new_dir else target
          if target_slash:
              t = t + "/"

          fs.get(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)

          output = local_fs.find(target)
          prefixed_expected = [
              make_path_posix(local_join(*destination_parts, p)) for p in expected
          ]
          assert sorted(output) == sorted(prefixed_expected)

          # Best-effort cleanup: a target that is already gone is tolerated
          try:
              local_fs.rm(target, recursive=True)
          except FileNotFoundError:
              pass
  def test_get_list_of_files_to_existing_directory(
      self,
      fs,
      fs_join,
      fs_bulk_operations_scenario_0,
      local_fs,
      local_join,
      local_target,
  ):
      """Copy scenario 2a: get a list of files into an existing directory.

      All listed files, including the one taken from ``subdir``, are expected
      directly inside the target, with and without a trailing slash on it.
      """
      source = fs_bulk_operations_scenario_0
      target = local_target
      local_fs.mkdir(target)

      source_files = [
          fs_join(source, "file1"),
          fs_join(source, "file2"),
          fs_join(source, "subdir", "subfile1"),
      ]
      copied_names = ("file1", "file2", "subfile1")

      for target_slash in [False, True]:
          dst = target
          if target_slash:
              dst += "/"

          fs.get(source_files, dst)
          for name in copied_names:
              assert local_fs.isfile(local_join(target, name))

          local_fs.rm(
              [local_join(target, name) for name in copied_names],
              recursive=True,
          )
          assert local_fs.ls(target) == []
  def test_get_list_of_files_to_new_directory(
      self,
      fs,
      fs_join,
      fs_bulk_operations_scenario_0,
      local_fs,
      local_join,
      local_target,
  ):
      """Copy scenario 2b: get a list of files into a directory not yet created.

      The target path ends with a slash; the test asserts that ``newdir`` is
      created and that every listed file is found directly inside it.
      """
      source = fs_bulk_operations_scenario_0
      target = local_target
      local_fs.mkdir(target)

      source_files = [
          fs_join(source, "file1"),
          fs_join(source, "file2"),
          fs_join(source, "subdir", "subfile1"),
      ]

      new_dir_with_slash = local_join(target, "newdir") + "/"  # Note trailing slash
      fs.get(source_files, new_dir_with_slash)

      assert local_fs.isdir(local_join(target, "newdir"))
      for name in ("file1", "file2", "subfile1"):
          assert local_fs.isfile(local_join(target, "newdir", name))
  def test_get_directory_recursive(
      self, fs, fs_join, fs_path, local_fs, local_join, local_target
  ):
      """Recursive get of a source directory into a target that does not exist.

      See https://github.com/fsspec/filesystem_spec/issues/1062
      (recursive cp/get/put of source directory into non-existent target
      directory). Each variant is run twice in a row, so the second run sees
      the target that the first run created.
      """
      src = fs_join(fs_path, "src")
      src_file = fs_join(src, "file")
      fs.mkdir(src)
      fs.touch(src_file)

      target = local_target
      copied_file = local_join(target, "file")
      copied_src_dir = local_join(target, "src")

      # get without slash
      assert not local_fs.exists(target)
      for attempt in range(2):
          fs.get(src, target, recursive=True)
          assert local_fs.isdir(target)
          assert local_fs.isfile(copied_file)

          if attempt == 0:
              # Target did not exist before this get: only the contents appear
              assert not local_fs.exists(copied_src_dir)
          else:
              # Target existed already: "src" itself is expected inside it
              assert local_fs.isdir(copied_src_dir)
              assert local_fs.isfile(local_join(target, "src", "file"))

      local_fs.rm(target, recursive=True)

      # get with slash
      assert not local_fs.exists(target)
      for _ in range(2):
          fs.get(src + "/", target, recursive=True)
          assert local_fs.isdir(target)
          assert local_fs.isfile(copied_file)
          assert not local_fs.exists(copied_src_dir)
  def test_get_directory_without_files_with_same_name_prefix(
      self,
      fs,
      fs_join,
      local_fs,
      local_join,
      local_target,
      fs_dir_and_file_with_same_name_prefix,
  ):
      """Getting ``subdir`` must not also fetch the sibling ``subdir.txt``.

      Without a glob only the directory's own file is expected in the target;
      with the ``subdir*`` glob both the directory and ``subdir.txt`` are
      expected.
      """
      source = fs_dir_and_file_with_same_name_prefix
      target = local_target

      copied_subfile = local_join(target, "subfile.txt")
      sibling_file = local_join(target, "subdir.txt")

      # Test without glob
      fs.get(fs_join(source, "subdir"), target, recursive=True)
      assert local_fs.isfile(copied_subfile)
      assert not local_fs.isfile(sibling_file)

      local_fs.rm([copied_subfile])
      assert local_fs.ls(target) == []

      # Test with glob
      fs.get(fs_join(source, "subdir*"), target, recursive=True)
      assert local_fs.isdir(local_join(target, "subdir"))
      assert local_fs.isfile(local_join(target, "subdir", "subfile.txt"))
      assert local_fs.isfile(sibling_file)
  def test_get_with_source_and_destination_as_list(
      self,
      fs,
      fs_join,
      local_fs,
      local_join,
      local_target,
      fs_10_files_with_hashed_names,
  ):
      """Get with parallel source and destination lists must keep their pairing.

      Ten files named after the MD5 hex digest of their index are requested;
      the i-th destination file is then expected to contain the text ``str(i)``.
      """
      source = fs_10_files_with_hashed_names
      target = local_target

      # One "<md5 of index>.txt" name per index, shared by both lists
      file_names = [
          f"{md5(str(i).encode('utf-8')).hexdigest()}.txt" for i in range(10)
      ]
      source_files = [fs_join(source, name) for name in file_names]
      destination_files = [
          make_path_posix(local_join(target, name)) for name in file_names
      ]

      # Copy and assert order was kept
      fs.get(rpath=source_files, lpath=destination_files)

      for i, destination in enumerate(destination_files):
          file_content = local_fs.cat(destination).decode("utf-8")
          assert file_content == str(i)