# put.py (21 KB)
  1. from hashlib import md5
  2. from itertools import product
  3. import pytest
  4. from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
  5. class AbstractPutTests:
  6. def test_put_file_to_existing_directory(
  7. self,
  8. fs,
  9. fs_join,
  10. fs_target,
  11. local_join,
  12. local_bulk_operations_scenario_0,
  13. supports_empty_directories,
  14. ):
  15. # Copy scenario 1a
  16. source = local_bulk_operations_scenario_0
  17. target = fs_target
  18. fs.mkdir(target)
  19. if not supports_empty_directories:
  20. # Force target directory to exist by adding a dummy file
  21. fs.touch(fs_join(target, "dummy"))
  22. assert fs.isdir(target)
  23. target_file2 = fs_join(target, "file2")
  24. target_subfile1 = fs_join(target, "subfile1")
  25. # Copy from source directory
  26. fs.put(local_join(source, "file2"), target)
  27. assert fs.isfile(target_file2)
  28. # Copy from sub directory
  29. fs.put(local_join(source, "subdir", "subfile1"), target)
  30. assert fs.isfile(target_subfile1)
  31. # Remove copied files
  32. fs.rm([target_file2, target_subfile1])
  33. assert not fs.exists(target_file2)
  34. assert not fs.exists(target_subfile1)
  35. # Repeat with trailing slash on target
  36. fs.put(local_join(source, "file2"), target + "/")
  37. assert fs.isdir(target)
  38. assert fs.isfile(target_file2)
  39. fs.put(local_join(source, "subdir", "subfile1"), target + "/")
  40. assert fs.isfile(target_subfile1)
  41. def test_put_file_to_new_directory(
  42. self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
  43. ):
  44. # Copy scenario 1b
  45. source = local_bulk_operations_scenario_0
  46. target = fs_target
  47. fs.mkdir(target)
  48. fs.put(
  49. local_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
  50. ) # Note trailing slash
  51. assert fs.isdir(target)
  52. assert fs.isdir(fs_join(target, "newdir"))
  53. assert fs.isfile(fs_join(target, "newdir", "subfile1"))
  54. def test_put_file_to_file_in_existing_directory(
  55. self,
  56. fs,
  57. fs_join,
  58. fs_target,
  59. local_join,
  60. supports_empty_directories,
  61. local_bulk_operations_scenario_0,
  62. ):
  63. # Copy scenario 1c
  64. source = local_bulk_operations_scenario_0
  65. target = fs_target
  66. fs.mkdir(target)
  67. if not supports_empty_directories:
  68. # Force target directory to exist by adding a dummy file
  69. fs.touch(fs_join(target, "dummy"))
  70. assert fs.isdir(target)
  71. fs.put(local_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
  72. assert fs.isfile(fs_join(target, "newfile"))
  73. def test_put_file_to_file_in_new_directory(
  74. self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
  75. ):
  76. # Copy scenario 1d
  77. source = local_bulk_operations_scenario_0
  78. target = fs_target
  79. fs.mkdir(target)
  80. fs.put(
  81. local_join(source, "subdir", "subfile1"),
  82. fs_join(target, "newdir", "newfile"),
  83. )
  84. assert fs.isdir(fs_join(target, "newdir"))
  85. assert fs.isfile(fs_join(target, "newdir", "newfile"))
  86. def test_put_directory_to_existing_directory(
  87. self,
  88. fs,
  89. fs_join,
  90. fs_target,
  91. local_bulk_operations_scenario_0,
  92. supports_empty_directories,
  93. ):
  94. # Copy scenario 1e
  95. source = local_bulk_operations_scenario_0
  96. target = fs_target
  97. fs.mkdir(target)
  98. if not supports_empty_directories:
  99. # Force target directory to exist by adding a dummy file
  100. dummy = fs_join(target, "dummy")
  101. fs.touch(dummy)
  102. assert fs.isdir(target)
  103. for source_slash, target_slash in zip([False, True], [False, True]):
  104. s = fs_join(source, "subdir")
  105. if source_slash:
  106. s += "/"
  107. t = target + "/" if target_slash else target
  108. # Without recursive does nothing
  109. fs.put(s, t)
  110. assert fs.ls(target, detail=False) == (
  111. [] if supports_empty_directories else [dummy]
  112. )
  113. # With recursive
  114. fs.put(s, t, recursive=True)
  115. if source_slash:
  116. assert fs.isfile(fs_join(target, "subfile1"))
  117. assert fs.isfile(fs_join(target, "subfile2"))
  118. assert fs.isdir(fs_join(target, "nesteddir"))
  119. assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
  120. assert not fs.exists(fs_join(target, "subdir"))
  121. fs.rm(
  122. [
  123. fs_join(target, "subfile1"),
  124. fs_join(target, "subfile2"),
  125. fs_join(target, "nesteddir"),
  126. ],
  127. recursive=True,
  128. )
  129. else:
  130. assert fs.isdir(fs_join(target, "subdir"))
  131. assert fs.isfile(fs_join(target, "subdir", "subfile1"))
  132. assert fs.isfile(fs_join(target, "subdir", "subfile2"))
  133. assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
  134. assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
  135. fs.rm(fs_join(target, "subdir"), recursive=True)
  136. assert fs.ls(target, detail=False) == (
  137. [] if supports_empty_directories else [dummy]
  138. )
  139. # Limit recursive by maxdepth
  140. fs.put(s, t, recursive=True, maxdepth=1)
  141. if source_slash:
  142. assert fs.isfile(fs_join(target, "subfile1"))
  143. assert fs.isfile(fs_join(target, "subfile2"))
  144. assert not fs.exists(fs_join(target, "nesteddir"))
  145. assert not fs.exists(fs_join(target, "subdir"))
  146. fs.rm(
  147. [
  148. fs_join(target, "subfile1"),
  149. fs_join(target, "subfile2"),
  150. ],
  151. recursive=True,
  152. )
  153. else:
  154. assert fs.isdir(fs_join(target, "subdir"))
  155. assert fs.isfile(fs_join(target, "subdir", "subfile1"))
  156. assert fs.isfile(fs_join(target, "subdir", "subfile2"))
  157. assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
  158. fs.rm(fs_join(target, "subdir"), recursive=True)
  159. assert fs.ls(target, detail=False) == (
  160. [] if supports_empty_directories else [dummy]
  161. )
  162. def test_put_directory_to_new_directory(
  163. self,
  164. fs,
  165. fs_join,
  166. fs_target,
  167. local_bulk_operations_scenario_0,
  168. supports_empty_directories,
  169. ):
  170. # Copy scenario 1f
  171. source = local_bulk_operations_scenario_0
  172. target = fs_target
  173. fs.mkdir(target)
  174. for source_slash, target_slash in zip([False, True], [False, True]):
  175. s = fs_join(source, "subdir")
  176. if source_slash:
  177. s += "/"
  178. t = fs_join(target, "newdir")
  179. if target_slash:
  180. t += "/"
  181. # Without recursive does nothing
  182. fs.put(s, t)
  183. if supports_empty_directories:
  184. assert fs.ls(target) == []
  185. else:
  186. with pytest.raises(FileNotFoundError):
  187. fs.ls(target)
  188. # With recursive
  189. fs.put(s, t, recursive=True)
  190. assert fs.isdir(fs_join(target, "newdir"))
  191. assert fs.isfile(fs_join(target, "newdir", "subfile1"))
  192. assert fs.isfile(fs_join(target, "newdir", "subfile2"))
  193. assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
  194. assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
  195. assert not fs.exists(fs_join(target, "subdir"))
  196. fs.rm(fs_join(target, "newdir"), recursive=True)
  197. assert not fs.exists(fs_join(target, "newdir"))
  198. # Limit recursive by maxdepth
  199. fs.put(s, t, recursive=True, maxdepth=1)
  200. assert fs.isdir(fs_join(target, "newdir"))
  201. assert fs.isfile(fs_join(target, "newdir", "subfile1"))
  202. assert fs.isfile(fs_join(target, "newdir", "subfile2"))
  203. assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
  204. assert not fs.exists(fs_join(target, "subdir"))
  205. fs.rm(fs_join(target, "newdir"), recursive=True)
  206. assert not fs.exists(fs_join(target, "newdir"))
  207. def test_put_glob_to_existing_directory(
  208. self,
  209. fs,
  210. fs_join,
  211. fs_target,
  212. local_join,
  213. supports_empty_directories,
  214. local_bulk_operations_scenario_0,
  215. ):
  216. # Copy scenario 1g
  217. source = local_bulk_operations_scenario_0
  218. target = fs_target
  219. fs.mkdir(target)
  220. if not supports_empty_directories:
  221. # Force target directory to exist by adding a dummy file
  222. dummy = fs_join(target, "dummy")
  223. fs.touch(dummy)
  224. assert fs.isdir(target)
  225. for target_slash in [False, True]:
  226. t = target + "/" if target_slash else target
  227. # Without recursive
  228. fs.put(local_join(source, "subdir", "*"), t)
  229. assert fs.isfile(fs_join(target, "subfile1"))
  230. assert fs.isfile(fs_join(target, "subfile2"))
  231. assert not fs.isdir(fs_join(target, "nesteddir"))
  232. assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
  233. assert not fs.exists(fs_join(target, "subdir"))
  234. fs.rm(
  235. [
  236. fs_join(target, "subfile1"),
  237. fs_join(target, "subfile2"),
  238. ],
  239. recursive=True,
  240. )
  241. assert fs.ls(target, detail=False) == (
  242. [] if supports_empty_directories else [dummy]
  243. )
  244. # With recursive
  245. for glob, recursive in zip(["*", "**"], [True, False]):
  246. fs.put(local_join(source, "subdir", glob), t, recursive=recursive)
  247. assert fs.isfile(fs_join(target, "subfile1"))
  248. assert fs.isfile(fs_join(target, "subfile2"))
  249. assert fs.isdir(fs_join(target, "nesteddir"))
  250. assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
  251. assert not fs.exists(fs_join(target, "subdir"))
  252. fs.rm(
  253. [
  254. fs_join(target, "subfile1"),
  255. fs_join(target, "subfile2"),
  256. fs_join(target, "nesteddir"),
  257. ],
  258. recursive=True,
  259. )
  260. assert fs.ls(target, detail=False) == (
  261. [] if supports_empty_directories else [dummy]
  262. )
  263. # Limit recursive by maxdepth
  264. fs.put(
  265. local_join(source, "subdir", glob),
  266. t,
  267. recursive=recursive,
  268. maxdepth=1,
  269. )
  270. assert fs.isfile(fs_join(target, "subfile1"))
  271. assert fs.isfile(fs_join(target, "subfile2"))
  272. assert not fs.exists(fs_join(target, "nesteddir"))
  273. assert not fs.exists(fs_join(target, "subdir"))
  274. fs.rm(
  275. [
  276. fs_join(target, "subfile1"),
  277. fs_join(target, "subfile2"),
  278. ],
  279. recursive=True,
  280. )
  281. assert fs.ls(target, detail=False) == (
  282. [] if supports_empty_directories else [dummy]
  283. )
  284. def test_put_glob_to_new_directory(
  285. self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
  286. ):
  287. # Copy scenario 1h
  288. source = local_bulk_operations_scenario_0
  289. target = fs_target
  290. fs.mkdir(target)
  291. for target_slash in [False, True]:
  292. t = fs_join(target, "newdir")
  293. if target_slash:
  294. t += "/"
  295. # Without recursive
  296. fs.put(local_join(source, "subdir", "*"), t)
  297. assert fs.isdir(fs_join(target, "newdir"))
  298. assert fs.isfile(fs_join(target, "newdir", "subfile1"))
  299. assert fs.isfile(fs_join(target, "newdir", "subfile2"))
  300. assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
  301. assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
  302. assert not fs.exists(fs_join(target, "subdir"))
  303. assert not fs.exists(fs_join(target, "newdir", "subdir"))
  304. fs.rm(fs_join(target, "newdir"), recursive=True)
  305. assert not fs.exists(fs_join(target, "newdir"))
  306. # With recursive
  307. for glob, recursive in zip(["*", "**"], [True, False]):
  308. fs.put(local_join(source, "subdir", glob), t, recursive=recursive)
  309. assert fs.isdir(fs_join(target, "newdir"))
  310. assert fs.isfile(fs_join(target, "newdir", "subfile1"))
  311. assert fs.isfile(fs_join(target, "newdir", "subfile2"))
  312. assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
  313. assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
  314. assert not fs.exists(fs_join(target, "subdir"))
  315. assert not fs.exists(fs_join(target, "newdir", "subdir"))
  316. fs.rm(fs_join(target, "newdir"), recursive=True)
  317. assert not fs.exists(fs_join(target, "newdir"))
  318. # Limit recursive by maxdepth
  319. fs.put(
  320. local_join(source, "subdir", glob),
  321. t,
  322. recursive=recursive,
  323. maxdepth=1,
  324. )
  325. assert fs.isdir(fs_join(target, "newdir"))
  326. assert fs.isfile(fs_join(target, "newdir", "subfile1"))
  327. assert fs.isfile(fs_join(target, "newdir", "subfile2"))
  328. assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
  329. assert not fs.exists(fs_join(target, "subdir"))
  330. assert not fs.exists(fs_join(target, "newdir", "subdir"))
  331. fs.rm(fs_join(target, "newdir"), recursive=True)
  332. assert not fs.exists(fs_join(target, "newdir"))
  333. @pytest.mark.parametrize(
  334. GLOB_EDGE_CASES_TESTS["argnames"],
  335. GLOB_EDGE_CASES_TESTS["argvalues"],
  336. )
  337. def test_put_glob_edge_cases(
  338. self,
  339. path,
  340. recursive,
  341. maxdepth,
  342. expected,
  343. fs,
  344. fs_join,
  345. fs_target,
  346. local_glob_edge_cases_files,
  347. local_join,
  348. fs_sanitize_path,
  349. ):
  350. # Copy scenario 1g
  351. source = local_glob_edge_cases_files
  352. target = fs_target
  353. for new_dir, target_slash in product([True, False], [True, False]):
  354. fs.mkdir(target)
  355. t = fs_join(target, "newdir") if new_dir else target
  356. t = t + "/" if target_slash else t
  357. fs.put(local_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
  358. output = fs.find(target)
  359. if new_dir:
  360. prefixed_expected = [
  361. fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
  362. ]
  363. else:
  364. prefixed_expected = [
  365. fs_sanitize_path(fs_join(target, p)) for p in expected
  366. ]
  367. assert sorted(output) == sorted(prefixed_expected)
  368. try:
  369. fs.rm(target, recursive=True)
  370. except FileNotFoundError:
  371. pass
  372. def test_put_list_of_files_to_existing_directory(
  373. self,
  374. fs,
  375. fs_join,
  376. fs_target,
  377. local_join,
  378. local_bulk_operations_scenario_0,
  379. supports_empty_directories,
  380. ):
  381. # Copy scenario 2a
  382. source = local_bulk_operations_scenario_0
  383. target = fs_target
  384. fs.mkdir(target)
  385. if not supports_empty_directories:
  386. # Force target directory to exist by adding a dummy file
  387. dummy = fs_join(target, "dummy")
  388. fs.touch(dummy)
  389. assert fs.isdir(target)
  390. source_files = [
  391. local_join(source, "file1"),
  392. local_join(source, "file2"),
  393. local_join(source, "subdir", "subfile1"),
  394. ]
  395. for target_slash in [False, True]:
  396. t = target + "/" if target_slash else target
  397. fs.put(source_files, t)
  398. assert fs.isfile(fs_join(target, "file1"))
  399. assert fs.isfile(fs_join(target, "file2"))
  400. assert fs.isfile(fs_join(target, "subfile1"))
  401. fs.rm(
  402. [
  403. fs_join(target, "file1"),
  404. fs_join(target, "file2"),
  405. fs_join(target, "subfile1"),
  406. ],
  407. recursive=True,
  408. )
  409. assert fs.ls(target, detail=False) == (
  410. [] if supports_empty_directories else [dummy]
  411. )
  412. def test_put_list_of_files_to_new_directory(
  413. self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
  414. ):
  415. # Copy scenario 2b
  416. source = local_bulk_operations_scenario_0
  417. target = fs_target
  418. fs.mkdir(target)
  419. source_files = [
  420. local_join(source, "file1"),
  421. local_join(source, "file2"),
  422. local_join(source, "subdir", "subfile1"),
  423. ]
  424. fs.put(source_files, fs_join(target, "newdir") + "/") # Note trailing slash
  425. assert fs.isdir(fs_join(target, "newdir"))
  426. assert fs.isfile(fs_join(target, "newdir", "file1"))
  427. assert fs.isfile(fs_join(target, "newdir", "file2"))
  428. assert fs.isfile(fs_join(target, "newdir", "subfile1"))
  429. def test_put_directory_recursive(
  430. self, fs, fs_join, fs_target, local_fs, local_join, local_path
  431. ):
  432. # https://github.com/fsspec/filesystem_spec/issues/1062
  433. # Recursive cp/get/put of source directory into non-existent target directory.
  434. src = local_join(local_path, "src")
  435. src_file = local_join(src, "file")
  436. local_fs.mkdir(src)
  437. local_fs.touch(src_file)
  438. target = fs_target
  439. # put without slash
  440. assert not fs.exists(target)
  441. for loop in range(2):
  442. fs.put(src, target, recursive=True)
  443. assert fs.isdir(target)
  444. if loop == 0:
  445. assert fs.isfile(fs_join(target, "file"))
  446. assert not fs.exists(fs_join(target, "src"))
  447. else:
  448. assert fs.isfile(fs_join(target, "file"))
  449. assert fs.isdir(fs_join(target, "src"))
  450. assert fs.isfile(fs_join(target, "src", "file"))
  451. fs.rm(target, recursive=True)
  452. # put with slash
  453. assert not fs.exists(target)
  454. for loop in range(2):
  455. fs.put(src + "/", target, recursive=True)
  456. assert fs.isdir(target)
  457. assert fs.isfile(fs_join(target, "file"))
  458. assert not fs.exists(fs_join(target, "src"))
  459. def test_put_directory_without_files_with_same_name_prefix(
  460. self,
  461. fs,
  462. fs_join,
  463. fs_target,
  464. local_join,
  465. local_dir_and_file_with_same_name_prefix,
  466. supports_empty_directories,
  467. ):
  468. # Create the test dirs
  469. source = local_dir_and_file_with_same_name_prefix
  470. target = fs_target
  471. # Test without glob
  472. fs.put(local_join(source, "subdir"), fs_target, recursive=True)
  473. assert fs.isfile(fs_join(fs_target, "subfile.txt"))
  474. assert not fs.isfile(fs_join(fs_target, "subdir.txt"))
  475. fs.rm([fs_join(target, "subfile.txt")])
  476. if supports_empty_directories:
  477. assert fs.ls(target) == []
  478. else:
  479. assert not fs.exists(target)
  480. # Test with glob
  481. fs.put(local_join(source, "subdir*"), fs_target, recursive=True)
  482. assert fs.isdir(fs_join(fs_target, "subdir"))
  483. assert fs.isfile(fs_join(fs_target, "subdir", "subfile.txt"))
  484. assert fs.isfile(fs_join(fs_target, "subdir.txt"))
  485. def test_copy_with_source_and_destination_as_list(
  486. self, fs, fs_target, fs_join, local_join, local_10_files_with_hashed_names
  487. ):
  488. # Create the test dir
  489. source = local_10_files_with_hashed_names
  490. target = fs_target
  491. # Create list of files for source and destination
  492. source_files = []
  493. destination_files = []
  494. for i in range(10):
  495. hashed_i = md5(str(i).encode("utf-8")).hexdigest()
  496. source_files.append(local_join(source, f"{hashed_i}.txt"))
  497. destination_files.append(fs_join(target, f"{hashed_i}.txt"))
  498. # Copy and assert order was kept
  499. fs.put(lpath=source_files, rpath=destination_files)
  500. for i in range(10):
  501. file_content = fs.cat(destination_files[i]).decode("utf-8")
  502. assert file_content == str(i)