copy.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557
  1. from hashlib import md5
  2. from itertools import product
  3. import pytest
  4. from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS
  5. class AbstractCopyTests:
  6. def test_copy_file_to_existing_directory(
  7. self,
  8. fs,
  9. fs_join,
  10. fs_bulk_operations_scenario_0,
  11. fs_target,
  12. supports_empty_directories,
  13. ):
  14. # Copy scenario 1a
  15. source = fs_bulk_operations_scenario_0
  16. target = fs_target
  17. fs.mkdir(target)
  18. if not supports_empty_directories:
  19. # Force target directory to exist by adding a dummy file
  20. fs.touch(fs_join(target, "dummy"))
  21. assert fs.isdir(target)
  22. target_file2 = fs_join(target, "file2")
  23. target_subfile1 = fs_join(target, "subfile1")
  24. # Copy from source directory
  25. fs.cp(fs_join(source, "file2"), target)
  26. assert fs.isfile(target_file2)
  27. # Copy from sub directory
  28. fs.cp(fs_join(source, "subdir", "subfile1"), target)
  29. assert fs.isfile(target_subfile1)
  30. # Remove copied files
  31. fs.rm([target_file2, target_subfile1])
  32. assert not fs.exists(target_file2)
  33. assert not fs.exists(target_subfile1)
  34. # Repeat with trailing slash on target
  35. fs.cp(fs_join(source, "file2"), target + "/")
  36. assert fs.isdir(target)
  37. assert fs.isfile(target_file2)
  38. fs.cp(fs_join(source, "subdir", "subfile1"), target + "/")
  39. assert fs.isfile(target_subfile1)
  40. def test_copy_file_to_new_directory(
  41. self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
  42. ):
  43. # Copy scenario 1b
  44. source = fs_bulk_operations_scenario_0
  45. target = fs_target
  46. fs.mkdir(target)
  47. fs.cp(
  48. fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir/")
  49. ) # Note trailing slash
  50. assert fs.isdir(target)
  51. assert fs.isdir(fs_join(target, "newdir"))
  52. assert fs.isfile(fs_join(target, "newdir", "subfile1"))
  53. def test_copy_file_to_file_in_existing_directory(
  54. self,
  55. fs,
  56. fs_join,
  57. fs_bulk_operations_scenario_0,
  58. fs_target,
  59. supports_empty_directories,
  60. ):
  61. # Copy scenario 1c
  62. source = fs_bulk_operations_scenario_0
  63. target = fs_target
  64. fs.mkdir(target)
  65. if not supports_empty_directories:
  66. # Force target directory to exist by adding a dummy file
  67. fs.touch(fs_join(target, "dummy"))
  68. assert fs.isdir(target)
  69. fs.cp(fs_join(source, "subdir", "subfile1"), fs_join(target, "newfile"))
  70. assert fs.isfile(fs_join(target, "newfile"))
  71. def test_copy_file_to_file_in_new_directory(
  72. self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
  73. ):
  74. # Copy scenario 1d
  75. source = fs_bulk_operations_scenario_0
  76. target = fs_target
  77. fs.mkdir(target)
  78. fs.cp(
  79. fs_join(source, "subdir", "subfile1"), fs_join(target, "newdir", "newfile")
  80. )
  81. assert fs.isdir(fs_join(target, "newdir"))
  82. assert fs.isfile(fs_join(target, "newdir", "newfile"))
  83. def test_copy_directory_to_existing_directory(
  84. self,
  85. fs,
  86. fs_join,
  87. fs_bulk_operations_scenario_0,
  88. fs_target,
  89. supports_empty_directories,
  90. ):
  91. # Copy scenario 1e
  92. source = fs_bulk_operations_scenario_0
  93. target = fs_target
  94. fs.mkdir(target)
  95. if not supports_empty_directories:
  96. # Force target directory to exist by adding a dummy file
  97. dummy = fs_join(target, "dummy")
  98. fs.touch(dummy)
  99. assert fs.isdir(target)
  100. for source_slash, target_slash in zip([False, True], [False, True]):
  101. s = fs_join(source, "subdir")
  102. if source_slash:
  103. s += "/"
  104. t = target + "/" if target_slash else target
  105. # Without recursive does nothing
  106. fs.cp(s, t)
  107. assert fs.ls(target, detail=False) == (
  108. [] if supports_empty_directories else [dummy]
  109. )
  110. # With recursive
  111. fs.cp(s, t, recursive=True)
  112. if source_slash:
  113. assert fs.isfile(fs_join(target, "subfile1"))
  114. assert fs.isfile(fs_join(target, "subfile2"))
  115. assert fs.isdir(fs_join(target, "nesteddir"))
  116. assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
  117. assert not fs.exists(fs_join(target, "subdir"))
  118. fs.rm(
  119. [
  120. fs_join(target, "subfile1"),
  121. fs_join(target, "subfile2"),
  122. fs_join(target, "nesteddir"),
  123. ],
  124. recursive=True,
  125. )
  126. else:
  127. assert fs.isdir(fs_join(target, "subdir"))
  128. assert fs.isfile(fs_join(target, "subdir", "subfile1"))
  129. assert fs.isfile(fs_join(target, "subdir", "subfile2"))
  130. assert fs.isdir(fs_join(target, "subdir", "nesteddir"))
  131. assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
  132. fs.rm(fs_join(target, "subdir"), recursive=True)
  133. assert fs.ls(target, detail=False) == (
  134. [] if supports_empty_directories else [dummy]
  135. )
  136. # Limit recursive by maxdepth
  137. fs.cp(s, t, recursive=True, maxdepth=1)
  138. if source_slash:
  139. assert fs.isfile(fs_join(target, "subfile1"))
  140. assert fs.isfile(fs_join(target, "subfile2"))
  141. assert not fs.exists(fs_join(target, "nesteddir"))
  142. assert not fs.exists(fs_join(target, "subdir"))
  143. fs.rm(
  144. [
  145. fs_join(target, "subfile1"),
  146. fs_join(target, "subfile2"),
  147. ],
  148. recursive=True,
  149. )
  150. else:
  151. assert fs.isdir(fs_join(target, "subdir"))
  152. assert fs.isfile(fs_join(target, "subdir", "subfile1"))
  153. assert fs.isfile(fs_join(target, "subdir", "subfile2"))
  154. assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
  155. fs.rm(fs_join(target, "subdir"), recursive=True)
  156. assert fs.ls(target, detail=False) == (
  157. [] if supports_empty_directories else [dummy]
  158. )
  159. def test_copy_directory_to_new_directory(
  160. self,
  161. fs,
  162. fs_join,
  163. fs_bulk_operations_scenario_0,
  164. fs_target,
  165. supports_empty_directories,
  166. ):
  167. # Copy scenario 1f
  168. source = fs_bulk_operations_scenario_0
  169. target = fs_target
  170. fs.mkdir(target)
  171. for source_slash, target_slash in zip([False, True], [False, True]):
  172. s = fs_join(source, "subdir")
  173. if source_slash:
  174. s += "/"
  175. t = fs_join(target, "newdir")
  176. if target_slash:
  177. t += "/"
  178. # Without recursive does nothing
  179. fs.cp(s, t)
  180. if supports_empty_directories:
  181. assert fs.ls(target) == []
  182. else:
  183. with pytest.raises(FileNotFoundError):
  184. fs.ls(target)
  185. # With recursive
  186. fs.cp(s, t, recursive=True)
  187. assert fs.isdir(fs_join(target, "newdir"))
  188. assert fs.isfile(fs_join(target, "newdir", "subfile1"))
  189. assert fs.isfile(fs_join(target, "newdir", "subfile2"))
  190. assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
  191. assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
  192. assert not fs.exists(fs_join(target, "subdir"))
  193. fs.rm(fs_join(target, "newdir"), recursive=True)
  194. assert not fs.exists(fs_join(target, "newdir"))
  195. # Limit recursive by maxdepth
  196. fs.cp(s, t, recursive=True, maxdepth=1)
  197. assert fs.isdir(fs_join(target, "newdir"))
  198. assert fs.isfile(fs_join(target, "newdir", "subfile1"))
  199. assert fs.isfile(fs_join(target, "newdir", "subfile2"))
  200. assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
  201. assert not fs.exists(fs_join(target, "subdir"))
  202. fs.rm(fs_join(target, "newdir"), recursive=True)
  203. assert not fs.exists(fs_join(target, "newdir"))
  204. def test_copy_glob_to_existing_directory(
  205. self,
  206. fs,
  207. fs_join,
  208. fs_bulk_operations_scenario_0,
  209. fs_target,
  210. supports_empty_directories,
  211. ):
  212. # Copy scenario 1g
  213. source = fs_bulk_operations_scenario_0
  214. target = fs_target
  215. fs.mkdir(target)
  216. if not supports_empty_directories:
  217. # Force target directory to exist by adding a dummy file
  218. dummy = fs_join(target, "dummy")
  219. fs.touch(dummy)
  220. assert fs.isdir(target)
  221. for target_slash in [False, True]:
  222. t = target + "/" if target_slash else target
  223. # Without recursive
  224. fs.cp(fs_join(source, "subdir", "*"), t)
  225. assert fs.isfile(fs_join(target, "subfile1"))
  226. assert fs.isfile(fs_join(target, "subfile2"))
  227. assert not fs.isdir(fs_join(target, "nesteddir"))
  228. assert not fs.exists(fs_join(target, "nesteddir", "nestedfile"))
  229. assert not fs.exists(fs_join(target, "subdir"))
  230. fs.rm(
  231. [
  232. fs_join(target, "subfile1"),
  233. fs_join(target, "subfile2"),
  234. ],
  235. recursive=True,
  236. )
  237. assert fs.ls(target, detail=False) == (
  238. [] if supports_empty_directories else [dummy]
  239. )
  240. # With recursive
  241. for glob, recursive in zip(["*", "**"], [True, False]):
  242. fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
  243. assert fs.isfile(fs_join(target, "subfile1"))
  244. assert fs.isfile(fs_join(target, "subfile2"))
  245. assert fs.isdir(fs_join(target, "nesteddir"))
  246. assert fs.isfile(fs_join(target, "nesteddir", "nestedfile"))
  247. assert not fs.exists(fs_join(target, "subdir"))
  248. fs.rm(
  249. [
  250. fs_join(target, "subfile1"),
  251. fs_join(target, "subfile2"),
  252. fs_join(target, "nesteddir"),
  253. ],
  254. recursive=True,
  255. )
  256. assert fs.ls(target, detail=False) == (
  257. [] if supports_empty_directories else [dummy]
  258. )
  259. # Limit recursive by maxdepth
  260. fs.cp(
  261. fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
  262. )
  263. assert fs.isfile(fs_join(target, "subfile1"))
  264. assert fs.isfile(fs_join(target, "subfile2"))
  265. assert not fs.exists(fs_join(target, "nesteddir"))
  266. assert not fs.exists(fs_join(target, "subdir"))
  267. fs.rm(
  268. [
  269. fs_join(target, "subfile1"),
  270. fs_join(target, "subfile2"),
  271. ],
  272. recursive=True,
  273. )
  274. assert fs.ls(target, detail=False) == (
  275. [] if supports_empty_directories else [dummy]
  276. )
  277. def test_copy_glob_to_new_directory(
  278. self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
  279. ):
  280. # Copy scenario 1h
  281. source = fs_bulk_operations_scenario_0
  282. target = fs_target
  283. fs.mkdir(target)
  284. for target_slash in [False, True]:
  285. t = fs_join(target, "newdir")
  286. if target_slash:
  287. t += "/"
  288. # Without recursive
  289. fs.cp(fs_join(source, "subdir", "*"), t)
  290. assert fs.isdir(fs_join(target, "newdir"))
  291. assert fs.isfile(fs_join(target, "newdir", "subfile1"))
  292. assert fs.isfile(fs_join(target, "newdir", "subfile2"))
  293. assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
  294. assert not fs.exists(fs_join(target, "newdir", "nesteddir", "nestedfile"))
  295. assert not fs.exists(fs_join(target, "subdir"))
  296. assert not fs.exists(fs_join(target, "newdir", "subdir"))
  297. fs.rm(fs_join(target, "newdir"), recursive=True)
  298. assert not fs.exists(fs_join(target, "newdir"))
  299. # With recursive
  300. for glob, recursive in zip(["*", "**"], [True, False]):
  301. fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive)
  302. assert fs.isdir(fs_join(target, "newdir"))
  303. assert fs.isfile(fs_join(target, "newdir", "subfile1"))
  304. assert fs.isfile(fs_join(target, "newdir", "subfile2"))
  305. assert fs.isdir(fs_join(target, "newdir", "nesteddir"))
  306. assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile"))
  307. assert not fs.exists(fs_join(target, "subdir"))
  308. assert not fs.exists(fs_join(target, "newdir", "subdir"))
  309. fs.rm(fs_join(target, "newdir"), recursive=True)
  310. assert not fs.exists(fs_join(target, "newdir"))
  311. # Limit recursive by maxdepth
  312. fs.cp(
  313. fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1
  314. )
  315. assert fs.isdir(fs_join(target, "newdir"))
  316. assert fs.isfile(fs_join(target, "newdir", "subfile1"))
  317. assert fs.isfile(fs_join(target, "newdir", "subfile2"))
  318. assert not fs.exists(fs_join(target, "newdir", "nesteddir"))
  319. assert not fs.exists(fs_join(target, "subdir"))
  320. assert not fs.exists(fs_join(target, "newdir", "subdir"))
  321. fs.rm(fs_join(target, "newdir"), recursive=True)
  322. assert not fs.exists(fs_join(target, "newdir"))
  323. @pytest.mark.parametrize(
  324. GLOB_EDGE_CASES_TESTS["argnames"],
  325. GLOB_EDGE_CASES_TESTS["argvalues"],
  326. )
  327. def test_copy_glob_edge_cases(
  328. self,
  329. path,
  330. recursive,
  331. maxdepth,
  332. expected,
  333. fs,
  334. fs_join,
  335. fs_glob_edge_cases_files,
  336. fs_target,
  337. fs_sanitize_path,
  338. ):
  339. # Copy scenario 1g
  340. source = fs_glob_edge_cases_files
  341. target = fs_target
  342. for new_dir, target_slash in product([True, False], [True, False]):
  343. fs.mkdir(target)
  344. t = fs_join(target, "newdir") if new_dir else target
  345. t = t + "/" if target_slash else t
  346. fs.copy(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth)
  347. output = fs.find(target)
  348. if new_dir:
  349. prefixed_expected = [
  350. fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected
  351. ]
  352. else:
  353. prefixed_expected = [
  354. fs_sanitize_path(fs_join(target, p)) for p in expected
  355. ]
  356. assert sorted(output) == sorted(prefixed_expected)
  357. try:
  358. fs.rm(target, recursive=True)
  359. except FileNotFoundError:
  360. pass
  361. def test_copy_list_of_files_to_existing_directory(
  362. self,
  363. fs,
  364. fs_join,
  365. fs_bulk_operations_scenario_0,
  366. fs_target,
  367. supports_empty_directories,
  368. ):
  369. # Copy scenario 2a
  370. source = fs_bulk_operations_scenario_0
  371. target = fs_target
  372. fs.mkdir(target)
  373. if not supports_empty_directories:
  374. # Force target directory to exist by adding a dummy file
  375. dummy = fs_join(target, "dummy")
  376. fs.touch(dummy)
  377. assert fs.isdir(target)
  378. source_files = [
  379. fs_join(source, "file1"),
  380. fs_join(source, "file2"),
  381. fs_join(source, "subdir", "subfile1"),
  382. ]
  383. for target_slash in [False, True]:
  384. t = target + "/" if target_slash else target
  385. fs.cp(source_files, t)
  386. assert fs.isfile(fs_join(target, "file1"))
  387. assert fs.isfile(fs_join(target, "file2"))
  388. assert fs.isfile(fs_join(target, "subfile1"))
  389. fs.rm(
  390. [
  391. fs_join(target, "file1"),
  392. fs_join(target, "file2"),
  393. fs_join(target, "subfile1"),
  394. ],
  395. recursive=True,
  396. )
  397. assert fs.ls(target, detail=False) == (
  398. [] if supports_empty_directories else [dummy]
  399. )
  400. def test_copy_list_of_files_to_new_directory(
  401. self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
  402. ):
  403. # Copy scenario 2b
  404. source = fs_bulk_operations_scenario_0
  405. target = fs_target
  406. fs.mkdir(target)
  407. source_files = [
  408. fs_join(source, "file1"),
  409. fs_join(source, "file2"),
  410. fs_join(source, "subdir", "subfile1"),
  411. ]
  412. fs.cp(source_files, fs_join(target, "newdir") + "/") # Note trailing slash
  413. assert fs.isdir(fs_join(target, "newdir"))
  414. assert fs.isfile(fs_join(target, "newdir", "file1"))
  415. assert fs.isfile(fs_join(target, "newdir", "file2"))
  416. assert fs.isfile(fs_join(target, "newdir", "subfile1"))
  417. def test_copy_two_files_new_directory(
  418. self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
  419. ):
  420. # This is a duplicate of test_copy_list_of_files_to_new_directory and
  421. # can eventually be removed.
  422. source = fs_bulk_operations_scenario_0
  423. target = fs_target
  424. assert not fs.exists(target)
  425. fs.cp([fs_join(source, "file1"), fs_join(source, "file2")], target)
  426. assert fs.isdir(target)
  427. assert fs.isfile(fs_join(target, "file1"))
  428. assert fs.isfile(fs_join(target, "file2"))
  429. def test_copy_directory_without_files_with_same_name_prefix(
  430. self,
  431. fs,
  432. fs_join,
  433. fs_target,
  434. fs_dir_and_file_with_same_name_prefix,
  435. supports_empty_directories,
  436. ):
  437. # Create the test dirs
  438. source = fs_dir_and_file_with_same_name_prefix
  439. target = fs_target
  440. # Test without glob
  441. fs.cp(fs_join(source, "subdir"), target, recursive=True)
  442. assert fs.isfile(fs_join(target, "subfile.txt"))
  443. assert not fs.isfile(fs_join(target, "subdir.txt"))
  444. fs.rm([fs_join(target, "subfile.txt")])
  445. if supports_empty_directories:
  446. assert fs.ls(target) == []
  447. else:
  448. assert not fs.exists(target)
  449. # Test with glob
  450. fs.cp(fs_join(source, "subdir*"), target, recursive=True)
  451. assert fs.isdir(fs_join(target, "subdir"))
  452. assert fs.isfile(fs_join(target, "subdir", "subfile.txt"))
  453. assert fs.isfile(fs_join(target, "subdir.txt"))
  454. def test_copy_with_source_and_destination_as_list(
  455. self, fs, fs_target, fs_join, fs_10_files_with_hashed_names
  456. ):
  457. # Create the test dir
  458. source = fs_10_files_with_hashed_names
  459. target = fs_target
  460. # Create list of files for source and destination
  461. source_files = []
  462. destination_files = []
  463. for i in range(10):
  464. hashed_i = md5(str(i).encode("utf-8")).hexdigest()
  465. source_files.append(fs_join(source, f"{hashed_i}.txt"))
  466. destination_files.append(fs_join(target, f"{hashed_i}.txt"))
  467. # Copy and assert order was kept
  468. fs.copy(path1=source_files, path2=destination_files)
  469. for i in range(10):
  470. file_content = fs.cat(destination_files[i]).decode("utf-8")
  471. assert file_content == str(i)