compression.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940
  1. #
  2. # Licensed to the Apache Software Foundation (ASF) under one
  3. # or more contributor license agreements. See the NOTICE file
  4. # distributed with this work for additional information
  5. # regarding copyright ownership. The ASF licenses this file
  6. # to you under the Apache License, Version 2.0 (the
  7. # "License"); you may not use this file except in compliance
  8. # with the License. You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing,
  13. # software distributed under the License is distributed on an
  14. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15. # KIND, either express or implied. See the License for the
  16. # specific language governing permissions and limitations
  17. # under the License.
  18. from __future__ import annotations
  19. import bz2
  20. import gzip
  21. import shutil
  22. from tempfile import NamedTemporaryFile
  23. def uncompress_file(input_file_name, file_extension, dest_dir):
  24. """Uncompress gz and bz2 files."""
  25. if file_extension.lower() not in (".gz", ".bz2"):
  26. raise NotImplementedError(
  27. f"Received {file_extension} format. Only gz and bz2 files can currently be uncompressed."
  28. )
  29. if file_extension.lower() == ".gz":
  30. fmodule = gzip.GzipFile
  31. elif file_extension.lower() == ".bz2":
  32. fmodule = bz2.BZ2File
  33. with fmodule(input_file_name, mode="rb") as f_compressed, NamedTemporaryFile(
  34. dir=dest_dir, mode="wb", delete=False
  35. ) as f_uncompressed:
  36. shutil.copyfileobj(f_compressed, f_uncompressed)
  37. return f_uncompressed.name