deltalake.py 2.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. #
  2. # Licensed to the Apache Software Foundation (ASF) under one
  3. # or more contributor license agreements. See the NOTICE file
  4. # distributed with this work for additional information
  5. # regarding copyright ownership. The ASF licenses this file
  6. # to you under the Apache License, Version 2.0 (the
  7. # "License"); you may not use this file except in compliance
  8. # with the License. You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing,
  13. # software distributed under the License is distributed on an
  14. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15. # KIND, either express or implied. See the License for the
  16. # specific language governing permissions and limitations
  17. # under the License.
  18. from __future__ import annotations
  19. from typing import TYPE_CHECKING
  20. from airflow.utils.module_loading import qualname
# Qualified names of the classes this module handles.
# NOTE(review): presumably read by Airflow's serde registry to route objects to
# serialize()/deserialize() in this module — confirm against
# airflow.serialization.serde.
serializers = ["deltalake.table.DeltaTable"]
# The two names below are bound to the very same list object as ``serializers``
# (aliases, not copies), so mutating one of them mutates all three.
deserializers = serializers
# NOTE(review): no ``stringify`` function is visible in this part of the file;
# confirm whether registering stringifiers requires one.
stringifiers = serializers

if TYPE_CHECKING:
    # Only needed for the return annotation of serialize(); the guard keeps
    # this import from running at runtime.
    from airflow.serialization.serde import U

# Format version: returned by serialize() as part of its result and compared
# against the incoming version in deserialize(), which rejects anything newer.
__version__ = 1
  27. def serialize(o: object) -> tuple[U, str, int, bool]:
  28. from deltalake.table import DeltaTable
  29. if not isinstance(o, DeltaTable):
  30. return "", "", 0, False
  31. from airflow.models.crypto import get_fernet
  32. # we encrypt the information here until we have as part of the
  33. # storage options can have sensitive information
  34. fernet = get_fernet()
  35. properties: dict = {}
  36. for k, v in o._storage_options.items() if o._storage_options else {}:
  37. properties[k] = fernet.encrypt(v.encode("utf-8")).decode("utf-8")
  38. data = {
  39. "table_uri": o.table_uri,
  40. "version": o.version(),
  41. "storage_options": properties,
  42. }
  43. return data, qualname(o), __version__, True
  44. def deserialize(classname: str, version: int, data: dict):
  45. from deltalake.table import DeltaTable
  46. from airflow.models.crypto import get_fernet
  47. if version > __version__:
  48. raise TypeError("serialized version is newer than class version")
  49. if classname == qualname(DeltaTable):
  50. fernet = get_fernet()
  51. properties = {}
  52. for k, v in data["storage_options"].items():
  53. properties[k] = fernet.decrypt(v.encode("utf-8")).decode("utf-8")
  54. if len(properties) == 0:
  55. storage_options = None
  56. else:
  57. storage_options = properties
  58. return DeltaTable(data["table_uri"], version=data["version"], storage_options=storage_options)
  59. raise TypeError(f"do not know how to deserialize {classname}")