_data.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. # Copyright (c) "Neo4j"
  2. # Neo4j Sweden AB [https://neo4j.com]
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # https://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. from __future__ import annotations
  16. import typing as t
  17. from abc import (
  18. ABC,
  19. abstractmethod,
  20. )
  21. from collections.abc import (
  22. Mapping,
  23. Sequence,
  24. Set,
  25. )
  26. from functools import reduce
  27. from operator import xor as xor_operator
  28. from ._codec.hydration import BrokenHydrationObject
  29. from ._conf import iter_items
  30. if t.TYPE_CHECKING:
  31. from typing_extensions import deprecated
  32. else:
  33. from ._meta import deprecated
  34. from ._spatial import Point
  35. from .exceptions import BrokenRecordError
  36. from .graph import (
  37. Node,
  38. Path,
  39. Relationship,
  40. )
  41. from .time import (
  42. Date,
  43. DateTime,
  44. Duration,
  45. Time,
  46. )
  # Unconstrained generic type variable for annotations in this module.
  47. _T = t.TypeVar("_T")
  # Type of a record field selector: a positional index (int) or a field name (str).
  48. _K = t.Union[int, str]
  class Record(tuple, Mapping):
      """
      Immutable, ordered collection of key-value pairs.

      It is generally closer to a :func:`collections.namedtuple` than to a
      :class:`OrderedDict` in as much as iteration of the collection will yield
      values rather than keys.
      """

      # Field names, index-aligned with the values stored in the tuple itself.
      __keys: tuple[str, ...]

      def __new__(cls, iterable=()):
          """
          Create a record from key-value pairs.

          :param iterable: source of the ``(key, value)`` pairs; it is passed
              through ``iter_items`` to obtain the pairs
          """
          keys = []
          values = []
          for key, value in iter_items(iterable):
              keys.append(key)
              values.append(value)
          inst = tuple.__new__(cls, values)
          inst.__keys = tuple(keys)
          return inst

      def _broken_record_error(self, index):
          """Build the error reported when the value at ``index`` is broken."""
          return BrokenRecordError(
              f"Record contains broken data at {index} ('{self.__keys[index]}')"
          )

      def _super_getitem_single(self, index):
          """
          Fetch the raw value at ``index`` from the underlying tuple.

          :raises BrokenRecordError: if the stored value is a
              ``BrokenHydrationObject``; the original error is chained.
          """
          value = super().__getitem__(index)
          if isinstance(value, BrokenHydrationObject):
              raise self._broken_record_error(index) from value.error
          return value

      def __repr__(self) -> str:
          # Iterate the raw tuple (not ``self``) so that broken values can be
          # displayed instead of raising.
          fields = " ".join(
              f"{field}={value!r}"
              for field, value in zip(self.__keys, super().__iter__())
          )
          return f"<{self.__class__.__name__} {fields}>"

      __str__ = __repr__

      def __eq__(self, other: object) -> bool:
          """
          Compare this record with another object for equality.

          In order to be flexible regarding comparison, the equality rules
          for a record permit comparison with any other Sequence or Mapping.
          An object that is both must match as a sequence and as a mapping.

          :param other: the object to compare with
          :returns: whether the two objects are considered equal; always
              :data:`False` if ``other`` is neither a Sequence nor a Mapping
          """
          compare_as_sequence = isinstance(other, Sequence)
          compare_as_mapping = isinstance(other, Mapping)
          if compare_as_sequence and compare_as_mapping:
              other = t.cast(t.Mapping, other)
              return list(self) == list(other) and dict(self) == dict(other)
          elif compare_as_sequence:
              other = t.cast(t.Sequence, other)
              return list(self) == list(other)
          elif compare_as_mapping:
              other = t.cast(t.Mapping, other)
              return dict(self) == dict(other)
          else:
              return False

      def __ne__(self, other: object) -> bool:
          return not self.__eq__(other)

      def __hash__(self):
          # The initial value 0 makes an empty record hashable (``reduce``
          # raises TypeError on an empty iterable without one) and leaves the
          # hash of non-empty records unchanged because ``x ^ 0 == x``.
          return reduce(xor_operator, map(hash, self.items()), 0)

      def __iter__(self) -> t.Iterator[t.Any]:
          """
          Iterate over the values (not the keys) of the record.

          :raises BrokenRecordError: when a broken value is reached.
          """
          for i, v in enumerate(super().__iter__()):
              if isinstance(v, BrokenHydrationObject):
                  raise self._broken_record_error(i) from v.error
              yield v

      def __getitem__(  # type: ignore[override]
          self, key: _K | slice
      ) -> t.Any:
          """
          Obtain a value by index or key, or a sub-record by slice.

          :param key: an index, a key, or a slice
          :returns: the value, a new record for a slice, or :data:`None` if an
              integer index is out of bounds
          :raises KeyError: if a string key is not part of the record
          :raises TypeError: if ``key`` is neither int, str, nor slice
          """
          if isinstance(key, slice):
              keys = self.__keys[key]
              # Slice the raw tuple so broken values are carried over as-is.
              values = super().__getitem__(key)
              return self.__class__(zip(keys, values))
          try:
              index = self.index(key)
          except IndexError:
              return None
          else:
              return self._super_getitem_single(index)

      # TODO: 6.0 - remove
      @deprecated("This method is deprecated and will be removed in the future.")
      def __getslice__(self, start, stop):  # noqa: PLW3201 will be removed
          key = slice(start, stop)
          keys = self.__keys[key]
          values = tuple(self)[key]
          return self.__class__(zip(keys, values))

      def get(self, key: str, default: object = None) -> t.Any:
          """
          Obtain a value from the record by key.

          The ``default`` is returned if the key does not exist.

          :param key: a key
          :param default: default value

          :returns: a value
          """
          try:
              index = self.__keys.index(str(key))
          except ValueError:
              return default
          if 0 <= index < len(self):
              return self._super_getitem_single(index)
          else:
              return default

      def index(self, key: _K) -> int:  # type: ignore[override]
          """
          Return the index of the given item.

          :param key: a key (field name) or an index

          :returns: index
          :raises IndexError: if an integer ``key`` is out of bounds
              (negative indexes are not accepted)
          :raises KeyError: if a string ``key`` is not part of the record
          :raises TypeError: if ``key`` is neither int nor str
          """
          if isinstance(key, int):
              if 0 <= key < len(self.__keys):
                  return key
              raise IndexError(key)
          elif isinstance(key, str):
              try:
                  return self.__keys.index(key)
              except ValueError as exc:
                  raise KeyError(key) from exc
          else:
              raise TypeError(key)

      def value(self, key: _K = 0, default: object = None) -> t.Any:
          """
          Obtain a single value from the record by index or key.

          If no index or key is specified, the first value is returned.
          If the specified item does not exist, the default value is returned.

          :param key: an index or key
          :param default: default value

          :returns: a single value
          """
          try:
              index = self.index(key)
          except (IndexError, KeyError):
              return default
          else:
              return self[index]

      def keys(self) -> list[str]:  # type: ignore[override]
          """
          Return the keys of the record.

          :returns: list of key names
          """
          return list(self.__keys)

      def values(self, *keys: _K) -> list[t.Any]:  # type: ignore[override]
          """
          Return the values of the record.

          The values returned can optionally be filtered to include only certain
          values by index or key. Keys that are not in the record contribute
          :data:`None`.

          :param keys: indexes or keys of the items to include; if none
              are provided, all values will be included

          :returns: list of values
          :raises IndexError: if an out-of-bounds index is specified.
          """
          if keys:
              d: list[t.Any] = []
              for key in keys:
                  try:
                      i = self.index(key)
                  except KeyError:
                      d.append(None)
                  else:
                      d.append(self[i])
              return d
          return list(self)

      def items(self, *keys):
          """
          Return the fields of the record as a list of key and value tuples.

          Keys that are not in the record are reported as ``(key, None)``.

          :param keys: indexes or keys of the items to include; if none
              are provided, all fields will be included

          :returns: a list of ``(key, value)`` tuples
          :raises IndexError: if an out-of-bounds index is specified.
          """
          if keys:
              d = []
              for key in keys:
                  try:
                      i = self.index(key)
                  except KeyError:
                      d.append((key, None))
                  else:
                      d.append((self.__keys[i], self[i]))
              return d
          return [
              (self.__keys[i], self._super_getitem_single(i))
              for i in range(len(self))
          ]

      def data(self, *keys: _K) -> dict[str, t.Any]:
          """
          Return the record as a dictionary.

          Return the keys and values of this record as a dictionary, optionally
          including only certain values by index or key.
          Keys provided in the items that are not in the record will be inserted
          with a value of :data:`None`; indexes provided that are out of bounds
          will trigger an :exc:`IndexError`.

          This function provides a convenient but opinionated way to transform
          the record into a mostly JSON serializable format. It is mainly useful
          for interactive sessions and rapid prototyping.

          The transformation works as follows:

          * Nodes are transformed into dictionaries of their
            properties.

            * No indication of their original type remains.
            * Not all information is serialized (e.g., labels and element_id are
              absent).

          * Relationships are transformed to a tuple of
            ``(start_node, type, end_node)``, where the nodes are transformed
            as described above, and type is the relationship type name
            (:class:`str`).

            * No indication of their original type remains.
            * No other information (properties, element_id, start_node,
              end_node, ...) is serialized.

          * Paths are transformed into lists of nodes and relationships. No
            indication of the original type remains.
          * :class:`list` and :class:`dict` values are recursively transformed.
          * Every other type remains unchanged.

            * Spatial types and durations inherit from :class:`tuple`. Hence,
              they are JSON serializable, but, like graph types, type
              information will be lost in the process.
            * The remaining temporal types are not JSON serializable.

          You will have to implement a custom serializer should you need more
          control over the output format.

          :param keys: Indexes or keys of the items to include. If none are
              provided, all values will be included.

          :returns: dictionary of values, keyed by field name

          :raises: :exc:`IndexError` if an out-of-bounds index is specified.
          """
          return RecordExporter().transform(dict(self.items(*keys)))
  class DataTransformer(ABC):
      """Interface for objects that convert data from one shape into another."""

      @abstractmethod
      def transform(self, x):
          """
          Convert a single value or a collection of values.

          Concrete subclasses must override this method.

          :param x: the value to convert

          :returns: the converted value
          """
  class RecordExporter(DataTransformer):
      """Transformer backing the :meth:`.Record.data` method."""

      def transform(self, x):
          """
          Recursively convert graph types and containers into plain values.

          :param x: the value to convert

          :returns: the converted value; values that match none of the handled
              types are returned unchanged
          """
          # A node is reduced to its properties.
          if isinstance(x, Node):
              return self.transform(dict(x))

          # A relationship becomes (start properties, type name, end properties).
          if isinstance(x, Relationship):
              start_properties = self.transform(dict(x.start_node))
              type_name = x.__class__.__name__
              end_properties = self.transform(dict(x.end_node))
              return (start_properties, type_name, end_properties)

          # A path becomes an alternating list: node, type name, node, ...
          if isinstance(x, Path):
              steps = [self.transform(x.start_node)]
              for position, hop in enumerate(x.relationships, start=1):
                  steps.append(self.transform(hop.__class__.__name__))
                  steps.append(self.transform(x.nodes[position]))
              return steps

          # These must be tested before the generic container checks below:
          # ``str`` is itself a Sequence and is passed through untouched.
          if isinstance(x, (str, Point, Date, Time, DateTime, Duration)):
              return x

          # Containers are rebuilt with their own type from converted members.
          if isinstance(x, (Sequence, Set)):
              rebuild = type(x)
              return rebuild(map(self.transform, x))

          if isinstance(x, Mapping):
              rebuild = type(x)
              return rebuild(
                  (name, self.transform(member)) for name, member in x.items()
              )

          return x
  class RecordTableRowExporter(DataTransformer):
      """Transformer backing the :meth:`.Result.to_df` method."""

      @staticmethod
      def _escape_map_key(key: str) -> str:
          """Escape backslashes first, then dots, in a map key."""
          with_escaped_backslashes = key.replace("\\", "\\\\")
          return with_escaped_backslashes.replace(".", "\\.")

      def transform(self, x):
          """
          Flatten a mapping of values into a single-level mapping.

          :param x: a mapping from field name to value

          :returns: a mapping of the same type as ``x`` whose keys are the
              flattened column names
          """
          assert isinstance(x, Mapping)
          container = type(x)
          return container(
              column
              for name, value in x.items()
              for column in self._transform(
                  value, prefix=self._escape_map_key(name)
              ).items()
          )

      def _transform(self, x, prefix):
          """
          Flatten one value into a mapping of column name to value.

          :param x: the value to flatten
          :param prefix: column name prefix accumulated so far

          :returns: a mapping from column name to value
          """
          if isinstance(x, Node):
              columns = {
                  f"{prefix}().element_id": x.element_id,
                  f"{prefix}().labels": x.labels,
              }
              for name, value in x.items():
                  columns[f"{prefix}().prop.{name}"] = value
              return columns

          if isinstance(x, Relationship):
              columns = {
                  f"{prefix}->.element_id": x.element_id,
                  f"{prefix}->.start.element_id": x.start_node.element_id,
                  f"{prefix}->.end.element_id": x.end_node.element_id,
                  f"{prefix}->.type": x.__class__.__name__,
              }
              for name, value in x.items():
                  columns[f"{prefix}->.prop.{name}"] = value
              return columns

          # Paths and strings are kept whole. ``str`` has to be caught here
          # because it would otherwise match the Sequence check below.
          if isinstance(x, (Path, str)):
              return {prefix: x}

          if isinstance(x, Sequence):
              columns = {}
              for position, member in enumerate(x):
                  nested = self._transform(
                      member, prefix=f"{prefix}[].{position}"
                  )
                  columns.update(nested.items())
              return columns

          if isinstance(x, Mapping):
              container = type(x)
              return container(
                  column
                  for name, member in x.items()
                  for column in self._transform(
                      member,
                      prefix=f"{prefix}{{}}.{self._escape_map_key(name)}",
                  ).items()
              )

          return {prefix: x}