__diff.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. #!/usr/bin/env python
  2. #
  3. # Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
  4. # Copyright (c) 2008-2016 California Institute of Technology.
  5. # Copyright (c) 2016-2024 The Uncertainty Quantification Foundation.
  6. # License: 3-clause BSD. The full license text is available at:
  7. # - https://github.com/uqfoundation/dill/blob/master/LICENSE
  8. """
  9. Module to show if an object has changed since it was memorised
  10. """
  11. import builtins
  12. import os
  13. import sys
  14. import types
  15. try:
  16. import numpy.ma
  17. HAS_NUMPY = True
  18. except ImportError:
  19. HAS_NUMPY = False
  20. # pypy doesn't use reference counting
  21. getrefcount = getattr(sys, 'getrefcount', lambda x:0)
  22. # memo of objects indexed by id to a tuple (attributes, sequence items)
  23. # attributes is a dict indexed by attribute name to attribute id
  24. # sequence items is either a list of ids, of a dictionary of keys to ids
  25. memo = {}
  26. id_to_obj = {}
  27. # types that cannot have changing attributes
  28. builtins_types = set((str, list, dict, set, frozenset, int))
  29. dont_memo = set(id(i) for i in (memo, sys.modules, sys.path_importer_cache,
  30. os.environ, id_to_obj))
  31. def get_attrs(obj):
  32. """
  33. Gets all the attributes of an object though its __dict__ or return None
  34. """
  35. if type(obj) in builtins_types \
  36. or type(obj) is type and obj in builtins_types:
  37. return
  38. return getattr(obj, '__dict__', None)
  39. def get_seq(obj, cache={str: False, frozenset: False, list: True, set: True,
  40. dict: True, tuple: True, type: False,
  41. types.ModuleType: False, types.FunctionType: False,
  42. types.BuiltinFunctionType: False}):
  43. """
  44. Gets all the items in a sequence or return None
  45. """
  46. try:
  47. o_type = obj.__class__
  48. except AttributeError:
  49. o_type = type(obj)
  50. hsattr = hasattr
  51. if o_type in cache:
  52. if cache[o_type]:
  53. if hsattr(obj, "copy"):
  54. return obj.copy()
  55. return obj
  56. elif HAS_NUMPY and o_type in (numpy.ndarray, numpy.ma.core.MaskedConstant):
  57. if obj.shape and obj.size:
  58. return obj
  59. else:
  60. return []
  61. elif hsattr(obj, "__contains__") and hsattr(obj, "__iter__") \
  62. and hsattr(obj, "__len__") and hsattr(o_type, "__contains__") \
  63. and hsattr(o_type, "__iter__") and hsattr(o_type, "__len__"):
  64. cache[o_type] = True
  65. if hsattr(obj, "copy"):
  66. return obj.copy()
  67. return obj
  68. else:
  69. cache[o_type] = False
  70. return None
  71. def memorise(obj, force=False):
  72. """
  73. Adds an object to the memo, and recursively adds all the objects
  74. attributes, and if it is a container, its items. Use force=True to update
  75. an object already in the memo. Updating is not recursively done.
  76. """
  77. obj_id = id(obj)
  78. if obj_id in memo and not force or obj_id in dont_memo:
  79. return
  80. id_ = id
  81. g = get_attrs(obj)
  82. if g is None:
  83. attrs_id = None
  84. else:
  85. attrs_id = dict((key,id_(value)) for key, value in g.items())
  86. s = get_seq(obj)
  87. if s is None:
  88. seq_id = None
  89. elif hasattr(s, "items"):
  90. seq_id = dict((id_(key),id_(value)) for key, value in s.items())
  91. elif not hasattr(s, "__len__"): #XXX: avoid TypeError from unexpected case
  92. seq_id = None
  93. else:
  94. seq_id = [id_(i) for i in s]
  95. memo[obj_id] = attrs_id, seq_id
  96. id_to_obj[obj_id] = obj
  97. mem = memorise
  98. if g is not None:
  99. [mem(value) for key, value in g.items()]
  100. if s is not None:
  101. if hasattr(s, "items"):
  102. [(mem(key), mem(item))
  103. for key, item in s.items()]
  104. else:
  105. if hasattr(s, '__len__'):
  106. [mem(item) for item in s]
  107. else: mem(s)
  108. def release_gone():
  109. itop, mp, src = id_to_obj.pop, memo.pop, getrefcount
  110. [(itop(id_), mp(id_)) for id_, obj in list(id_to_obj.items())
  111. if src(obj) < 4] #XXX: correct for pypy?
  112. def whats_changed(obj, seen=None, simple=False, first=True):
  113. """
  114. Check an object against the memo. Returns a list in the form
  115. (attribute changes, container changed). Attribute changes is a dict of
  116. attribute name to attribute value. container changed is a boolean.
  117. If simple is true, just returns a boolean. None for either item means
  118. that it has not been checked yet
  119. """
  120. # Special cases
  121. if first:
  122. # ignore the _ variable, which only appears in interactive sessions
  123. if "_" in builtins.__dict__:
  124. del builtins._
  125. if seen is None:
  126. seen = {}
  127. obj_id = id(obj)
  128. if obj_id in seen:
  129. if simple:
  130. return any(seen[obj_id])
  131. return seen[obj_id]
  132. # Safety checks
  133. if obj_id in dont_memo:
  134. seen[obj_id] = [{}, False]
  135. if simple:
  136. return False
  137. return seen[obj_id]
  138. elif obj_id not in memo:
  139. if simple:
  140. return True
  141. else:
  142. raise RuntimeError("Object not memorised " + str(obj))
  143. seen[obj_id] = ({}, False)
  144. chngd = whats_changed
  145. id_ = id
  146. # compare attributes
  147. attrs = get_attrs(obj)
  148. if attrs is None:
  149. changed = {}
  150. else:
  151. obj_attrs = memo[obj_id][0]
  152. obj_get = obj_attrs.get
  153. changed = dict((key,None) for key in obj_attrs if key not in attrs)
  154. for key, o in attrs.items():
  155. if id_(o) != obj_get(key, None) or chngd(o, seen, True, False):
  156. changed[key] = o
  157. # compare sequence
  158. items = get_seq(obj)
  159. seq_diff = False
  160. if (items is not None) and (hasattr(items, '__len__')):
  161. obj_seq = memo[obj_id][1]
  162. if (len(items) != len(obj_seq)):
  163. seq_diff = True
  164. elif hasattr(obj, "items"): # dict type obj
  165. obj_get = obj_seq.get
  166. for key, item in items.items():
  167. if id_(item) != obj_get(id_(key)) \
  168. or chngd(key, seen, True, False) \
  169. or chngd(item, seen, True, False):
  170. seq_diff = True
  171. break
  172. else:
  173. for i, j in zip(items, obj_seq): # list type obj
  174. if id_(i) != j or chngd(i, seen, True, False):
  175. seq_diff = True
  176. break
  177. seen[obj_id] = changed, seq_diff
  178. if simple:
  179. return changed or seq_diff
  180. return changed, seq_diff
  181. def has_changed(*args, **kwds):
  182. kwds['simple'] = True # ignore simple if passed in
  183. return whats_changed(*args, **kwds)
  184. __import__ = __import__
  185. def _imp(*args, **kwds):
  186. """
  187. Replaces the default __import__, to allow a module to be memorised
  188. before the user can change it
  189. """
  190. before = set(sys.modules.keys())
  191. mod = __import__(*args, **kwds)
  192. after = set(sys.modules.keys()).difference(before)
  193. for m in after:
  194. memorise(sys.modules[m])
  195. return mod
  196. builtins.__import__ = _imp
  197. if hasattr(builtins, "_"):
  198. del builtins._
  199. # memorise all already imported modules. This implies that this must be
  200. # imported first for any changes to be recorded
  201. for mod in list(sys.modules.values()):
  202. memorise(mod)
  203. release_gone()