detect.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. #!/usr/bin/env python
  2. #
  3. # Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
  4. # Copyright (c) 2008-2016 California Institute of Technology.
  5. # Copyright (c) 2016-2024 The Uncertainty Quantification Foundation.
  6. # License: 3-clause BSD. The full license text is available at:
  7. # - https://github.com/uqfoundation/dill/blob/master/LICENSE
  8. """
  9. Methods for detecting objects leading to pickling failures.
  10. """
  11. import dis
  12. from inspect import ismethod, isfunction, istraceback, isframe, iscode
  13. from .pointers import parent, reference, at, parents, children
  14. from .logger import trace
  # names exported by 'from dill.detect import *'
  __all__ = [
      'baditems',
      'badobjects',
      'badtypes',
      'code',
      'errors',
      'freevars',
      'getmodule',
      'globalvars',
      'nestedcode',
      'nestedglobals',
      'outermost',
      'referredglobals',
      'referrednested',
      'trace',
      'varnames',
  ]
  def getmodule(object, _filename=None, force=False):
      """get the module of the object

      The lookup is delegated to inspect.getmodule(object, _filename).
      If that finds nothing and force is True, the object's name (as given
      by dill.source.getname with force=True) is checked against the names
      in builtins: the builtins module is returned on a match, else None.
      """
      from inspect import getmodule as _inspect_getmodule
      found = _inspect_getmodule(object, _filename)
      if found or not force:
          return found
      # inspect came up empty: fall back to a by-name lookup in builtins
      import builtins
      from .source import getname
      objname = getname(object, force=True)
      if objname in vars(builtins).keys():
          return builtins
      return None
  def outermost(func): # is analogous to getsource(func,enclosing=True)
      """get outermost enclosing object (i.e. the outer function in a closure)

      The enclosing source of func is fetched, then every object in func's
      globals whose name occurs in that source is checked; the first one whose
      own source lines and line number equal the enclosing block is returned.
      Returns None if func is not a function or method, or nothing matches.

      NOTE: this is the object-equivalent of getsource(func, enclosing=True)
      """
      if ismethod(func):
          namespace = func.__func__.__globals__ or {}
      elif isfunction(func):
          namespace = func.__globals__ or {}
      else:
          return None #XXX: or raise? no matches
      candidates = namespace.items()
      # get the enclosing source
      from .source import getsourcelines
      try:
          lines, lnum = getsourcelines(func, enclosing=True)
      except Exception: #TypeError, IOError
          lines, lnum = [], None
      enclosing_source = ''.join(lines)
      target = (lines, lnum)
      for name, obj in candidates:
          # only objects that are named somewhere in the enclosing source
          if name not in enclosing_source:
              continue
          # keep the object whose source *is* the enclosing block
          try:
              if getsourcelines(obj) == target:
                  return obj
          except Exception: #TypeError, IOError
              pass
      return None #XXX: or raise? no matches
  def nestedcode(func, recurse=True): #XXX: or return dict of {co_name: co} ?
      """get the code objects for any nested functions (e.g. in a closure)

      func is first reduced to a code object with code(); an empty list is
      returned if that is not possible. Each entry of co_consts that code()
      can reduce to a code object is collected and, if recurse is True, so
      are the code objects nested inside it. The result is an unordered list.
      """
      top = code(func)
      if not iscode(top):
          return [] #XXX: or raise? no matches
      found = set()
      for const in top.co_consts:
          inner = None if const is None else code(const)
          if not inner:
              continue
          found.add(inner)
          if recurse:
              found.update(nestedcode(inner, recurse=True))
      return list(found)
  def code(func):
      """get the code object for the given function or method

      Methods, functions, tracebacks and frames are unwrapped step by step
      (method -> function -> code, traceback -> frame -> code); a code object
      is returned as-is. Anything else gives None.

      NOTE: use dill.source.getsource(CODEOBJ) to get the source code
      """
      # each step peels one layer; the order matters, as later steps
      # operate on what the earlier ones produced
      unwrap = (
          (ismethod, '__func__'),
          (isfunction, '__code__'),
          (istraceback, 'tb_frame'),
          (isframe, 'f_code'),
      )
      for matches, attr in unwrap:
          if matches(func):
              func = getattr(func, attr)
      return func if iscode(func) else None
  75. #XXX: ugly: parse dis.dis for name after "<code object" in line and in globals?
  def referrednested(func, recurse=True): #XXX: return dict of {__name__: obj} ?
      """get functions defined inside of func (e.g. inner functions in a closure)

      For every code object reported by nestedcode(func, recurse), the garbage
      collector is asked for the objects that refer to it; methods, functions
      and frames that use that code object are collected, and returned as an
      unordered list.

      NOTE: results may differ if the function has been executed or not.
      If len(nestedcode(func)) > len(referrednested(func)), try calling func().
      If possible, python builds code objects, but delays building functions
      until func() is called.
      """
      import gc
      found = set()
      # get the code objects, and try to track down by reference
      for co in nestedcode(func, recurse):
          # look for objects that refer to the code object
          for referrer in gc.get_referrers(co):
              wrapped = getattr(referrer, '__func__', None) # ismethod
              if (getattr(wrapped, '__code__', None) is co          # methods
                      or getattr(referrer, '__code__', None) is co  # functions
                      or getattr(referrer, 'f_code', None) is co    # frame objects
                      or (hasattr(referrer, 'co_code') and referrer is co)): # code objects
                  found.add(referrer)
      # frameobjs => func.__code__.co_varnames not in func.__code__.co_cellvars
      # funcobjs => func.__code__.co_cellvars not in func.__code__.co_varnames
      # frameobjs are not found, however funcobjs are...
      # (see: test_mixins.quad ... and test_mixins.wtf)
      # after execution, code objects get compiled, and then may be found by gc
      return list(found)
  def freevars(func):
      """get objects defined in enclosing code that are referred to by func

      func may be a function or a method (its __func__ is used); anything
      else gives an empty dict. Free variables whose closure cell is still
      empty (not yet assigned in the enclosing scope) are left out.

      returns a dict of {name:object}"""
      if ismethod(func):
          func = func.__func__
      if not isfunction(func):
          return {}
      names = func.__code__.co_freevars
      cells = func.__closure__ or ()
      found = {}
      for name, cell in zip(names, cells):
          # read the cell exactly once; an empty cell raises ValueError
          try:
              found[name] = cell.cell_contents
          except ValueError: # cell is empty
              continue
      return found
  121. # thanks to Davies Liu for recursion of globals
  def nestedglobals(func, recurse=True):
      """get the names of any globals found within func

      func is reduced to a code object with code(); an empty list is returned
      if that is not possible. The names are the arguments of the bytecode
      instructions whose opname contains '_GLOBAL' (load, store and delete
      of a global).

      recurse: if True, code objects found in co_consts (nested functions,
          classes, lambdas, ...) are searched as well; if False, only the
          instructions of func itself are examined.

      returns an unordered list of names
      """
      func = code(func)
      if func is None:
          return list()
      # read the instructions directly rather than capturing and parsing the
      # text printed by dis.dis: no global stdout redirection is needed, the
      # name is available as-is (no 'NULL + ' decoration to strip), and
      # constants that merely contain '_GLOBAL' cannot be mistaken for a name
      names = set()
      for instruction in dis.get_instructions(func):
          if '_GLOBAL' in instruction.opname:
              names.add(instruction.argval)
      if recurse:
          for co in func.co_consts:
              if iscode(co):
                  names.update(nestedglobals(co, recurse=True))
      return list(names)
  def referredglobals(func, recurse=True, builtin=False):
      """get the names of objects in the global scope referred to by func

      This is the keys view of globalvars(func, recurse, builtin).
      """
      referred = globalvars(func, recurse, builtin)
      return referred.keys()
  def globalvars(func, recurse=True, builtin=False):
      """get objects defined in global scope that are referred to by func

      func may be a function, a method (its __func__ is used) or a code
      object; anything else gives an empty dict. A code object has no globals
      attached, so its names are resolved against builtins only (and thus
      only when builtin is True).

      recurse: if True, names are taken from func and from the code nested
          inside it, and every function found under one of those names is
          searched in turn; if False, only func's own co_names are used.
          Functions held in func's closure are searched in both cases.
      builtin: if True, the builtins namespace takes part in the lookup.

      Functions that refer to each other (or to themselves through a closure)
      are handled: a function is never searched again while it is already
      being searched further up the call chain.

      return a dict of {name:object}; names that can not be resolved are skipped
      """
      return _globalvars(func, recurse, builtin, frozenset())


  def _globalvars(func, recurse, builtin, active):
      """do the work for globalvars

      active is a frozenset with the id() of each function currently being
      searched by an enclosing call; meeting one of them again means a cycle,
      which is cut by contributing nothing ({}) for that function.
      """
      if ismethod(func):
          func = func.__func__
      if isfunction(func):
          if id(func) in active:
              return {} # already being searched: stop the cycle here
          active = active | {id(func)}
          globs = vars(getmodule(sum)).copy() if builtin else {}
          names = set()
          # get references from within closure
          for cell in func.__closure__ or ():
              try:
                  contents = cell.cell_contents
              except ValueError: # cell is empty
                  continue
              _vars = _globalvars(contents, recurse, builtin, active)
              names.update(_vars)
              globs.update(_vars)
          # get globals (these take precedence over what the closure gave)
          globs.update(func.__globals__ or {})
          # get names of references
          if not recurse:
              names.update(func.__code__.co_names)
          else:
              names.update(nestedglobals(func.__code__))
              # find globals for all entries found so far; iterate over a
              # copy, as the set grows while the loop runs
              for key in names.copy():
                  nested_func = globs.get(key)
                  if nested_func is func:
                      continue # direct self-reference
                  names.update(_globalvars(nested_func, True, builtin, active))
      elif iscode(func):
          globs = vars(getmodule(sum)).copy() if builtin else {}
          if not recurse:
              names = func.co_names # get names
          else:
              own_name = func.co_name # skip the code object's own name
              names = set(nestedglobals(func))
              # find globals for all entries found so far
              for key in names.copy():
                  if key == own_name: # compare by value: names are strings
                      continue
                  nested_func = globs.get(key)
                  names.update(_globalvars(nested_func, True, builtin, active))
      else:
          return {}
      #NOTE: if name not in __globals__, then we skip it...
      return {name: globs[name] for name in names if name in globs}
  def varnames(func):
      """get names of variables defined by func

      func is reduced to a code object with code(); if that is not possible,
      an empty tuple is returned.

      returns a tuple (local vars, local vars referenced by nested functions)"""
      co = code(func)
      if iscode(co):
          return co.co_varnames, co.co_cellvars
      return () #XXX: better ((),())? or None?
  def baditems(obj, exact=False, safe=False): #XXX: obj=globals() ?
      """get items in object that fail to pickle

      If obj has no __iter__, obj itself is checked. Otherwise each item of
      obj is checked (the values, if obj has a truthy 'values' attribute).
      exact and safe are passed on to badobjects.

      returns a list of the items for which badobjects did not return None
      """
      if not hasattr(obj, '__iter__'): # is not iterable
          bad = badobjects(obj, 0, exact, safe)
          return [] if bad is None else [bad]
      items = obj.values() if getattr(obj, 'values', None) else obj
      results = [] # can't use a set, as items may be unhashable
      for item in items:
          # an item already recorded as bad is not checked a second time
          if item not in results:
              results.append(badobjects(item, 0, exact, safe))
      return [bad for bad in results if bad is not None]
  def badobjects(obj, depth=0, exact=False, safe=False):
      """get objects that fail to pickle

      With depth=0, returns None if dill.pickles(obj, exact, safe) succeeds,
      and obj itself otherwise. With depth > 0, returns a dict with one entry
      {attribute name: result for that attribute at depth-1} for each name in
      dir(obj) whose value does not pickle.
      """
      from dill import pickles
      if not depth:
          if pickles(obj, exact, safe):
              return None
          return obj
      bad = {}
      for attr in dir(obj):
          # fetch once, so the value tested is the value that is reported
          value = getattr(obj, attr)
          if not pickles(value, exact, safe):
              bad[attr] = badobjects(value, depth-1, exact, safe)
      return bad
  def badtypes(obj, depth=0, exact=False, safe=False):
      """get types for objects that fail to pickle

      With depth=0, returns None if dill.pickles(obj, exact, safe) succeeds,
      and type(obj) otherwise. With depth > 0, returns a dict with one entry
      {attribute name: result for that attribute at depth-1} for each name in
      dir(obj) whose value does not pickle.
      """
      from dill import pickles
      if not depth:
          if pickles(obj, exact, safe):
              return None
          return type(obj)
      bad = {}
      for attr in dir(obj):
          # fetch once, so the value tested is the value that is reported
          value = getattr(obj, attr)
          if not pickles(value, exact, safe):
              bad[attr] = badtypes(value, depth-1, exact, safe)
      return bad
  def errors(obj, depth=0, exact=False, safe=False):
      """get errors for objects that fail to pickle

      With depth=0, obj is round-tripped with dill.copy. The exception raised
      along the way is returned, or None if there was none. A copy of a
      different type is reported as an AssertionError, as is (when exact is
      True) a copy that does not compare equal to obj.

      With depth > 0, returns a dict with one entry per name in dir(obj) that
      is a problem: the exception raised when getting the attribute, or the
      result of errors() at depth-1 for a value that does not pickle.
      """
      from dill import pickles, copy
      if not depth:
          try:
              pik = copy(obj)
              # raised explicitly (rather than with 'assert') so the checks
              # are still made when python runs with -O
              if exact and not pik == obj:
                  raise AssertionError(
                      "Unpickling produces %s instead of %s" % (pik,obj))
              if not type(pik) == type(obj):
                  raise AssertionError(
                      "Unpickling produces %s instead of %s" % (type(pik),type(obj)))
              return None
          except Exception as error:
              return error
      _dict = {}
      for attr in dir(obj):
          try:
              _attr = getattr(obj, attr)
          except Exception as error:
              # the attribute can't even be retrieved: report that instead
              _dict[attr] = error
              continue
          if not pickles(_attr, exact, safe):
              _dict[attr] = errors(_attr, depth-1, exact, safe)
      return _dict
  253. # EOF