session.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612
  1. #!/usr/bin/env python
  2. #
  3. # Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
  4. # Author: Leonardo Gama (@leogama)
  5. # Copyright (c) 2008-2015 California Institute of Technology.
  6. # Copyright (c) 2016-2024 The Uncertainty Quantification Foundation.
  7. # License: 3-clause BSD. The full license text is available at:
  8. # - https://github.com/uqfoundation/dill/blob/master/LICENSE
  9. """
  10. Pickle and restore the interpreter session.
  11. """
  12. __all__ = [
  13. 'dump_module', 'load_module', 'load_module_asdict',
  14. 'dump_session', 'load_session' # backward compatibility
  15. ]
  16. import re
  17. import os
  18. import sys
  19. import warnings
  20. import pathlib
  21. import tempfile
  22. TEMPDIR = pathlib.PurePath(tempfile.gettempdir())
  23. # Type hints.
  24. from typing import Optional, Union
  25. from dill import _dill, Pickler, Unpickler
  26. from ._dill import (
  27. BuiltinMethodType, FunctionType, MethodType, ModuleType, TypeType,
  28. _import_module, _is_builtin_module, _is_imported_module, _main_module,
  29. _reverse_typemap, __builtin__, UnpicklingError,
  30. )
  31. def _module_map():
  32. """get map of imported modules"""
  33. from collections import defaultdict
  34. from types import SimpleNamespace
  35. modmap = SimpleNamespace(
  36. by_name=defaultdict(list),
  37. by_id=defaultdict(list),
  38. top_level={},
  39. )
  40. for modname, module in sys.modules.items():
  41. if modname in ('__main__', '__mp_main__') or not isinstance(module, ModuleType):
  42. continue
  43. if '.' not in modname:
  44. modmap.top_level[id(module)] = modname
  45. for objname, modobj in module.__dict__.items():
  46. modmap.by_name[objname].append((modobj, modname))
  47. modmap.by_id[id(modobj)].append((modobj, objname, modname))
  48. return modmap
  49. IMPORTED_AS_TYPES = (ModuleType, TypeType, FunctionType, MethodType, BuiltinMethodType)
  50. if 'PyCapsuleType' in _reverse_typemap:
  51. IMPORTED_AS_TYPES += (_reverse_typemap['PyCapsuleType'],)
  52. IMPORTED_AS_MODULES = ('ctypes', 'typing', 'subprocess', 'threading',
  53. r'concurrent\.futures(\.\w+)?', r'multiprocessing(\.\w+)?')
  54. IMPORTED_AS_MODULES = tuple(re.compile(x) for x in IMPORTED_AS_MODULES)
  55. def _lookup_module(modmap, name, obj, main_module):
  56. """lookup name or id of obj if module is imported"""
  57. for modobj, modname in modmap.by_name[name]:
  58. if modobj is obj and sys.modules[modname] is not main_module:
  59. return modname, name
  60. __module__ = getattr(obj, '__module__', None)
  61. if isinstance(obj, IMPORTED_AS_TYPES) or (__module__ is not None
  62. and any(regex.fullmatch(__module__) for regex in IMPORTED_AS_MODULES)):
  63. for modobj, objname, modname in modmap.by_id[id(obj)]:
  64. if sys.modules[modname] is not main_module:
  65. return modname, objname
  66. return None, None
  67. def _stash_modules(main_module):
  68. modmap = _module_map()
  69. newmod = ModuleType(main_module.__name__)
  70. imported = []
  71. imported_as = []
  72. imported_top_level = [] # keep separated for backward compatibility
  73. original = {}
  74. for name, obj in main_module.__dict__.items():
  75. if obj is main_module:
  76. original[name] = newmod # self-reference
  77. elif obj is main_module.__dict__:
  78. original[name] = newmod.__dict__
  79. # Avoid incorrectly matching a singleton value in another package (ex.: __doc__).
  80. elif any(obj is singleton for singleton in (None, False, True)) \
  81. or isinstance(obj, ModuleType) and _is_builtin_module(obj): # always saved by ref
  82. original[name] = obj
  83. else:
  84. source_module, objname = _lookup_module(modmap, name, obj, main_module)
  85. if source_module is not None:
  86. if objname == name:
  87. imported.append((source_module, name))
  88. else:
  89. imported_as.append((source_module, objname, name))
  90. else:
  91. try:
  92. imported_top_level.append((modmap.top_level[id(obj)], name))
  93. except KeyError:
  94. original[name] = obj
  95. if len(original) < len(main_module.__dict__):
  96. newmod.__dict__.update(original)
  97. newmod.__dill_imported = imported
  98. newmod.__dill_imported_as = imported_as
  99. newmod.__dill_imported_top_level = imported_top_level
  100. if getattr(newmod, '__loader__', None) is None and _is_imported_module(main_module):
  101. # Trick _is_imported_module() to force saving as an imported module.
  102. newmod.__loader__ = True # will be discarded by save_module()
  103. return newmod
  104. else:
  105. return main_module
  106. def _restore_modules(unpickler, main_module):
  107. try:
  108. for modname, name in main_module.__dict__.pop('__dill_imported'):
  109. main_module.__dict__[name] = unpickler.find_class(modname, name)
  110. for modname, objname, name in main_module.__dict__.pop('__dill_imported_as'):
  111. main_module.__dict__[name] = unpickler.find_class(modname, objname)
  112. for modname, name in main_module.__dict__.pop('__dill_imported_top_level'):
  113. main_module.__dict__[name] = __import__(modname)
  114. except KeyError:
  115. pass
  116. #NOTE: 06/03/15 renamed main_module to main
  117. def dump_module(
  118. filename: Union[str, os.PathLike] = None,
  119. module: Optional[Union[ModuleType, str]] = None,
  120. refimported: bool = False,
  121. **kwds
  122. ) -> None:
  123. """Pickle the current state of :py:mod:`__main__` or another module to a file.
  124. Save the contents of :py:mod:`__main__` (e.g. from an interactive
  125. interpreter session), an imported module, or a module-type object (e.g.
  126. built with :py:class:`~types.ModuleType`), to a file. The pickled
  127. module can then be restored with the function :py:func:`load_module`.
  128. Args:
  129. filename: a path-like object or a writable stream. If `None`
  130. (the default), write to a named file in a temporary directory.
  131. module: a module object or the name of an importable module. If `None`
  132. (the default), :py:mod:`__main__` is saved.
  133. refimported: if `True`, all objects identified as having been imported
  134. into the module's namespace are saved by reference. *Note:* this is
  135. similar but independent from ``dill.settings[`byref`]``, as
  136. ``refimported`` refers to virtually all imported objects, while
  137. ``byref`` only affects select objects.
  138. **kwds: extra keyword arguments passed to :py:class:`Pickler()`.
  139. Raises:
  140. :py:exc:`PicklingError`: if pickling fails.
  141. Examples:
  142. - Save current interpreter session state:
  143. >>> import dill
  144. >>> squared = lambda x: x*x
  145. >>> dill.dump_module() # save state of __main__ to /tmp/session.pkl
  146. - Save the state of an imported/importable module:
  147. >>> import dill
  148. >>> import pox
  149. >>> pox.plus_one = lambda x: x+1
  150. >>> dill.dump_module('pox_session.pkl', module=pox)
  151. - Save the state of a non-importable, module-type object:
  152. >>> import dill
  153. >>> from types import ModuleType
  154. >>> foo = ModuleType('foo')
  155. >>> foo.values = [1,2,3]
  156. >>> import math
  157. >>> foo.sin = math.sin
  158. >>> dill.dump_module('foo_session.pkl', module=foo, refimported=True)
  159. - Restore the state of the saved modules:
  160. >>> import dill
  161. >>> dill.load_module()
  162. >>> squared(2)
  163. 4
  164. >>> pox = dill.load_module('pox_session.pkl')
  165. >>> pox.plus_one(1)
  166. 2
  167. >>> foo = dill.load_module('foo_session.pkl')
  168. >>> [foo.sin(x) for x in foo.values]
  169. [0.8414709848078965, 0.9092974268256817, 0.1411200080598672]
  170. - Use `refimported` to save imported objects by reference:
  171. >>> import dill
  172. >>> from html.entities import html5
  173. >>> type(html5), len(html5)
  174. (dict, 2231)
  175. >>> import io
  176. >>> buf = io.BytesIO()
  177. >>> dill.dump_module(buf) # saves __main__, with html5 saved by value
  178. >>> len(buf.getvalue()) # pickle size in bytes
  179. 71665
  180. >>> buf = io.BytesIO()
  181. >>> dill.dump_module(buf, refimported=True) # html5 saved by reference
  182. >>> len(buf.getvalue())
  183. 438
  184. *Changed in version 0.3.6:* Function ``dump_session()`` was renamed to
  185. ``dump_module()``. Parameters ``main`` and ``byref`` were renamed to
  186. ``module`` and ``refimported``, respectively.
  187. Note:
  188. Currently, ``dill.settings['byref']`` and ``dill.settings['recurse']``
  189. don't apply to this function.
  190. """
  191. for old_par, par in [('main', 'module'), ('byref', 'refimported')]:
  192. if old_par in kwds:
  193. message = "The argument %r has been renamed %r" % (old_par, par)
  194. if old_par == 'byref':
  195. message += " to distinguish it from dill.settings['byref']"
  196. warnings.warn(message + ".", PendingDeprecationWarning)
  197. if locals()[par]: # the defaults are None and False
  198. raise TypeError("both %r and %r arguments were used" % (par, old_par))
  199. refimported = kwds.pop('byref', refimported)
  200. module = kwds.pop('main', module)
  201. from .settings import settings
  202. protocol = settings['protocol']
  203. main = module
  204. if main is None:
  205. main = _main_module
  206. elif isinstance(main, str):
  207. main = _import_module(main)
  208. if not isinstance(main, ModuleType):
  209. raise TypeError("%r is not a module" % main)
  210. if hasattr(filename, 'write'):
  211. file = filename
  212. else:
  213. if filename is None:
  214. filename = str(TEMPDIR/'session.pkl')
  215. file = open(filename, 'wb')
  216. try:
  217. pickler = Pickler(file, protocol, **kwds)
  218. pickler._original_main = main
  219. if refimported:
  220. main = _stash_modules(main)
  221. pickler._main = main #FIXME: dill.settings are disabled
  222. pickler._byref = False # disable pickling by name reference
  223. pickler._recurse = False # disable pickling recursion for globals
  224. pickler._session = True # is best indicator of when pickling a session
  225. pickler._first_pass = True
  226. pickler._main_modified = main is not pickler._original_main
  227. pickler.dump(main)
  228. finally:
  229. if file is not filename: # if newly opened file
  230. file.close()
  231. return
  232. # Backward compatibility.
  233. def dump_session(filename=None, main=None, byref=False, **kwds):
  234. warnings.warn("dump_session() has been renamed dump_module()", PendingDeprecationWarning)
  235. dump_module(filename, module=main, refimported=byref, **kwds)
  236. dump_session.__doc__ = dump_module.__doc__
  237. class _PeekableReader:
  238. """lightweight stream wrapper that implements peek()"""
  239. def __init__(self, stream):
  240. self.stream = stream
  241. def read(self, n):
  242. return self.stream.read(n)
  243. def readline(self):
  244. return self.stream.readline()
  245. def tell(self):
  246. return self.stream.tell()
  247. def close(self):
  248. return self.stream.close()
  249. def peek(self, n):
  250. stream = self.stream
  251. try:
  252. if hasattr(stream, 'flush'): stream.flush()
  253. position = stream.tell()
  254. stream.seek(position) # assert seek() works before reading
  255. chunk = stream.read(n)
  256. stream.seek(position)
  257. return chunk
  258. except (AttributeError, OSError):
  259. raise NotImplementedError("stream is not peekable: %r", stream) from None
  260. def _make_peekable(stream):
  261. """return stream as an object with a peek() method"""
  262. import io
  263. if hasattr(stream, 'peek'):
  264. return stream
  265. if not (hasattr(stream, 'tell') and hasattr(stream, 'seek')):
  266. try:
  267. return io.BufferedReader(stream)
  268. except Exception:
  269. pass
  270. return _PeekableReader(stream)
  271. def _identify_module(file, main=None):
  272. """identify the name of the module stored in the given file-type object"""
  273. from pickletools import genops
  274. UNICODE = {'UNICODE', 'BINUNICODE', 'SHORT_BINUNICODE'}
  275. found_import = False
  276. try:
  277. for opcode, arg, pos in genops(file.peek(256)):
  278. if not found_import:
  279. if opcode.name in ('GLOBAL', 'SHORT_BINUNICODE') and \
  280. arg.endswith('_import_module'):
  281. found_import = True
  282. else:
  283. if opcode.name in UNICODE:
  284. return arg
  285. else:
  286. raise UnpicklingError("reached STOP without finding main module")
  287. except (NotImplementedError, ValueError) as error:
  288. # ValueError occours when the end of the chunk is reached (without a STOP).
  289. if isinstance(error, NotImplementedError) and main is not None:
  290. # file is not peekable, but we have main.
  291. return None
  292. raise UnpicklingError("unable to identify main module") from error
  293. def load_module(
  294. filename: Union[str, os.PathLike] = None,
  295. module: Optional[Union[ModuleType, str]] = None,
  296. **kwds
  297. ) -> Optional[ModuleType]:
  298. """Update the selected module (default is :py:mod:`__main__`) with
  299. the state saved at ``filename``.
  300. Restore a module to the state saved with :py:func:`dump_module`. The
  301. saved module can be :py:mod:`__main__` (e.g. an interpreter session),
  302. an imported module, or a module-type object (e.g. created with
  303. :py:class:`~types.ModuleType`).
  304. When restoring the state of a non-importable module-type object, the
  305. current instance of this module may be passed as the argument ``main``.
  306. Otherwise, a new instance is created with :py:class:`~types.ModuleType`
  307. and returned.
  308. Args:
  309. filename: a path-like object or a readable stream. If `None`
  310. (the default), read from a named file in a temporary directory.
  311. module: a module object or the name of an importable module;
  312. the module name and kind (i.e. imported or non-imported) must
  313. match the name and kind of the module stored at ``filename``.
  314. **kwds: extra keyword arguments passed to :py:class:`Unpickler()`.
  315. Raises:
  316. :py:exc:`UnpicklingError`: if unpickling fails.
  317. :py:exc:`ValueError`: if the argument ``main`` and module saved
  318. at ``filename`` are incompatible.
  319. Returns:
  320. A module object, if the saved module is not :py:mod:`__main__` or
  321. a module instance wasn't provided with the argument ``main``.
  322. Examples:
  323. - Save the state of some modules:
  324. >>> import dill
  325. >>> squared = lambda x: x*x
  326. >>> dill.dump_module() # save state of __main__ to /tmp/session.pkl
  327. >>>
  328. >>> import pox # an imported module
  329. >>> pox.plus_one = lambda x: x+1
  330. >>> dill.dump_module('pox_session.pkl', module=pox)
  331. >>>
  332. >>> from types import ModuleType
  333. >>> foo = ModuleType('foo') # a module-type object
  334. >>> foo.values = [1,2,3]
  335. >>> import math
  336. >>> foo.sin = math.sin
  337. >>> dill.dump_module('foo_session.pkl', module=foo, refimported=True)
  338. - Restore the state of the interpreter:
  339. >>> import dill
  340. >>> dill.load_module() # updates __main__ from /tmp/session.pkl
  341. >>> squared(2)
  342. 4
  343. - Load the saved state of an importable module:
  344. >>> import dill
  345. >>> pox = dill.load_module('pox_session.pkl')
  346. >>> pox.plus_one(1)
  347. 2
  348. >>> import sys
  349. >>> pox in sys.modules.values()
  350. True
  351. - Load the saved state of a non-importable module-type object:
  352. >>> import dill
  353. >>> foo = dill.load_module('foo_session.pkl')
  354. >>> [foo.sin(x) for x in foo.values]
  355. [0.8414709848078965, 0.9092974268256817, 0.1411200080598672]
  356. >>> import math
  357. >>> foo.sin is math.sin # foo.sin was saved by reference
  358. True
  359. >>> import sys
  360. >>> foo in sys.modules.values()
  361. False
  362. - Update the state of a non-importable module-type object:
  363. >>> import dill
  364. >>> from types import ModuleType
  365. >>> foo = ModuleType('foo')
  366. >>> foo.values = ['a','b']
  367. >>> foo.sin = lambda x: x*x
  368. >>> dill.load_module('foo_session.pkl', module=foo)
  369. >>> [foo.sin(x) for x in foo.values]
  370. [0.8414709848078965, 0.9092974268256817, 0.1411200080598672]
  371. *Changed in version 0.3.6:* Function ``load_session()`` was renamed to
  372. ``load_module()``. Parameter ``main`` was renamed to ``module``.
  373. See also:
  374. :py:func:`load_module_asdict` to load the contents of module saved
  375. with :py:func:`dump_module` into a dictionary.
  376. """
  377. if 'main' in kwds:
  378. warnings.warn(
  379. "The argument 'main' has been renamed 'module'.",
  380. PendingDeprecationWarning
  381. )
  382. if module is not None:
  383. raise TypeError("both 'module' and 'main' arguments were used")
  384. module = kwds.pop('main')
  385. main = module
  386. if hasattr(filename, 'read'):
  387. file = filename
  388. else:
  389. if filename is None:
  390. filename = str(TEMPDIR/'session.pkl')
  391. file = open(filename, 'rb')
  392. try:
  393. file = _make_peekable(file)
  394. #FIXME: dill.settings are disabled
  395. unpickler = Unpickler(file, **kwds)
  396. unpickler._session = True
  397. # Resolve unpickler._main
  398. pickle_main = _identify_module(file, main)
  399. if main is None and pickle_main is not None:
  400. main = pickle_main
  401. if isinstance(main, str):
  402. if main.startswith('__runtime__.'):
  403. # Create runtime module to load the session into.
  404. main = ModuleType(main.partition('.')[-1])
  405. else:
  406. main = _import_module(main)
  407. if main is not None:
  408. if not isinstance(main, ModuleType):
  409. raise TypeError("%r is not a module" % main)
  410. unpickler._main = main
  411. else:
  412. main = unpickler._main
  413. # Check against the pickle's main.
  414. is_main_imported = _is_imported_module(main)
  415. if pickle_main is not None:
  416. is_runtime_mod = pickle_main.startswith('__runtime__.')
  417. if is_runtime_mod:
  418. pickle_main = pickle_main.partition('.')[-1]
  419. error_msg = "can't update{} module{} %r with the saved state of{} module{} %r"
  420. if is_runtime_mod and is_main_imported:
  421. raise ValueError(
  422. error_msg.format(" imported", "", "", "-type object")
  423. % (main.__name__, pickle_main)
  424. )
  425. if not is_runtime_mod and not is_main_imported:
  426. raise ValueError(
  427. error_msg.format("", "-type object", " imported", "")
  428. % (pickle_main, main.__name__)
  429. )
  430. if main.__name__ != pickle_main:
  431. raise ValueError(error_msg.format("", "", "", "") % (main.__name__, pickle_main))
  432. # This is for find_class() to be able to locate it.
  433. if not is_main_imported:
  434. runtime_main = '__runtime__.%s' % main.__name__
  435. sys.modules[runtime_main] = main
  436. loaded = unpickler.load()
  437. finally:
  438. if not hasattr(filename, 'read'): # if newly opened file
  439. file.close()
  440. try:
  441. del sys.modules[runtime_main]
  442. except (KeyError, NameError):
  443. pass
  444. assert loaded is main
  445. _restore_modules(unpickler, main)
  446. if main is _main_module or main is module:
  447. return None
  448. else:
  449. return main
  450. # Backward compatibility.
  451. def load_session(filename=None, main=None, **kwds):
  452. warnings.warn("load_session() has been renamed load_module().", PendingDeprecationWarning)
  453. load_module(filename, module=main, **kwds)
  454. load_session.__doc__ = load_module.__doc__
  455. def load_module_asdict(
  456. filename: Union[str, os.PathLike] = None,
  457. update: bool = False,
  458. **kwds
  459. ) -> dict:
  460. """
  461. Load the contents of a saved module into a dictionary.
  462. ``load_module_asdict()`` is the near-equivalent of::
  463. lambda filename: vars(dill.load_module(filename)).copy()
  464. however, does not alter the original module. Also, the path of
  465. the loaded module is stored in the ``__session__`` attribute.
  466. Args:
  467. filename: a path-like object or a readable stream. If `None`
  468. (the default), read from a named file in a temporary directory.
  469. update: if `True`, initialize the dictionary with the current state
  470. of the module prior to loading the state stored at filename.
  471. **kwds: extra keyword arguments passed to :py:class:`Unpickler()`
  472. Raises:
  473. :py:exc:`UnpicklingError`: if unpickling fails
  474. Returns:
  475. A copy of the restored module's dictionary.
  476. Note:
  477. If ``update`` is True, the corresponding module may first be imported
  478. into the current namespace before the saved state is loaded from
  479. filename to the dictionary. Note that any module that is imported into
  480. the current namespace as a side-effect of using ``update`` will not be
  481. modified by loading the saved module in filename to a dictionary.
  482. Example:
  483. >>> import dill
  484. >>> alist = [1, 2, 3]
  485. >>> anum = 42
  486. >>> dill.dump_module()
  487. >>> anum = 0
  488. >>> new_var = 'spam'
  489. >>> main = dill.load_module_asdict()
  490. >>> main['__name__'], main['__session__']
  491. ('__main__', '/tmp/session.pkl')
  492. >>> main is globals() # loaded objects don't reference globals
  493. False
  494. >>> main['alist'] == alist
  495. True
  496. >>> main['alist'] is alist # was saved by value
  497. False
  498. >>> main['anum'] == anum # changed after the session was saved
  499. False
  500. >>> new_var in main # would be True if the option 'update' was set
  501. False
  502. """
  503. if 'module' in kwds:
  504. raise TypeError("'module' is an invalid keyword argument for load_module_asdict()")
  505. if hasattr(filename, 'read'):
  506. file = filename
  507. else:
  508. if filename is None:
  509. filename = str(TEMPDIR/'session.pkl')
  510. file = open(filename, 'rb')
  511. try:
  512. file = _make_peekable(file)
  513. main_name = _identify_module(file)
  514. old_main = sys.modules.get(main_name)
  515. main = ModuleType(main_name)
  516. if update:
  517. if old_main is None:
  518. old_main = _import_module(main_name)
  519. main.__dict__.update(old_main.__dict__)
  520. else:
  521. main.__builtins__ = __builtin__
  522. sys.modules[main_name] = main
  523. load_module(file, **kwds)
  524. finally:
  525. if not hasattr(filename, 'read'): # if newly opened file
  526. file.close()
  527. try:
  528. if old_main is None:
  529. del sys.modules[main_name]
  530. else:
  531. sys.modules[main_name] = old_main
  532. except NameError: # failed before setting old_main
  533. pass
  534. main.__session__ = str(filename)
  535. return main.__dict__
  536. # Internal exports for backward compatibility with dill v0.3.5.1
  537. # Can't be placed in dill._dill because of circular import problems.
  538. for name in (
  539. '_lookup_module', '_module_map', '_restore_modules', '_stash_modules',
  540. 'dump_session', 'load_session' # backward compatibility functions
  541. ):
  542. setattr(_dill, name, globals()[name])
  543. del name