logger.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. #
  4. # Author: Leonardo Gama (@leogama)
  5. # Copyright (c) 2022-2024 The Uncertainty Quantification Foundation.
  6. # License: 3-clause BSD. The full license text is available at:
  7. # - https://github.com/uqfoundation/dill/blob/master/LICENSE
  8. """
  9. Logging utilities for dill.
  10. The 'logger' object is dill's top-level logger.
  11. The 'adapter' object wraps the logger and implements a 'trace()' method that
  12. generates a detailed tree-style trace for the pickling call at log level INFO.
  13. The 'trace()' function sets and resets dill's logger log level, enabling and
  14. disabling the pickling trace.
  15. The trace shows a tree structure depicting the depth of each object serialized
  16. *with dill save functions*, but not the ones that use save functions from
  17. 'pickle._Pickler.dispatch'. If the information is available, it also displays
  18. the size in bytes that the object contributed to the pickle stream (including
  19. its child objects). Sample trace output:
  20. >>> import dill, dill.tests
  21. >>> dill.detect.trace(True)
  22. >>> dill.dump_session(main=dill.tests)
  23. ┬ M1: <module 'dill.tests' from '.../dill/tests/__init__.py'>
  24. ├┬ F2: <function _import_module at 0x7f0d2dce1b80>
  25. │└ # F2 [32 B]
  26. ├┬ D2: <dict object at 0x7f0d2e98a540>
  27. │├┬ T4: <class '_frozen_importlib.ModuleSpec'>
  28. ││└ # T4 [35 B]
  29. │├┬ D2: <dict object at 0x7f0d2ef0e8c0>
  30. ││├┬ T4: <class '_frozen_importlib_external.SourceFileLoader'>
  31. │││└ # T4 [50 B]
  32. ││├┬ D2: <dict object at 0x7f0d2e988a40>
  33. │││└ # D2 [84 B]
  34. ││└ # D2 [413 B]
  35. │└ # D2 [763 B]
  36. └ # M1 [813 B]
  37. """
  38. __all__ = ['adapter', 'logger', 'trace']
  39. import codecs
  40. import contextlib
  41. import locale
  42. import logging
  43. import math
  44. import os
  45. from functools import partial
  46. from typing import TextIO, Union
  47. import dill
  48. # Tree drawing characters: Unicode to ASCII map.
  49. ASCII_MAP = str.maketrans({"│": "|", "├": "|", "┬": "+", "└": "`"})
  50. ## Notes about the design choices ##
  51. # Here is some documentation of the Standard Library's logging internals that
  52. # can't be found completely in the official documentation. dill's logger is
  53. # obtained by calling logging.getLogger('dill') and therefore is an instance of
  54. # logging.getLoggerClass() at the call time. As this is controlled by the user,
  55. # in order to add some functionality to it it's necessary to use a LoggerAdapter
  56. # to wrap it, overriding some of the adapter's methods and creating new ones.
  57. #
  58. # Basic calling sequence
  59. # ======================
  60. #
  61. # Python's logging functionality can be conceptually divided into five steps:
  62. # 0. Check logging level -> abort if call level is greater than logger level
  63. # 1. Gather information -> construct a LogRecord from passed arguments and context
  64. # 2. Filter (optional) -> discard message if the record matches a filter
  65. # 3. Format -> format message with args, then format output string with message plus record
  66. # 4. Handle -> write the formatted string to output as defined in the handler
  67. #
  68. # dill.logging.logger.log -> # or logger.info, etc.
  69. # Logger.log -> \
  70. # Logger._log -> }- accept 'extra' parameter for custom record entries
  71. # Logger.makeRecord -> /
  72. # LogRecord.__init__
  73. # Logger.handle ->
  74. # Logger.callHandlers ->
  75. # Handler.handle ->
  76. # Filterer.filter ->
  77. # Filter.filter
  78. # StreamHandler.emit ->
  79. # Handler.format ->
  80. # Formatter.format ->
  81. # LogRecord.getMessage # does: record.message = msg % args
  82. # Formatter.formatMessage ->
  83. # PercentStyle.format # does: self._fmt % vars(record)
  84. #
  85. # NOTE: All methods from the second line on are from logging.__init__.py
class TraceAdapter(logging.LoggerAdapter):
    """
    Tracks object tree depth and calculates pickled object size.

    A single instance of this wraps the module's logger, as the logging API
    doesn't allow setting it directly with a custom Logger subclass. The added
    'trace()' method receives a pickle instance as the first argument and
    creates extra values to be added in the LogRecord from it, then calls
    'info()'.

    Usage of logger with 'trace()' method:

    >>> from dill.logger import adapter as logger  #NOTE: not dill.logger.logger
    >>> ...
    >>> def save_atype(pickler, obj):
    >>>     logger.trace(pickler, "Message with %s and %r etc. placeholders", 'text', obj)
    >>>     ...
    """
    def __init__(self, logger):
        # Deliberately skips LoggerAdapter.__init__: no 'extra' mapping is
        # kept, and process() below is a matching no-op.
        self.logger = logger
    def addHandler(self, handler):
        # Wrap every handler with a TraceFormatter so records get the
        # 'prefix'/'suffix' fields the format string below expects.
        formatter = TraceFormatter("%(prefix)s%(message)s%(suffix)s", handler=handler)
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)
    def removeHandler(self, handler):
        self.logger.removeHandler(handler)
    def process(self, msg, kwargs):
        # A no-op override, as we don't have self.extra.
        return msg, kwargs
    def trace_setup(self, pickler):
        # Called by Pickler.dump(). Initializes the per-pickler trace state:
        # depth counter and stack of stream offsets for size accounting.
        if not dill._dill.is_dill(pickler, child=False):
            return
        if self.isEnabledFor(logging.INFO):
            pickler._trace_depth = 1
            pickler._size_stack = []
        else:
            # Sentinel meaning "tracing disabled for this dump".
            pickler._trace_depth = None
    def trace(self, pickler, msg, *args, **kwargs):
        # Log one trace line for 'pickler'. Messages starting with '#' mark
        # the *end* of an object's serialization (a "pop"); all others mark
        # the beginning (a "push").
        if not hasattr(pickler, '_trace_depth'):
            # trace_setup() was never called (e.g. a non-dill pickler):
            # fall back to a plain log message.
            logger.info(msg, *args, **kwargs)
            return
        if pickler._trace_depth is None:
            return
        extra = kwargs.get('extra', {})
        pushed_obj = msg.startswith('#')
        size = None
        try:
            # Streams are not required to be tellable.
            size = pickler._file.tell()
            frame = pickler.framer.current_frame
            try:
                size += frame.tell()
            except AttributeError:
                # PyPy may use a BytesBuilder as frame
                size += len(frame)
        except (AttributeError, TypeError):
            pass
        if size is not None:
            # Push the start offset when an object begins; on the matching
            # '#' message, pop it and report the difference as the size.
            if not pushed_obj:
                pickler._size_stack.append(size)
            else:
                size -= pickler._size_stack.pop()
                extra['size'] = size
        if pushed_obj:
            pickler._trace_depth -= 1
        extra['depth'] = pickler._trace_depth
        kwargs['extra'] = extra
        self.info(msg, *args, **kwargs)
        if not pushed_obj:
            pickler._trace_depth += 1
  154. class TraceFormatter(logging.Formatter):
  155. """
  156. Generates message prefix and suffix from record.
  157. This Formatter adds prefix and suffix strings to the log message in trace
  158. mode (an also provides empty string defaults for normal logs).
  159. """
  160. def __init__(self, *args, handler=None, **kwargs):
  161. super().__init__(*args, **kwargs)
  162. try:
  163. encoding = handler.stream.encoding
  164. if encoding is None:
  165. raise AttributeError
  166. except AttributeError:
  167. encoding = locale.getpreferredencoding()
  168. try:
  169. encoding = codecs.lookup(encoding).name
  170. except LookupError:
  171. self.is_utf8 = False
  172. else:
  173. self.is_utf8 = (encoding == codecs.lookup('utf-8').name)
  174. def format(self, record):
  175. fields = {'prefix': "", 'suffix': ""}
  176. if getattr(record, 'depth', 0) > 0:
  177. if record.msg.startswith("#"):
  178. prefix = (record.depth - 1)*"│" + "└"
  179. elif record.depth == 1:
  180. prefix = "┬"
  181. else:
  182. prefix = (record.depth - 2)*"│" + "├┬"
  183. if not self.is_utf8:
  184. prefix = prefix.translate(ASCII_MAP) + "-"
  185. fields['prefix'] = prefix + " "
  186. if hasattr(record, 'size') and record.size is not None and record.size >= 1:
  187. # Show object size in human-readable form.
  188. power = int(math.log(record.size, 2)) // 10
  189. size = record.size >> power*10
  190. fields['suffix'] = " [%d %sB]" % (size, "KMGTP"[power] + "i" if power else "")
  191. vars(record).update(fields)
  192. return super().format(record)
# Module-level wiring: dill's top-level logger and the trace adapter that
# wraps it, with output sent to stderr by default.
logger = logging.getLogger('dill')
logger.propagate = False  # keep trace lines out of the root logger's handlers
adapter = TraceAdapter(logger)
# NOTE: logging._StderrHandler is a private stdlib class; unlike
# StreamHandler(sys.stderr) it resolves the *current* sys.stderr at emit
# time, so later redirections of stderr are honored.
stderr_handler = logging._StderrHandler()
adapter.addHandler(stderr_handler)
  198. def trace(arg: Union[bool, TextIO, str, os.PathLike] = None, *, mode: str = 'a') -> None:
  199. """print a trace through the stack when pickling; useful for debugging
  200. With a single boolean argument, enable or disable the tracing.
  201. Example usage:
  202. >>> import dill
  203. >>> dill.detect.trace(True)
  204. >>> dill.dump_session()
  205. Alternatively, ``trace()`` can be used as a context manager. With no
  206. arguments, it just takes care of restoring the tracing state on exit.
  207. Either a file handle, or a file name and (optionally) a file mode may be
  208. specitfied to redirect the tracing output in the ``with`` block context. A
  209. log function is yielded by the manager so the user can write extra
  210. information to the file.
  211. Example usage:
  212. >>> from dill import detect
  213. >>> D = {'a': 42, 'b': {'x': None}}
  214. >>> with detect.trace():
  215. >>> dumps(D)
  216. ┬ D2: <dict object at 0x7f2721804800>
  217. ├┬ D2: <dict object at 0x7f27217f5c40>
  218. │└ # D2 [8 B]
  219. └ # D2 [22 B]
  220. >>> squared = lambda x: x**2
  221. >>> with detect.trace('output.txt', mode='w') as log:
  222. >>> log("> D = %r", D)
  223. >>> dumps(D)
  224. >>> log("> squared = %r", squared)
  225. >>> dumps(squared)
  226. Arguments:
  227. arg: a boolean value, or an optional file-like or path-like object for the context manager
  228. mode: mode string for ``open()`` if a file name is passed as the first argument
  229. """
  230. if repr(arg) not in ('False', 'True'):
  231. return TraceManager(file=arg, mode=mode)
  232. logger.setLevel(logging.INFO if arg else logging.WARNING)
  233. class TraceManager(contextlib.AbstractContextManager):
  234. """context manager version of trace(); can redirect the trace to a file"""
  235. def __init__(self, file, mode):
  236. self.file = file
  237. self.mode = mode
  238. self.redirect = file is not None
  239. self.file_is_stream = hasattr(file, 'write')
  240. def __enter__(self):
  241. if self.redirect:
  242. stderr_handler.flush()
  243. if self.file_is_stream:
  244. self.handler = logging.StreamHandler(self.file)
  245. else:
  246. self.handler = logging.FileHandler(self.file, self.mode)
  247. adapter.removeHandler(stderr_handler)
  248. adapter.addHandler(self.handler)
  249. self.old_level = adapter.getEffectiveLevel()
  250. adapter.setLevel(logging.INFO)
  251. return adapter.info
  252. def __exit__(self, *exc_info):
  253. adapter.setLevel(self.old_level)
  254. if self.redirect:
  255. adapter.removeHandler(self.handler)
  256. adapter.addHandler(stderr_handler)
  257. if not self.file_is_stream:
  258. self.handler.close()