web_log.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. import datetime
  2. import functools
  3. import logging
  4. import os
  5. import re
  6. import time as time_mod
  7. from collections import namedtuple
  8. from typing import Any, Callable, Dict, Iterable, List, Tuple # noqa
  9. from .abc import AbstractAccessLogger
  10. from .web_request import BaseRequest
  11. from .web_response import StreamResponse
  12. KeyMethod = namedtuple("KeyMethod", "key method")
  13. class AccessLogger(AbstractAccessLogger):
  14. """Helper object to log access.
  15. Usage:
  16. log = logging.getLogger("spam")
  17. log_format = "%a %{User-Agent}i"
  18. access_logger = AccessLogger(log, log_format)
  19. access_logger.log(request, response, time)
  20. Format:
  21. %% The percent sign
  22. %a Remote IP-address (IP-address of proxy if using reverse proxy)
  23. %t Time when the request was started to process
  24. %P The process ID of the child that serviced the request
  25. %r First line of request
  26. %s Response status code
  27. %b Size of response in bytes, including HTTP headers
  28. %T Time taken to serve the request, in seconds
  29. %Tf Time taken to serve the request, in seconds with floating fraction
  30. in .06f format
  31. %D Time taken to serve the request, in microseconds
  32. %{FOO}i request.headers['FOO']
  33. %{FOO}o response.headers['FOO']
  34. %{FOO}e os.environ['FOO']
  35. """
  36. LOG_FORMAT_MAP = {
  37. "a": "remote_address",
  38. "t": "request_start_time",
  39. "P": "process_id",
  40. "r": "first_request_line",
  41. "s": "response_status",
  42. "b": "response_size",
  43. "T": "request_time",
  44. "Tf": "request_time_frac",
  45. "D": "request_time_micro",
  46. "i": "request_header",
  47. "o": "response_header",
  48. }
  49. LOG_FORMAT = '%a %t "%r" %s %b "%{Referer}i" "%{User-Agent}i"'
  50. FORMAT_RE = re.compile(r"%(\{([A-Za-z0-9\-_]+)\}([ioe])|[atPrsbOD]|Tf?)")
  51. CLEANUP_RE = re.compile(r"(%[^s])")
  52. _FORMAT_CACHE: Dict[str, Tuple[str, List[KeyMethod]]] = {}
  53. def __init__(self, logger: logging.Logger, log_format: str = LOG_FORMAT) -> None:
  54. """Initialise the logger.
  55. logger is a logger object to be used for logging.
  56. log_format is a string with apache compatible log format description.
  57. """
  58. super().__init__(logger, log_format=log_format)
  59. _compiled_format = AccessLogger._FORMAT_CACHE.get(log_format)
  60. if not _compiled_format:
  61. _compiled_format = self.compile_format(log_format)
  62. AccessLogger._FORMAT_CACHE[log_format] = _compiled_format
  63. self._log_format, self._methods = _compiled_format
  64. def compile_format(self, log_format: str) -> Tuple[str, List[KeyMethod]]:
  65. """Translate log_format into form usable by modulo formatting
  66. All known atoms will be replaced with %s
  67. Also methods for formatting of those atoms will be added to
  68. _methods in appropriate order
  69. For example we have log_format = "%a %t"
  70. This format will be translated to "%s %s"
  71. Also contents of _methods will be
  72. [self._format_a, self._format_t]
  73. These method will be called and results will be passed
  74. to translated string format.
  75. Each _format_* method receive 'args' which is list of arguments
  76. given to self.log
  77. Exceptions are _format_e, _format_i and _format_o methods which
  78. also receive key name (by functools.partial)
  79. """
  80. # list of (key, method) tuples, we don't use an OrderedDict as users
  81. # can repeat the same key more than once
  82. methods = list()
  83. for atom in self.FORMAT_RE.findall(log_format):
  84. if atom[1] == "":
  85. format_key1 = self.LOG_FORMAT_MAP[atom[0]]
  86. m = getattr(AccessLogger, "_format_%s" % atom[0])
  87. key_method = KeyMethod(format_key1, m)
  88. else:
  89. format_key2 = (self.LOG_FORMAT_MAP[atom[2]], atom[1])
  90. m = getattr(AccessLogger, "_format_%s" % atom[2])
  91. key_method = KeyMethod(format_key2, functools.partial(m, atom[1]))
  92. methods.append(key_method)
  93. log_format = self.FORMAT_RE.sub(r"%s", log_format)
  94. log_format = self.CLEANUP_RE.sub(r"%\1", log_format)
  95. return log_format, methods
  96. @staticmethod
  97. def _format_i(
  98. key: str, request: BaseRequest, response: StreamResponse, time: float
  99. ) -> str:
  100. if request is None:
  101. return "(no headers)"
  102. # suboptimal, make istr(key) once
  103. return request.headers.get(key, "-")
  104. @staticmethod
  105. def _format_o(
  106. key: str, request: BaseRequest, response: StreamResponse, time: float
  107. ) -> str:
  108. # suboptimal, make istr(key) once
  109. return response.headers.get(key, "-")
  110. @staticmethod
  111. def _format_a(request: BaseRequest, response: StreamResponse, time: float) -> str:
  112. if request is None:
  113. return "-"
  114. ip = request.remote
  115. return ip if ip is not None else "-"
  116. @staticmethod
  117. def _format_t(request: BaseRequest, response: StreamResponse, time: float) -> str:
  118. tz = datetime.timezone(datetime.timedelta(seconds=-time_mod.timezone))
  119. now = datetime.datetime.now(tz)
  120. start_time = now - datetime.timedelta(seconds=time)
  121. return start_time.strftime("[%d/%b/%Y:%H:%M:%S %z]")
  122. @staticmethod
  123. def _format_P(request: BaseRequest, response: StreamResponse, time: float) -> str:
  124. return "<%s>" % os.getpid()
  125. @staticmethod
  126. def _format_r(request: BaseRequest, response: StreamResponse, time: float) -> str:
  127. if request is None:
  128. return "-"
  129. return "{} {} HTTP/{}.{}".format(
  130. request.method,
  131. request.path_qs,
  132. request.version.major,
  133. request.version.minor,
  134. )
  135. @staticmethod
  136. def _format_s(request: BaseRequest, response: StreamResponse, time: float) -> int:
  137. return response.status
  138. @staticmethod
  139. def _format_b(request: BaseRequest, response: StreamResponse, time: float) -> int:
  140. return response.body_length
  141. @staticmethod
  142. def _format_T(request: BaseRequest, response: StreamResponse, time: float) -> str:
  143. return str(round(time))
  144. @staticmethod
  145. def _format_Tf(request: BaseRequest, response: StreamResponse, time: float) -> str:
  146. return "%06f" % time
  147. @staticmethod
  148. def _format_D(request: BaseRequest, response: StreamResponse, time: float) -> str:
  149. return str(round(time * 1000000))
  150. def _format_line(
  151. self, request: BaseRequest, response: StreamResponse, time: float
  152. ) -> Iterable[Tuple[str, Callable[[BaseRequest, StreamResponse, float], str]]]:
  153. return [(key, method(request, response, time)) for key, method in self._methods]
  154. @property
  155. def enabled(self) -> bool:
  156. """Check if logger is enabled."""
  157. # Avoid formatting the log line if it will not be emitted.
  158. return self.logger.isEnabledFor(logging.INFO)
  159. def log(self, request: BaseRequest, response: StreamResponse, time: float) -> None:
  160. try:
  161. fmt_info = self._format_line(request, response, time)
  162. values = list()
  163. extra = dict()
  164. for key, value in fmt_info:
  165. values.append(value)
  166. if key.__class__ is str:
  167. extra[key] = value
  168. else:
  169. k1, k2 = key # type: ignore[misc]
  170. dct = extra.get(k1, {}) # type: ignore[var-annotated,has-type]
  171. dct[k2] = value # type: ignore[index,has-type]
  172. extra[k1] = dct # type: ignore[has-type,assignment]
  173. self.logger.info(self._log_format % tuple(values), extra=extra)
  174. except Exception:
  175. self.logger.exception("Error in logging")