_readers.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. # Code to read HTTP data
  2. #
  3. # Strategy: each reader is a callable which takes a ReceiveBuffer object, and
  4. # either:
  5. # 1) consumes some of it and returns an Event
  6. # 2) raises a LocalProtocolError (for consistency -- e.g. we call validate()
  7. # and it might raise a LocalProtocolError, so simpler just to always use
  8. # this)
  9. # 3) returns None, meaning "I need more data"
  10. #
  11. # If they have a .read_eof attribute, then this will be called if an EOF is
  12. # received -- but this is optional. Either way, the actual ConnectionClosed
  13. # event will be generated afterwards.
  14. #
  15. # READERS is a dict describing how to pick a reader. It maps states to either:
  16. # - a reader
  17. # - or, for body readers, a dict of per-framing reader factories
  18. import re
  19. from typing import Any, Callable, Dict, Iterable, NoReturn, Optional, Tuple, Type, Union
  20. from ._abnf import chunk_header, header_field, request_line, status_line
  21. from ._events import Data, EndOfMessage, InformationalResponse, Request, Response
  22. from ._receivebuffer import ReceiveBuffer
  23. from ._state import (
  24. CLIENT,
  25. CLOSED,
  26. DONE,
  27. IDLE,
  28. MUST_CLOSE,
  29. SEND_BODY,
  30. SEND_RESPONSE,
  31. SERVER,
  32. )
  33. from ._util import LocalProtocolError, RemoteProtocolError, Sentinel, validate
# Only the READERS dispatch table is exported from this module.
__all__ = ["READERS"]

# Compiled bytes regex for one header line, built from the `header_field`
# pattern imported from ._abnf. _decode_header_lines reads its "field_name"
# and "field_value" groups.
header_field_re = re.compile(header_field.encode("ascii"))
  36. obs_fold_re = re.compile(rb"[ \t]+")
  37. def _obsolete_line_fold(lines: Iterable[bytes]) -> Iterable[bytes]:
  38. it = iter(lines)
  39. last: Optional[bytes] = None
  40. for line in it:
  41. match = obs_fold_re.match(line)
  42. if match:
  43. if last is None:
  44. raise LocalProtocolError("continuation line at start of headers")
  45. if not isinstance(last, bytearray):
  46. # Cast to a mutable type, avoiding copy on append to ensure O(n) time
  47. last = bytearray(last)
  48. last += b" "
  49. last += line[match.end() :]
  50. else:
  51. if last is not None:
  52. yield last
  53. last = line
  54. if last is not None:
  55. yield last
  56. def _decode_header_lines(
  57. lines: Iterable[bytes],
  58. ) -> Iterable[Tuple[bytes, bytes]]:
  59. for line in _obsolete_line_fold(lines):
  60. matches = validate(header_field_re, line, "illegal header line: {!r}", line)
  61. yield (matches["field_name"], matches["field_value"])
  62. request_line_re = re.compile(request_line.encode("ascii"))
  63. def maybe_read_from_IDLE_client(buf: ReceiveBuffer) -> Optional[Request]:
  64. lines = buf.maybe_extract_lines()
  65. if lines is None:
  66. if buf.is_next_line_obviously_invalid_request_line():
  67. raise LocalProtocolError("illegal request line")
  68. return None
  69. if not lines:
  70. raise LocalProtocolError("no request line received")
  71. matches = validate(
  72. request_line_re, lines[0], "illegal request line: {!r}", lines[0]
  73. )
  74. return Request(
  75. headers=list(_decode_header_lines(lines[1:])), _parsed=True, **matches
  76. )
  77. status_line_re = re.compile(status_line.encode("ascii"))
  78. def maybe_read_from_SEND_RESPONSE_server(
  79. buf: ReceiveBuffer,
  80. ) -> Union[InformationalResponse, Response, None]:
  81. lines = buf.maybe_extract_lines()
  82. if lines is None:
  83. if buf.is_next_line_obviously_invalid_request_line():
  84. raise LocalProtocolError("illegal request line")
  85. return None
  86. if not lines:
  87. raise LocalProtocolError("no response line received")
  88. matches = validate(status_line_re, lines[0], "illegal status line: {!r}", lines[0])
  89. http_version = (
  90. b"1.1" if matches["http_version"] is None else matches["http_version"]
  91. )
  92. reason = b"" if matches["reason"] is None else matches["reason"]
  93. status_code = int(matches["status_code"])
  94. class_: Union[Type[InformationalResponse], Type[Response]] = (
  95. InformationalResponse if status_code < 200 else Response
  96. )
  97. return class_(
  98. headers=list(_decode_header_lines(lines[1:])),
  99. _parsed=True,
  100. status_code=status_code,
  101. reason=reason,
  102. http_version=http_version,
  103. )
  104. class ContentLengthReader:
  105. def __init__(self, length: int) -> None:
  106. self._length = length
  107. self._remaining = length
  108. def __call__(self, buf: ReceiveBuffer) -> Union[Data, EndOfMessage, None]:
  109. if self._remaining == 0:
  110. return EndOfMessage()
  111. data = buf.maybe_extract_at_most(self._remaining)
  112. if data is None:
  113. return None
  114. self._remaining -= len(data)
  115. return Data(data=data)
  116. def read_eof(self) -> NoReturn:
  117. raise RemoteProtocolError(
  118. "peer closed connection without sending complete message body "
  119. "(received {} bytes, expected {})".format(
  120. self._length - self._remaining, self._length
  121. )
  122. )
  123. chunk_header_re = re.compile(chunk_header.encode("ascii"))
  124. class ChunkedReader:
  125. def __init__(self) -> None:
  126. self._bytes_in_chunk = 0
  127. # After reading a chunk, we have to throw away the trailing \r\n; if
  128. # this is >0 then we discard that many bytes before resuming regular
  129. # de-chunkification.
  130. self._bytes_to_discard = 0
  131. self._reading_trailer = False
  132. def __call__(self, buf: ReceiveBuffer) -> Union[Data, EndOfMessage, None]:
  133. if self._reading_trailer:
  134. lines = buf.maybe_extract_lines()
  135. if lines is None:
  136. return None
  137. return EndOfMessage(headers=list(_decode_header_lines(lines)))
  138. if self._bytes_to_discard > 0:
  139. data = buf.maybe_extract_at_most(self._bytes_to_discard)
  140. if data is None:
  141. return None
  142. self._bytes_to_discard -= len(data)
  143. if self._bytes_to_discard > 0:
  144. return None
  145. # else, fall through and read some more
  146. assert self._bytes_to_discard == 0
  147. if self._bytes_in_chunk == 0:
  148. # We need to refill our chunk count
  149. chunk_header = buf.maybe_extract_next_line()
  150. if chunk_header is None:
  151. return None
  152. matches = validate(
  153. chunk_header_re,
  154. chunk_header,
  155. "illegal chunk header: {!r}",
  156. chunk_header,
  157. )
  158. # XX FIXME: we discard chunk extensions. Does anyone care?
  159. self._bytes_in_chunk = int(matches["chunk_size"], base=16)
  160. if self._bytes_in_chunk == 0:
  161. self._reading_trailer = True
  162. return self(buf)
  163. chunk_start = True
  164. else:
  165. chunk_start = False
  166. assert self._bytes_in_chunk > 0
  167. data = buf.maybe_extract_at_most(self._bytes_in_chunk)
  168. if data is None:
  169. return None
  170. self._bytes_in_chunk -= len(data)
  171. if self._bytes_in_chunk == 0:
  172. self._bytes_to_discard = 2
  173. chunk_end = True
  174. else:
  175. chunk_end = False
  176. return Data(data=data, chunk_start=chunk_start, chunk_end=chunk_end)
  177. def read_eof(self) -> NoReturn:
  178. raise RemoteProtocolError(
  179. "peer closed connection without sending complete message body "
  180. "(incomplete chunked read)"
  181. )
  182. class Http10Reader:
  183. def __call__(self, buf: ReceiveBuffer) -> Optional[Data]:
  184. data = buf.maybe_extract_at_most(999999999)
  185. if data is None:
  186. return None
  187. return Data(data=data)
  188. def read_eof(self) -> EndOfMessage:
  189. return EndOfMessage()
  190. def expect_nothing(buf: ReceiveBuffer) -> None:
  191. if buf:
  192. raise LocalProtocolError("Got data when expecting EOF")
  193. return None
# Type of the READERS table: keys are either a single sentinel or a pair of
# sentinels; values are either a callable or a dict mapping a framing name to
# a callable.
ReadersType = Dict[
    Union[Type[Sentinel], Tuple[Type[Sentinel], Type[Sentinel]]],
    Union[Callable[..., Any], Dict[str, Callable[..., Any]]],
]

# Dispatch table used to pick a reader.
READERS: ReadersType = {
    # Pair keys map directly to a reader callable.
    (CLIENT, IDLE): maybe_read_from_IDLE_client,
    (SERVER, IDLE): maybe_read_from_SEND_RESPONSE_server,
    (SERVER, SEND_RESPONSE): maybe_read_from_SEND_RESPONSE_server,
    # In these states any buffered data is an error.
    (CLIENT, DONE): expect_nothing,
    (CLIENT, MUST_CLOSE): expect_nothing,
    (CLIENT, CLOSED): expect_nothing,
    (SERVER, DONE): expect_nothing,
    (SERVER, MUST_CLOSE): expect_nothing,
    (SERVER, CLOSED): expect_nothing,
    # Body readers are stateful classes, so the table stores the classes
    # themselves keyed by framing name. Note that ContentLengthReader takes a
    # length argument while the other two take none.
    SEND_BODY: {
        "chunked": ChunkedReader,
        "content-length": ContentLengthReader,
        "http/1.0": Http10Reader,
    },
}
  213. }