_models.py 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7
  1. from __future__ import annotations
  2. import codecs
  3. import datetime
  4. import email.message
  5. import json as jsonlib
  6. import re
  7. import typing
  8. import urllib.request
  9. from collections.abc import Mapping
  10. from http.cookiejar import Cookie, CookieJar
  11. from ._content import ByteStream, UnattachedStream, encode_request, encode_response
  12. from ._decoders import (
  13. SUPPORTED_DECODERS,
  14. ByteChunker,
  15. ContentDecoder,
  16. IdentityDecoder,
  17. LineDecoder,
  18. MultiDecoder,
  19. TextChunker,
  20. TextDecoder,
  21. )
  22. from ._exceptions import (
  23. CookieConflict,
  24. HTTPStatusError,
  25. RequestNotRead,
  26. ResponseNotRead,
  27. StreamClosed,
  28. StreamConsumed,
  29. request_context,
  30. )
  31. from ._multipart import get_multipart_boundary_from_content_type
  32. from ._status_codes import codes
  33. from ._types import (
  34. AsyncByteStream,
  35. CookieTypes,
  36. HeaderTypes,
  37. QueryParamTypes,
  38. RequestContent,
  39. RequestData,
  40. RequestExtensions,
  41. RequestFiles,
  42. ResponseContent,
  43. ResponseExtensions,
  44. SyncByteStream,
  45. )
  46. from ._urls import URL
  47. from ._utils import to_bytes_or_str, to_str
# Names exported as this module's public API.
__all__ = ["Cookies", "Headers", "Request", "Response"]

# Lower-cased header names whose values `_obfuscate_sensitive_headers`
# replaces with a "[secure]" placeholder.
SENSITIVE_HEADERS = {"authorization", "proxy-authorization"}
  50. def _is_known_encoding(encoding: str) -> bool:
  51. """
  52. Return `True` if `encoding` is a known codec.
  53. """
  54. try:
  55. codecs.lookup(encoding)
  56. except LookupError:
  57. return False
  58. return True
  59. def _normalize_header_key(key: str | bytes, encoding: str | None = None) -> bytes:
  60. """
  61. Coerce str/bytes into a strictly byte-wise HTTP header key.
  62. """
  63. return key if isinstance(key, bytes) else key.encode(encoding or "ascii")
  64. def _normalize_header_value(value: str | bytes, encoding: str | None = None) -> bytes:
  65. """
  66. Coerce str/bytes into a strictly byte-wise HTTP header value.
  67. """
  68. if isinstance(value, bytes):
  69. return value
  70. if not isinstance(value, str):
  71. raise TypeError(f"Header value must be str or bytes, not {type(value)}")
  72. return value.encode(encoding or "ascii")
  73. def _parse_content_type_charset(content_type: str) -> str | None:
  74. # We used to use `cgi.parse_header()` here, but `cgi` became a dead battery.
  75. # See: https://peps.python.org/pep-0594/#cgi
  76. msg = email.message.Message()
  77. msg["content-type"] = content_type
  78. return msg.get_content_charset(failobj=None)
  79. def _parse_header_links(value: str) -> list[dict[str, str]]:
  80. """
  81. Returns a list of parsed link headers, for more info see:
  82. https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Link
  83. The generic syntax of those is:
  84. Link: < uri-reference >; param1=value1; param2="value2"
  85. So for instance:
  86. Link; '<http:/.../front.jpeg>; type="image/jpeg",<http://.../back.jpeg>;'
  87. would return
  88. [
  89. {"url": "http:/.../front.jpeg", "type": "image/jpeg"},
  90. {"url": "http://.../back.jpeg"},
  91. ]
  92. :param value: HTTP Link entity-header field
  93. :return: list of parsed link headers
  94. """
  95. links: list[dict[str, str]] = []
  96. replace_chars = " '\""
  97. value = value.strip(replace_chars)
  98. if not value:
  99. return links
  100. for val in re.split(", *<", value):
  101. try:
  102. url, params = val.split(";", 1)
  103. except ValueError:
  104. url, params = val, ""
  105. link = {"url": url.strip("<> '\"")}
  106. for param in params.split(";"):
  107. try:
  108. key, value = param.split("=")
  109. except ValueError:
  110. break
  111. link[key.strip(replace_chars)] = value.strip(replace_chars)
  112. links.append(link)
  113. return links
  114. def _obfuscate_sensitive_headers(
  115. items: typing.Iterable[tuple[typing.AnyStr, typing.AnyStr]],
  116. ) -> typing.Iterator[tuple[typing.AnyStr, typing.AnyStr]]:
  117. for k, v in items:
  118. if to_str(k.lower()) in SENSITIVE_HEADERS:
  119. v = to_bytes_or_str("[secure]", match_type_of=v)
  120. yield k, v
  121. class Headers(typing.MutableMapping[str, str]):
  122. """
  123. HTTP headers, as a case-insensitive multi-dict.
  124. """
  125. def __init__(
  126. self,
  127. headers: HeaderTypes | None = None,
  128. encoding: str | None = None,
  129. ) -> None:
  130. self._list = [] # type: typing.List[typing.Tuple[bytes, bytes, bytes]]
  131. if isinstance(headers, Headers):
  132. self._list = list(headers._list)
  133. elif isinstance(headers, Mapping):
  134. for k, v in headers.items():
  135. bytes_key = _normalize_header_key(k, encoding)
  136. bytes_value = _normalize_header_value(v, encoding)
  137. self._list.append((bytes_key, bytes_key.lower(), bytes_value))
  138. elif headers is not None:
  139. for k, v in headers:
  140. bytes_key = _normalize_header_key(k, encoding)
  141. bytes_value = _normalize_header_value(v, encoding)
  142. self._list.append((bytes_key, bytes_key.lower(), bytes_value))
  143. self._encoding = encoding
  144. @property
  145. def encoding(self) -> str:
  146. """
  147. Header encoding is mandated as ascii, but we allow fallbacks to utf-8
  148. or iso-8859-1.
  149. """
  150. if self._encoding is None:
  151. for encoding in ["ascii", "utf-8"]:
  152. for key, value in self.raw:
  153. try:
  154. key.decode(encoding)
  155. value.decode(encoding)
  156. except UnicodeDecodeError:
  157. break
  158. else:
  159. # The else block runs if 'break' did not occur, meaning
  160. # all values fitted the encoding.
  161. self._encoding = encoding
  162. break
  163. else:
  164. # The ISO-8859-1 encoding covers all 256 code points in a byte,
  165. # so will never raise decode errors.
  166. self._encoding = "iso-8859-1"
  167. return self._encoding
  168. @encoding.setter
  169. def encoding(self, value: str) -> None:
  170. self._encoding = value
  171. @property
  172. def raw(self) -> list[tuple[bytes, bytes]]:
  173. """
  174. Returns a list of the raw header items, as byte pairs.
  175. """
  176. return [(raw_key, value) for raw_key, _, value in self._list]
  177. def keys(self) -> typing.KeysView[str]:
  178. return {key.decode(self.encoding): None for _, key, value in self._list}.keys()
  179. def values(self) -> typing.ValuesView[str]:
  180. values_dict: dict[str, str] = {}
  181. for _, key, value in self._list:
  182. str_key = key.decode(self.encoding)
  183. str_value = value.decode(self.encoding)
  184. if str_key in values_dict:
  185. values_dict[str_key] += f", {str_value}"
  186. else:
  187. values_dict[str_key] = str_value
  188. return values_dict.values()
  189. def items(self) -> typing.ItemsView[str, str]:
  190. """
  191. Return `(key, value)` items of headers. Concatenate headers
  192. into a single comma separated value when a key occurs multiple times.
  193. """
  194. values_dict: dict[str, str] = {}
  195. for _, key, value in self._list:
  196. str_key = key.decode(self.encoding)
  197. str_value = value.decode(self.encoding)
  198. if str_key in values_dict:
  199. values_dict[str_key] += f", {str_value}"
  200. else:
  201. values_dict[str_key] = str_value
  202. return values_dict.items()
  203. def multi_items(self) -> list[tuple[str, str]]:
  204. """
  205. Return a list of `(key, value)` pairs of headers. Allow multiple
  206. occurrences of the same key without concatenating into a single
  207. comma separated value.
  208. """
  209. return [
  210. (key.decode(self.encoding), value.decode(self.encoding))
  211. for _, key, value in self._list
  212. ]
  213. def get(self, key: str, default: typing.Any = None) -> typing.Any:
  214. """
  215. Return a header value. If multiple occurrences of the header occur
  216. then concatenate them together with commas.
  217. """
  218. try:
  219. return self[key]
  220. except KeyError:
  221. return default
  222. def get_list(self, key: str, split_commas: bool = False) -> list[str]:
  223. """
  224. Return a list of all header values for a given key.
  225. If `split_commas=True` is passed, then any comma separated header
  226. values are split into multiple return strings.
  227. """
  228. get_header_key = key.lower().encode(self.encoding)
  229. values = [
  230. item_value.decode(self.encoding)
  231. for _, item_key, item_value in self._list
  232. if item_key.lower() == get_header_key
  233. ]
  234. if not split_commas:
  235. return values
  236. split_values = []
  237. for value in values:
  238. split_values.extend([item.strip() for item in value.split(",")])
  239. return split_values
  240. def update(self, headers: HeaderTypes | None = None) -> None: # type: ignore
  241. headers = Headers(headers)
  242. for key in headers.keys():
  243. if key in self:
  244. self.pop(key)
  245. self._list.extend(headers._list)
  246. def copy(self) -> Headers:
  247. return Headers(self, encoding=self.encoding)
  248. def __getitem__(self, key: str) -> str:
  249. """
  250. Return a single header value.
  251. If there are multiple headers with the same key, then we concatenate
  252. them with commas. See: https://tools.ietf.org/html/rfc7230#section-3.2.2
  253. """
  254. normalized_key = key.lower().encode(self.encoding)
  255. items = [
  256. header_value.decode(self.encoding)
  257. for _, header_key, header_value in self._list
  258. if header_key == normalized_key
  259. ]
  260. if items:
  261. return ", ".join(items)
  262. raise KeyError(key)
  263. def __setitem__(self, key: str, value: str) -> None:
  264. """
  265. Set the header `key` to `value`, removing any duplicate entries.
  266. Retains insertion order.
  267. """
  268. set_key = key.encode(self._encoding or "utf-8")
  269. set_value = value.encode(self._encoding or "utf-8")
  270. lookup_key = set_key.lower()
  271. found_indexes = [
  272. idx
  273. for idx, (_, item_key, _) in enumerate(self._list)
  274. if item_key == lookup_key
  275. ]
  276. for idx in reversed(found_indexes[1:]):
  277. del self._list[idx]
  278. if found_indexes:
  279. idx = found_indexes[0]
  280. self._list[idx] = (set_key, lookup_key, set_value)
  281. else:
  282. self._list.append((set_key, lookup_key, set_value))
  283. def __delitem__(self, key: str) -> None:
  284. """
  285. Remove the header `key`.
  286. """
  287. del_key = key.lower().encode(self.encoding)
  288. pop_indexes = [
  289. idx
  290. for idx, (_, item_key, _) in enumerate(self._list)
  291. if item_key.lower() == del_key
  292. ]
  293. if not pop_indexes:
  294. raise KeyError(key)
  295. for idx in reversed(pop_indexes):
  296. del self._list[idx]
  297. def __contains__(self, key: typing.Any) -> bool:
  298. header_key = key.lower().encode(self.encoding)
  299. return header_key in [key for _, key, _ in self._list]
  300. def __iter__(self) -> typing.Iterator[typing.Any]:
  301. return iter(self.keys())
  302. def __len__(self) -> int:
  303. return len(self._list)
  304. def __eq__(self, other: typing.Any) -> bool:
  305. try:
  306. other_headers = Headers(other)
  307. except ValueError:
  308. return False
  309. self_list = [(key, value) for _, key, value in self._list]
  310. other_list = [(key, value) for _, key, value in other_headers._list]
  311. return sorted(self_list) == sorted(other_list)
  312. def __repr__(self) -> str:
  313. class_name = self.__class__.__name__
  314. encoding_str = ""
  315. if self.encoding != "ascii":
  316. encoding_str = f", encoding={self.encoding!r}"
  317. as_list = list(_obfuscate_sensitive_headers(self.multi_items()))
  318. as_dict = dict(as_list)
  319. no_duplicate_keys = len(as_dict) == len(as_list)
  320. if no_duplicate_keys:
  321. return f"{class_name}({as_dict!r}{encoding_str})"
  322. return f"{class_name}({as_list!r}{encoding_str})"
  323. class Request:
  324. def __init__(
  325. self,
  326. method: str,
  327. url: URL | str,
  328. *,
  329. params: QueryParamTypes | None = None,
  330. headers: HeaderTypes | None = None,
  331. cookies: CookieTypes | None = None,
  332. content: RequestContent | None = None,
  333. data: RequestData | None = None,
  334. files: RequestFiles | None = None,
  335. json: typing.Any | None = None,
  336. stream: SyncByteStream | AsyncByteStream | None = None,
  337. extensions: RequestExtensions | None = None,
  338. ) -> None:
  339. self.method = method.upper()
  340. self.url = URL(url) if params is None else URL(url, params=params)
  341. self.headers = Headers(headers)
  342. self.extensions = {} if extensions is None else dict(extensions)
  343. if cookies:
  344. Cookies(cookies).set_cookie_header(self)
  345. if stream is None:
  346. content_type: str | None = self.headers.get("content-type")
  347. headers, stream = encode_request(
  348. content=content,
  349. data=data,
  350. files=files,
  351. json=json,
  352. boundary=get_multipart_boundary_from_content_type(
  353. content_type=content_type.encode(self.headers.encoding)
  354. if content_type
  355. else None
  356. ),
  357. )
  358. self._prepare(headers)
  359. self.stream = stream
  360. # Load the request body, except for streaming content.
  361. if isinstance(stream, ByteStream):
  362. self.read()
  363. else:
  364. # There's an important distinction between `Request(content=...)`,
  365. # and `Request(stream=...)`.
  366. #
  367. # Using `content=...` implies automatically populated `Host` and content
  368. # headers, of either `Content-Length: ...` or `Transfer-Encoding: chunked`.
  369. #
  370. # Using `stream=...` will not automatically include *any*
  371. # auto-populated headers.
  372. #
  373. # As an end-user you don't really need `stream=...`. It's only
  374. # useful when:
  375. #
  376. # * Preserving the request stream when copying requests, eg for redirects.
  377. # * Creating request instances on the *server-side* of the transport API.
  378. self.stream = stream
  379. def _prepare(self, default_headers: dict[str, str]) -> None:
  380. for key, value in default_headers.items():
  381. # Ignore Transfer-Encoding if the Content-Length has been set explicitly.
  382. if key.lower() == "transfer-encoding" and "Content-Length" in self.headers:
  383. continue
  384. self.headers.setdefault(key, value)
  385. auto_headers: list[tuple[bytes, bytes]] = []
  386. has_host = "Host" in self.headers
  387. has_content_length = (
  388. "Content-Length" in self.headers or "Transfer-Encoding" in self.headers
  389. )
  390. if not has_host and self.url.host:
  391. auto_headers.append((b"Host", self.url.netloc))
  392. if not has_content_length and self.method in ("POST", "PUT", "PATCH"):
  393. auto_headers.append((b"Content-Length", b"0"))
  394. self.headers = Headers(auto_headers + self.headers.raw)
  395. @property
  396. def content(self) -> bytes:
  397. if not hasattr(self, "_content"):
  398. raise RequestNotRead()
  399. return self._content
  400. def read(self) -> bytes:
  401. """
  402. Read and return the request content.
  403. """
  404. if not hasattr(self, "_content"):
  405. assert isinstance(self.stream, typing.Iterable)
  406. self._content = b"".join(self.stream)
  407. if not isinstance(self.stream, ByteStream):
  408. # If a streaming request has been read entirely into memory, then
  409. # we can replace the stream with a raw bytes implementation,
  410. # to ensure that any non-replayable streams can still be used.
  411. self.stream = ByteStream(self._content)
  412. return self._content
  413. async def aread(self) -> bytes:
  414. """
  415. Read and return the request content.
  416. """
  417. if not hasattr(self, "_content"):
  418. assert isinstance(self.stream, typing.AsyncIterable)
  419. self._content = b"".join([part async for part in self.stream])
  420. if not isinstance(self.stream, ByteStream):
  421. # If a streaming request has been read entirely into memory, then
  422. # we can replace the stream with a raw bytes implementation,
  423. # to ensure that any non-replayable streams can still be used.
  424. self.stream = ByteStream(self._content)
  425. return self._content
  426. def __repr__(self) -> str:
  427. class_name = self.__class__.__name__
  428. url = str(self.url)
  429. return f"<{class_name}({self.method!r}, {url!r})>"
  430. def __getstate__(self) -> dict[str, typing.Any]:
  431. return {
  432. name: value
  433. for name, value in self.__dict__.items()
  434. if name not in ["extensions", "stream"]
  435. }
  436. def __setstate__(self, state: dict[str, typing.Any]) -> None:
  437. for name, value in state.items():
  438. setattr(self, name, value)
  439. self.extensions = {}
  440. self.stream = UnattachedStream()
  441. class Response:
  442. def __init__(
  443. self,
  444. status_code: int,
  445. *,
  446. headers: HeaderTypes | None = None,
  447. content: ResponseContent | None = None,
  448. text: str | None = None,
  449. html: str | None = None,
  450. json: typing.Any = None,
  451. stream: SyncByteStream | AsyncByteStream | None = None,
  452. request: Request | None = None,
  453. extensions: ResponseExtensions | None = None,
  454. history: list[Response] | None = None,
  455. default_encoding: str | typing.Callable[[bytes], str] = "utf-8",
  456. ) -> None:
  457. self.status_code = status_code
  458. self.headers = Headers(headers)
  459. self._request: Request | None = request
  460. # When follow_redirects=False and a redirect is received,
  461. # the client will set `response.next_request`.
  462. self.next_request: Request | None = None
  463. self.extensions = {} if extensions is None else dict(extensions)
  464. self.history = [] if history is None else list(history)
  465. self.is_closed = False
  466. self.is_stream_consumed = False
  467. self.default_encoding = default_encoding
  468. if stream is None:
  469. headers, stream = encode_response(content, text, html, json)
  470. self._prepare(headers)
  471. self.stream = stream
  472. if isinstance(stream, ByteStream):
  473. # Load the response body, except for streaming content.
  474. self.read()
  475. else:
  476. # There's an important distinction between `Response(content=...)`,
  477. # and `Response(stream=...)`.
  478. #
  479. # Using `content=...` implies automatically populated content headers,
  480. # of either `Content-Length: ...` or `Transfer-Encoding: chunked`.
  481. #
  482. # Using `stream=...` will not automatically include any content headers.
  483. #
  484. # As an end-user you don't really need `stream=...`. It's only
  485. # useful when creating response instances having received a stream
  486. # from the transport API.
  487. self.stream = stream
  488. self._num_bytes_downloaded = 0
  489. def _prepare(self, default_headers: dict[str, str]) -> None:
  490. for key, value in default_headers.items():
  491. # Ignore Transfer-Encoding if the Content-Length has been set explicitly.
  492. if key.lower() == "transfer-encoding" and "content-length" in self.headers:
  493. continue
  494. self.headers.setdefault(key, value)
  495. @property
  496. def elapsed(self) -> datetime.timedelta:
  497. """
  498. Returns the time taken for the complete request/response
  499. cycle to complete.
  500. """
  501. if not hasattr(self, "_elapsed"):
  502. raise RuntimeError(
  503. "'.elapsed' may only be accessed after the response "
  504. "has been read or closed."
  505. )
  506. return self._elapsed
  507. @elapsed.setter
  508. def elapsed(self, elapsed: datetime.timedelta) -> None:
  509. self._elapsed = elapsed
  510. @property
  511. def request(self) -> Request:
  512. """
  513. Returns the request instance associated to the current response.
  514. """
  515. if self._request is None:
  516. raise RuntimeError(
  517. "The request instance has not been set on this response."
  518. )
  519. return self._request
  520. @request.setter
  521. def request(self, value: Request) -> None:
  522. self._request = value
  523. @property
  524. def http_version(self) -> str:
  525. try:
  526. http_version: bytes = self.extensions["http_version"]
  527. except KeyError:
  528. return "HTTP/1.1"
  529. else:
  530. return http_version.decode("ascii", errors="ignore")
  531. @property
  532. def reason_phrase(self) -> str:
  533. try:
  534. reason_phrase: bytes = self.extensions["reason_phrase"]
  535. except KeyError:
  536. return codes.get_reason_phrase(self.status_code)
  537. else:
  538. return reason_phrase.decode("ascii", errors="ignore")
  539. @property
  540. def url(self) -> URL:
  541. """
  542. Returns the URL for which the request was made.
  543. """
  544. return self.request.url
  545. @property
  546. def content(self) -> bytes:
  547. if not hasattr(self, "_content"):
  548. raise ResponseNotRead()
  549. return self._content
  550. @property
  551. def text(self) -> str:
  552. if not hasattr(self, "_text"):
  553. content = self.content
  554. if not content:
  555. self._text = ""
  556. else:
  557. decoder = TextDecoder(encoding=self.encoding or "utf-8")
  558. self._text = "".join([decoder.decode(self.content), decoder.flush()])
  559. return self._text
  560. @property
  561. def encoding(self) -> str | None:
  562. """
  563. Return an encoding to use for decoding the byte content into text.
  564. The priority for determining this is given by...
  565. * `.encoding = <>` has been set explicitly.
  566. * The encoding as specified by the charset parameter in the Content-Type header.
  567. * The encoding as determined by `default_encoding`, which may either be
  568. a string like "utf-8" indicating the encoding to use, or may be a callable
  569. which enables charset autodetection.
  570. """
  571. if not hasattr(self, "_encoding"):
  572. encoding = self.charset_encoding
  573. if encoding is None or not _is_known_encoding(encoding):
  574. if isinstance(self.default_encoding, str):
  575. encoding = self.default_encoding
  576. elif hasattr(self, "_content"):
  577. encoding = self.default_encoding(self._content)
  578. self._encoding = encoding or "utf-8"
  579. return self._encoding
  580. @encoding.setter
  581. def encoding(self, value: str) -> None:
  582. """
  583. Set the encoding to use for decoding the byte content into text.
  584. If the `text` attribute has been accessed, attempting to set the
  585. encoding will throw a ValueError.
  586. """
  587. if hasattr(self, "_text"):
  588. raise ValueError(
  589. "Setting encoding after `text` has been accessed is not allowed."
  590. )
  591. self._encoding = value
  592. @property
  593. def charset_encoding(self) -> str | None:
  594. """
  595. Return the encoding, as specified by the Content-Type header.
  596. """
  597. content_type = self.headers.get("Content-Type")
  598. if content_type is None:
  599. return None
  600. return _parse_content_type_charset(content_type)
  601. def _get_content_decoder(self) -> ContentDecoder:
  602. """
  603. Returns a decoder instance which can be used to decode the raw byte
  604. content, depending on the Content-Encoding used in the response.
  605. """
  606. if not hasattr(self, "_decoder"):
  607. decoders: list[ContentDecoder] = []
  608. values = self.headers.get_list("content-encoding", split_commas=True)
  609. for value in values:
  610. value = value.strip().lower()
  611. try:
  612. decoder_cls = SUPPORTED_DECODERS[value]
  613. decoders.append(decoder_cls())
  614. except KeyError:
  615. continue
  616. if len(decoders) == 1:
  617. self._decoder = decoders[0]
  618. elif len(decoders) > 1:
  619. self._decoder = MultiDecoder(children=decoders)
  620. else:
  621. self._decoder = IdentityDecoder()
  622. return self._decoder
  623. @property
  624. def is_informational(self) -> bool:
  625. """
  626. A property which is `True` for 1xx status codes, `False` otherwise.
  627. """
  628. return codes.is_informational(self.status_code)
  629. @property
  630. def is_success(self) -> bool:
  631. """
  632. A property which is `True` for 2xx status codes, `False` otherwise.
  633. """
  634. return codes.is_success(self.status_code)
  635. @property
  636. def is_redirect(self) -> bool:
  637. """
  638. A property which is `True` for 3xx status codes, `False` otherwise.
  639. Note that not all responses with a 3xx status code indicate a URL redirect.
  640. Use `response.has_redirect_location` to determine responses with a properly
  641. formed URL redirection.
  642. """
  643. return codes.is_redirect(self.status_code)
  644. @property
  645. def is_client_error(self) -> bool:
  646. """
  647. A property which is `True` for 4xx status codes, `False` otherwise.
  648. """
  649. return codes.is_client_error(self.status_code)
  650. @property
  651. def is_server_error(self) -> bool:
  652. """
  653. A property which is `True` for 5xx status codes, `False` otherwise.
  654. """
  655. return codes.is_server_error(self.status_code)
  656. @property
  657. def is_error(self) -> bool:
  658. """
  659. A property which is `True` for 4xx and 5xx status codes, `False` otherwise.
  660. """
  661. return codes.is_error(self.status_code)
  662. @property
  663. def has_redirect_location(self) -> bool:
  664. """
  665. Returns True for 3xx responses with a properly formed URL redirection,
  666. `False` otherwise.
  667. """
  668. return (
  669. self.status_code
  670. in (
  671. # 301 (Cacheable redirect. Method may change to GET.)
  672. codes.MOVED_PERMANENTLY,
  673. # 302 (Uncacheable redirect. Method may change to GET.)
  674. codes.FOUND,
  675. # 303 (Client should make a GET or HEAD request.)
  676. codes.SEE_OTHER,
  677. # 307 (Equiv. 302, but retain method)
  678. codes.TEMPORARY_REDIRECT,
  679. # 308 (Equiv. 301, but retain method)
  680. codes.PERMANENT_REDIRECT,
  681. )
  682. and "Location" in self.headers
  683. )
  684. def raise_for_status(self) -> Response:
  685. """
  686. Raise the `HTTPStatusError` if one occurred.
  687. """
  688. request = self._request
  689. if request is None:
  690. raise RuntimeError(
  691. "Cannot call `raise_for_status` as the request "
  692. "instance has not been set on this response."
  693. )
  694. if self.is_success:
  695. return self
  696. if self.has_redirect_location:
  697. message = (
  698. "{error_type} '{0.status_code} {0.reason_phrase}' for url '{0.url}'\n"
  699. "Redirect location: '{0.headers[location]}'\n"
  700. "For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/{0.status_code}"
  701. )
  702. else:
  703. message = (
  704. "{error_type} '{0.status_code} {0.reason_phrase}' for url '{0.url}'\n"
  705. "For more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/{0.status_code}"
  706. )
  707. status_class = self.status_code // 100
  708. error_types = {
  709. 1: "Informational response",
  710. 3: "Redirect response",
  711. 4: "Client error",
  712. 5: "Server error",
  713. }
  714. error_type = error_types.get(status_class, "Invalid status code")
  715. message = message.format(self, error_type=error_type)
  716. raise HTTPStatusError(message, request=request, response=self)
  717. def json(self, **kwargs: typing.Any) -> typing.Any:
  718. return jsonlib.loads(self.content, **kwargs)
  719. @property
  720. def cookies(self) -> Cookies:
  721. if not hasattr(self, "_cookies"):
  722. self._cookies = Cookies()
  723. self._cookies.extract_cookies(self)
  724. return self._cookies
  725. @property
  726. def links(self) -> dict[str | None, dict[str, str]]:
  727. """
  728. Returns the parsed header links of the response, if any
  729. """
  730. header = self.headers.get("link")
  731. if header is None:
  732. return {}
  733. return {
  734. (link.get("rel") or link.get("url")): link
  735. for link in _parse_header_links(header)
  736. }
  737. @property
  738. def num_bytes_downloaded(self) -> int:
  739. return self._num_bytes_downloaded
  740. def __repr__(self) -> str:
  741. return f"<Response [{self.status_code} {self.reason_phrase}]>"
  def __getstate__(self) -> dict[str, typing.Any]:
      """
      Return the instance attributes to pickle.

      `extensions`, `stream`, `is_closed` and `_decoder` are left out;
      `__setstate__` assigns fresh values for the first three.
      """
      skipped = ("extensions", "stream", "is_closed", "_decoder")
      state = {}
      for attr, stored in self.__dict__.items():
          if attr in skipped:
              continue
          state[attr] = stored
      return state
  def __setstate__(self, state: dict[str, typing.Any]) -> None:
      """
      Restore pickled attributes, then mark the response as closed with
      empty extensions and an `UnattachedStream` in place of the stream.
      """
      for attr, stored in state.items():
          setattr(self, attr, stored)

      # These are never pickled (see `__getstate__`), so rebuild them here.
      self.is_closed = True
      self.extensions = {}
      self.stream = UnattachedStream()
  def read(self) -> bytes:
      """
      Read and return the response content.

      The body is assembled from `iter_bytes()` on the first call and kept
      in `_content`; later calls return the stored bytes.
      """
      if hasattr(self, "_content"):
          return self._content

      self._content = b"".join(self.iter_bytes())
      return self._content
  def iter_bytes(self, chunk_size: int | None = None) -> typing.Iterator[bytes]:
      """
      A byte-iterator over the decoded response content.
      This allows us to handle gzip, deflate, brotli, and zstd encoded responses.

      When the content has already been read it is sliced from memory;
      otherwise the raw stream is decoded and re-chunked on the fly.
      """
      if hasattr(self, "_content"):
          body = self._content
          width = len(body) if chunk_size is None else chunk_size
          # The step is clamped to 1 so `range` never receives a zero step
          # (e.g. an empty body with no explicit chunk size).
          for start in range(0, len(body), max(width, 1)):
              yield body[start : start + width]
          return

      content_decoder = self._get_content_decoder()
      byte_chunker = ByteChunker(chunk_size=chunk_size)
      with request_context(request=self._request):
          for raw in self.iter_raw():
              plain = content_decoder.decode(raw)
              for piece in byte_chunker.decode(plain):
                  yield piece
          # Drain whatever the content decoder is still holding.
          plain = content_decoder.flush()
          for piece in byte_chunker.decode(plain):
              yield piece  # pragma: no cover
          for piece in byte_chunker.flush():
              yield piece
  def iter_text(self, chunk_size: int | None = None) -> typing.Iterator[str]:
      """
      A str-iterator over the decoded response content.

      Bytes from `iter_bytes()` are decoded with the response encoding
      (falling back to "utf-8" when none is set) and re-chunked to
      `chunk_size`.
      """
      text_decoder = TextDecoder(encoding=self.encoding or "utf-8")
      text_chunker = TextChunker(chunk_size=chunk_size)
      with request_context(request=self._request):
          for data in self.iter_bytes():
              decoded_text = text_decoder.decode(data)
              for piece in text_chunker.decode(decoded_text):
                  yield piece
          # Drain whatever the text decoder is still holding.
          decoded_text = text_decoder.flush()
          for piece in text_chunker.decode(decoded_text):
              yield piece  # pragma: no cover
          for piece in text_chunker.flush():
              yield piece
  def iter_lines(self) -> typing.Iterator[str]:
      """
      A str-iterator over the response text, one line at a time.

      Text from `iter_text()` is fed through a `LineDecoder`; whatever the
      decoder still holds once the text is exhausted is yielded last.
      """
      line_decoder = LineDecoder()
      with request_context(request=self._request):
          for fragment in self.iter_text():
              for text_line in line_decoder.decode(fragment):
                  yield text_line
          for text_line in line_decoder.flush():
              yield text_line
  def iter_raw(self, chunk_size: int | None = None) -> typing.Iterator[bytes]:
      """
      A byte-iterator over the raw response content.

      Raises `StreamConsumed` if the stream was already iterated,
      `StreamClosed` if the response is closed, and `RuntimeError` if the
      underlying stream is not a `SyncByteStream`. The response is closed
      once the stream has been read to completion.
      """
      if self.is_stream_consumed:
          raise StreamConsumed()
      if self.is_closed:
          raise StreamClosed()
      if not isinstance(self.stream, SyncByteStream):
          raise RuntimeError("Attempted to call a sync iterator on an async stream.")

      # Mark the stream as used and restart the download counter.
      self.is_stream_consumed = True
      self._num_bytes_downloaded = 0
      byte_chunker = ByteChunker(chunk_size=chunk_size)

      with request_context(request=self._request):
          for received in self.stream:
              self._num_bytes_downloaded += len(received)
              for piece in byte_chunker.decode(received):
                  yield piece

      for piece in byte_chunker.flush():
          yield piece

      self.close()
  def close(self) -> None:
      """
      Close the response and release the connection.
      Automatically called if the response body is read to completion.

      Raises `RuntimeError` when the underlying stream is not a
      `SyncByteStream`. Closing an already-closed response does nothing.
      """
      if not isinstance(self.stream, SyncByteStream):
          raise RuntimeError("Attempted to call an sync close on an async stream.")

      if self.is_closed:
          return

      # Flip the flag before closing the stream itself.
      self.is_closed = True
      with request_context(request=self._request):
          self.stream.close()
  async def aread(self) -> bytes:
      """
      Read and return the response content.

      The body is assembled from `aiter_bytes()` on the first call and kept
      in `_content`; later calls return the stored bytes.
      """
      if hasattr(self, "_content"):
          return self._content

      parts = []
      async for part in self.aiter_bytes():
          parts.append(part)
      self._content = b"".join(parts)
      return self._content
  async def aiter_bytes(
      self, chunk_size: int | None = None
  ) -> typing.AsyncIterator[bytes]:
      """
      A byte-iterator over the decoded response content.
      This allows us to handle gzip, deflate, brotli, and zstd encoded responses.

      When the content has already been read it is sliced from memory;
      otherwise the raw stream is decoded and re-chunked on the fly.
      """
      if hasattr(self, "_content"):
          body = self._content
          width = len(body) if chunk_size is None else chunk_size
          # The step is clamped to 1 so `range` never receives a zero step
          # (e.g. an empty body with no explicit chunk size).
          for start in range(0, len(body), max(width, 1)):
              yield body[start : start + width]
          return

      content_decoder = self._get_content_decoder()
      byte_chunker = ByteChunker(chunk_size=chunk_size)
      with request_context(request=self._request):
          async for raw in self.aiter_raw():
              plain = content_decoder.decode(raw)
              for piece in byte_chunker.decode(plain):
                  yield piece
          # Drain whatever the content decoder is still holding.
          plain = content_decoder.flush()
          for piece in byte_chunker.decode(plain):
              yield piece  # pragma: no cover
          for piece in byte_chunker.flush():
              yield piece
  async def aiter_text(
      self, chunk_size: int | None = None
  ) -> typing.AsyncIterator[str]:
      """
      A str-iterator over the decoded response content.

      Bytes from `aiter_bytes()` are decoded with the response encoding
      (falling back to "utf-8" when none is set) and re-chunked to
      `chunk_size`.
      """
      text_decoder = TextDecoder(encoding=self.encoding or "utf-8")
      text_chunker = TextChunker(chunk_size=chunk_size)
      with request_context(request=self._request):
          async for data in self.aiter_bytes():
              decoded_text = text_decoder.decode(data)
              for piece in text_chunker.decode(decoded_text):
                  yield piece
          # Drain whatever the text decoder is still holding.
          decoded_text = text_decoder.flush()
          for piece in text_chunker.decode(decoded_text):
              yield piece  # pragma: no cover
          for piece in text_chunker.flush():
              yield piece
  async def aiter_lines(self) -> typing.AsyncIterator[str]:
      """
      A str-iterator over the response text, one line at a time.

      Text from `aiter_text()` is fed through a `LineDecoder`; whatever the
      decoder still holds once the text is exhausted is yielded last.
      """
      line_decoder = LineDecoder()
      with request_context(request=self._request):
          async for fragment in self.aiter_text():
              for text_line in line_decoder.decode(fragment):
                  yield text_line
          for text_line in line_decoder.flush():
              yield text_line
  async def aiter_raw(
      self, chunk_size: int | None = None
  ) -> typing.AsyncIterator[bytes]:
      """
      A byte-iterator over the raw response content.

      Raises `StreamConsumed` if the stream was already iterated,
      `StreamClosed` if the response is closed, and `RuntimeError` if the
      underlying stream is not an `AsyncByteStream`. The response is closed
      once the stream has been read to completion.
      """
      if self.is_stream_consumed:
          raise StreamConsumed()
      if self.is_closed:
          raise StreamClosed()
      if not isinstance(self.stream, AsyncByteStream):
          raise RuntimeError("Attempted to call an async iterator on an sync stream.")

      # Mark the stream as used and restart the download counter.
      self.is_stream_consumed = True
      self._num_bytes_downloaded = 0
      byte_chunker = ByteChunker(chunk_size=chunk_size)

      with request_context(request=self._request):
          async for received in self.stream:
              self._num_bytes_downloaded += len(received)
              for piece in byte_chunker.decode(received):
                  yield piece

      for piece in byte_chunker.flush():
          yield piece

      await self.aclose()
  async def aclose(self) -> None:
      """
      Close the response and release the connection.
      Automatically called if the response body is read to completion.

      Raises `RuntimeError` when the underlying stream is not an
      `AsyncByteStream`. Closing an already-closed response does nothing.
      """
      if not isinstance(self.stream, AsyncByteStream):
          raise RuntimeError("Attempted to call an async close on an sync stream.")

      if self.is_closed:
          return

      # Flip the flag before closing the stream itself.
      self.is_closed = True
      with request_context(request=self._request):
          await self.stream.aclose()
  class Cookies(typing.MutableMapping[str, str]):
      """
      HTTP Cookies, as a mutable mapping.

      Cookies are held in `self.jar`. Mapping-style access goes by cookie
      name; `get`, `set`, `delete` and `clear` can additionally narrow by
      domain and path.
      """

      def __init__(self, cookies: CookieTypes | None = None) -> None:
          """
          Build the container from nothing, a dict, a list of
          `(name, value)` pairs, another `Cookies` instance (its cookies are
          copied into a new jar), or any other object, which is adopted
          as-is as the backing jar.
          """
          if cookies is not None and not isinstance(cookies, (dict, list, Cookies)):
              # Not one of the recognised shapes: use it directly as the jar.
              self.jar = cookies
              return

          self.jar = CookieJar()
          if cookies is None:
              return
          if isinstance(cookies, dict):
              for key, value in cookies.items():
                  self.set(key, value)
          elif isinstance(cookies, list):
              for key, value in cookies:
                  self.set(key, value)
          else:
              for cookie in cookies.jar:
                  self.jar.set_cookie(cookie)

      def extract_cookies(self, response: Response) -> None:
          """
          Loads any cookies based on the response `Set-Cookie` headers.
          """
          compat_response = self._CookieCompatResponse(response)
          compat_request = self._CookieCompatRequest(response.request)
          self.jar.extract_cookies(compat_response, compat_request)  # type: ignore

      def set_cookie_header(self, request: Request) -> None:
          """
          Sets an appropriate 'Cookie:' HTTP header on the `Request`.
          """
          compat_request = self._CookieCompatRequest(request)
          self.jar.add_cookie_header(compat_request)

      def set(self, name: str, value: str, domain: str = "", path: str = "/") -> None:
          """
          Set a cookie value by name. May optionally include domain and path.
          """
          cookie = Cookie(  # type: ignore
              version=0,
              name=name,
              value=value,
              port=None,
              port_specified=False,
              domain=domain,
              domain_specified=bool(domain),
              domain_initial_dot=domain.startswith("."),
              path=path,
              path_specified=bool(path),
              secure=False,
              expires=None,
              discard=True,
              comment=None,
              comment_url=None,
              rest={"HttpOnly": None},
              rfc2109=False,
          )
          self.jar.set_cookie(cookie)

      def get(  # type: ignore
          self,
          name: str,
          default: str | None = None,
          domain: str | None = None,
          path: str | None = None,
      ) -> str | None:
          """
          Get a cookie by name. May optionally include domain and path
          in order to specify exactly which cookie to retrieve.

          Returns `default` when nothing matches, and raises
          `CookieConflict` when a second match is found after a value has
          already been picked up.
          """
          found = None
          for cookie in self.jar:
              if cookie.name != name:
                  continue
              if domain is not None and cookie.domain != domain:
                  continue
              if path is not None and cookie.path != path:
                  continue
              if found is not None:
                  message = f"Multiple cookies exist with name={name}"
                  raise CookieConflict(message)
              found = cookie.value

          return default if found is None else found

      def delete(
          self,
          name: str,
          domain: str | None = None,
          path: str | None = None,
      ) -> None:
          """
          Delete a cookie by name. May optionally include domain and path
          in order to specify exactly which cookie to delete.
          """
          if domain is not None and path is not None:
              # Fully qualified: hand the exact key to the jar.
              return self.jar.clear(domain, path, name)

          # Collect first, then remove, so the jar is not mutated mid-iteration.
          doomed = []
          for cookie in self.jar:
              if cookie.name != name:
                  continue
              if domain is not None and cookie.domain != domain:
                  continue
              if path is not None and cookie.path != path:
                  continue
              doomed.append(cookie)

          for cookie in doomed:
              self.jar.clear(cookie.domain, cookie.path, cookie.name)

      def clear(self, domain: str | None = None, path: str | None = None) -> None:
          """
          Delete all cookies. Optionally include a domain and path in
          order to only delete a subset of all the cookies.

          A path may only be given together with a domain.
          """
          selectors = [] if domain is None else [domain]
          if path is not None:
              assert domain is not None
              selectors.append(path)
          self.jar.clear(*selectors)

      def update(self, cookies: CookieTypes | None = None) -> None:  # type: ignore
          """
          Add every cookie from `cookies` (anything `Cookies()` accepts) to
          this jar.
          """
          incoming = Cookies(cookies)
          for cookie in incoming.jar:
              self.jar.set_cookie(cookie)

      def __setitem__(self, name: str, value: str) -> None:
          """Set a cookie by name using the defaults of `set`."""
          return self.set(name, value)

      def __getitem__(self, name: str) -> str:
          """Return the cookie value for `name`, or raise `KeyError`."""
          value = self.get(name)
          if value is not None:
              return value
          raise KeyError(name)

      def __delitem__(self, name: str) -> None:
          """Delete the cookies called `name` via `delete`."""
          return self.delete(name)

      def __len__(self) -> int:
          """Number of cookies, as reported by the jar."""
          return len(self.jar)

      def __iter__(self) -> typing.Iterator[str]:
          """Iterate over the names of the cookies in the jar."""
          return (entry.name for entry in self.jar)

      def __bool__(self) -> bool:
          """True as soon as the jar yields at least one cookie."""
          return any(True for _ in self.jar)

      def __repr__(self) -> str:
          """Debugging representation listing name, value and domain per cookie."""
          rendered = [
              f"<Cookie {cookie.name}={cookie.value} for {cookie.domain} />"
              for cookie in self.jar
          ]
          cookies_repr = ", ".join(rendered)
          return f"<Cookies[{cookies_repr}]>"

      class _CookieCompatRequest(urllib.request.Request):
          """
          Wraps a `Request` instance up in a compatibility interface suitable
          for use with `CookieJar` operations.
          """

          def __init__(self, request: Request) -> None:
              """Mirror the request's URL, headers and method, and keep the original."""
              super().__init__(
                  url=str(request.url),
                  headers=dict(request.headers),
                  method=request.method,
              )
              self.request = request

          def add_unredirected_header(self, key: str, value: str) -> None:
              """Record the header on this wrapper and on the wrapped request."""
              super().add_unredirected_header(key, value)
              self.request.headers[key] = value

      class _CookieCompatResponse:
          """
          Wraps a `Response` instance up in a compatibility interface suitable
          for use with `CookieJar` operations.
          """

          def __init__(self, response: Response) -> None:
              """Keep a reference to the wrapped response."""
              self.response = response

          def info(self) -> email.message.Message:
              """Return the response headers as an `email.message.Message`."""
              message = email.message.Message()
              for key, value in self.response.headers.multi_items():
                  # Note that setting `message[key]` here is an "append" operation,
                  # not a "replace" operation.
                  # https://docs.python.org/3/library/email.compat32-message.html#email.message.Message.__setitem__
                  message[key] = value
              return message