# _content.py
  1. from __future__ import annotations
  2. import inspect
  3. import warnings
  4. from json import dumps as json_dumps
  5. from typing import (
  6. Any,
  7. AsyncIterable,
  8. AsyncIterator,
  9. Iterable,
  10. Iterator,
  11. Mapping,
  12. )
  13. from urllib.parse import urlencode
  14. from ._exceptions import StreamClosed, StreamConsumed
  15. from ._multipart import MultipartStream
  16. from ._types import (
  17. AsyncByteStream,
  18. RequestContent,
  19. RequestData,
  20. RequestFiles,
  21. ResponseContent,
  22. SyncByteStream,
  23. )
  24. from ._utils import peek_filelike_length, primitive_value_to_str
  25. __all__ = ["ByteStream"]
  26. class ByteStream(AsyncByteStream, SyncByteStream):
  27. def __init__(self, stream: bytes) -> None:
  28. self._stream = stream
  29. def __iter__(self) -> Iterator[bytes]:
  30. yield self._stream
  31. async def __aiter__(self) -> AsyncIterator[bytes]:
  32. yield self._stream
  33. class IteratorByteStream(SyncByteStream):
  34. CHUNK_SIZE = 65_536
  35. def __init__(self, stream: Iterable[bytes]) -> None:
  36. self._stream = stream
  37. self._is_stream_consumed = False
  38. self._is_generator = inspect.isgenerator(stream)
  39. def __iter__(self) -> Iterator[bytes]:
  40. if self._is_stream_consumed and self._is_generator:
  41. raise StreamConsumed()
  42. self._is_stream_consumed = True
  43. if hasattr(self._stream, "read"):
  44. # File-like interfaces should use 'read' directly.
  45. chunk = self._stream.read(self.CHUNK_SIZE)
  46. while chunk:
  47. yield chunk
  48. chunk = self._stream.read(self.CHUNK_SIZE)
  49. else:
  50. # Otherwise iterate.
  51. for part in self._stream:
  52. yield part
  53. class AsyncIteratorByteStream(AsyncByteStream):
  54. CHUNK_SIZE = 65_536
  55. def __init__(self, stream: AsyncIterable[bytes]) -> None:
  56. self._stream = stream
  57. self._is_stream_consumed = False
  58. self._is_generator = inspect.isasyncgen(stream)
  59. async def __aiter__(self) -> AsyncIterator[bytes]:
  60. if self._is_stream_consumed and self._is_generator:
  61. raise StreamConsumed()
  62. self._is_stream_consumed = True
  63. if hasattr(self._stream, "aread"):
  64. # File-like interfaces should use 'aread' directly.
  65. chunk = await self._stream.aread(self.CHUNK_SIZE)
  66. while chunk:
  67. yield chunk
  68. chunk = await self._stream.aread(self.CHUNK_SIZE)
  69. else:
  70. # Otherwise iterate.
  71. async for part in self._stream:
  72. yield part
  73. class UnattachedStream(AsyncByteStream, SyncByteStream):
  74. """
  75. If a request or response is serialized using pickle, then it is no longer
  76. attached to a stream for I/O purposes. Any stream operations should result
  77. in `httpx.StreamClosed`.
  78. """
  79. def __iter__(self) -> Iterator[bytes]:
  80. raise StreamClosed()
  81. async def __aiter__(self) -> AsyncIterator[bytes]:
  82. raise StreamClosed()
  83. yield b"" # pragma: no cover
  84. def encode_content(
  85. content: str | bytes | Iterable[bytes] | AsyncIterable[bytes],
  86. ) -> tuple[dict[str, str], SyncByteStream | AsyncByteStream]:
  87. if isinstance(content, (bytes, str)):
  88. body = content.encode("utf-8") if isinstance(content, str) else content
  89. content_length = len(body)
  90. headers = {"Content-Length": str(content_length)} if body else {}
  91. return headers, ByteStream(body)
  92. elif isinstance(content, Iterable) and not isinstance(content, dict):
  93. # `not isinstance(content, dict)` is a bit oddly specific, but it
  94. # catches a case that's easy for users to make in error, and would
  95. # otherwise pass through here, like any other bytes-iterable,
  96. # because `dict` happens to be iterable. See issue #2491.
  97. content_length_or_none = peek_filelike_length(content)
  98. if content_length_or_none is None:
  99. headers = {"Transfer-Encoding": "chunked"}
  100. else:
  101. headers = {"Content-Length": str(content_length_or_none)}
  102. return headers, IteratorByteStream(content) # type: ignore
  103. elif isinstance(content, AsyncIterable):
  104. headers = {"Transfer-Encoding": "chunked"}
  105. return headers, AsyncIteratorByteStream(content)
  106. raise TypeError(f"Unexpected type for 'content', {type(content)!r}")
  107. def encode_urlencoded_data(
  108. data: RequestData,
  109. ) -> tuple[dict[str, str], ByteStream]:
  110. plain_data = []
  111. for key, value in data.items():
  112. if isinstance(value, (list, tuple)):
  113. plain_data.extend([(key, primitive_value_to_str(item)) for item in value])
  114. else:
  115. plain_data.append((key, primitive_value_to_str(value)))
  116. body = urlencode(plain_data, doseq=True).encode("utf-8")
  117. content_length = str(len(body))
  118. content_type = "application/x-www-form-urlencoded"
  119. headers = {"Content-Length": content_length, "Content-Type": content_type}
  120. return headers, ByteStream(body)
  121. def encode_multipart_data(
  122. data: RequestData, files: RequestFiles, boundary: bytes | None
  123. ) -> tuple[dict[str, str], MultipartStream]:
  124. multipart = MultipartStream(data=data, files=files, boundary=boundary)
  125. headers = multipart.get_headers()
  126. return headers, multipart
  127. def encode_text(text: str) -> tuple[dict[str, str], ByteStream]:
  128. body = text.encode("utf-8")
  129. content_length = str(len(body))
  130. content_type = "text/plain; charset=utf-8"
  131. headers = {"Content-Length": content_length, "Content-Type": content_type}
  132. return headers, ByteStream(body)
  133. def encode_html(html: str) -> tuple[dict[str, str], ByteStream]:
  134. body = html.encode("utf-8")
  135. content_length = str(len(body))
  136. content_type = "text/html; charset=utf-8"
  137. headers = {"Content-Length": content_length, "Content-Type": content_type}
  138. return headers, ByteStream(body)
  139. def encode_json(json: Any) -> tuple[dict[str, str], ByteStream]:
  140. body = json_dumps(
  141. json, ensure_ascii=False, separators=(",", ":"), allow_nan=False
  142. ).encode("utf-8")
  143. content_length = str(len(body))
  144. content_type = "application/json"
  145. headers = {"Content-Length": content_length, "Content-Type": content_type}
  146. return headers, ByteStream(body)
  147. def encode_request(
  148. content: RequestContent | None = None,
  149. data: RequestData | None = None,
  150. files: RequestFiles | None = None,
  151. json: Any | None = None,
  152. boundary: bytes | None = None,
  153. ) -> tuple[dict[str, str], SyncByteStream | AsyncByteStream]:
  154. """
  155. Handles encoding the given `content`, `data`, `files`, and `json`,
  156. returning a two-tuple of (<headers>, <stream>).
  157. """
  158. if data is not None and not isinstance(data, Mapping):
  159. # We prefer to separate `content=<bytes|str|byte iterator|bytes aiterator>`
  160. # for raw request content, and `data=<form data>` for url encoded or
  161. # multipart form content.
  162. #
  163. # However for compat with requests, we *do* still support
  164. # `data=<bytes...>` usages. We deal with that case here, treating it
  165. # as if `content=<...>` had been supplied instead.
  166. message = "Use 'content=<...>' to upload raw bytes/text content."
  167. warnings.warn(message, DeprecationWarning, stacklevel=2)
  168. return encode_content(data)
  169. if content is not None:
  170. return encode_content(content)
  171. elif files:
  172. return encode_multipart_data(data or {}, files, boundary)
  173. elif data:
  174. return encode_urlencoded_data(data)
  175. elif json is not None:
  176. return encode_json(json)
  177. return {}, ByteStream(b"")
  178. def encode_response(
  179. content: ResponseContent | None = None,
  180. text: str | None = None,
  181. html: str | None = None,
  182. json: Any | None = None,
  183. ) -> tuple[dict[str, str], SyncByteStream | AsyncByteStream]:
  184. """
  185. Handles encoding the given `content`, returning a two-tuple of
  186. (<headers>, <stream>).
  187. """
  188. if content is not None:
  189. return encode_content(content)
  190. elif text is not None:
  191. return encode_text(text)
  192. elif html is not None:
  193. return encode_html(html)
  194. elif json is not None:
  195. return encode_json(json)
  196. return {}, ByteStream(b"")