| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641 |
- from __future__ import annotations
- import typing
- from urllib.parse import parse_qs, unquote, urlencode
- import idna
- from ._types import QueryParamTypes
- from ._urlparse import urlparse
- from ._utils import primitive_value_to_str
- __all__ = ["URL", "QueryParams"]
class URL:
    """
    A parsed URL whose individual components are exposed as read-only
    properties.

        url = httpx.URL("HTTPS://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink")

        assert url.scheme == "https"
        assert url.username == "jo@email.com"
        assert url.password == "a secret"
        assert url.userinfo == b"jo%40email.com:a%20secret"
        assert url.host == "müller.de"
        assert url.raw_host == b"xn--mller-kva.de"
        assert url.port == 1234
        assert url.netloc == b"xn--mller-kva.de:1234"
        assert url.path == "/pa th"
        assert url.query == b"search=ab"
        assert url.raw_path == b"/pa%20th?search=ab"
        assert url.fragment == "anchorlink"

    How the pieces of a URL map onto the properties:

       https://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink
    [scheme]   [  username  ] [password] [ host ][port][ path ] [ query ] [fragment]
               [       userinfo        ] [   netloc   ][    raw_path    ]

    Things worth knowing:

    * `url.scheme` is always lowercase.
    * `url.host` is always lowercase. Internationalized domain names are
      returned as unicode, with no IDNA encoding applied:

          url = httpx.URL("http://中国.icom.museum")
          assert url.host == "中国.icom.museum"
          url = httpx.URL("http://xn--fiqs8s.icom.museum")
          assert url.host == "中国.icom.museum"

    * `url.raw_host` is always lowercase, and is IDNA encoded:

          url = httpx.URL("http://中国.icom.museum")
          assert url.raw_host == b"xn--fiqs8s.icom.museum"
          url = httpx.URL("http://xn--fiqs8s.icom.museum")
          assert url.raw_host == b"xn--fiqs8s.icom.museum"

    * `url.port` is `None` or an integer. A port that is the default for the
      "http", "https", "ws", "wss" or "ftp" scheme is normalized to `None`:

          assert httpx.URL("http://example.com") == httpx.URL("http://example.com:80")
          assert httpx.URL("http://example.com").port is None
          assert httpx.URL("http://example.com:80").port is None

    * `url.userinfo` is raw bytes with no URL decoding applied. Most callers
      want `url.username` and `url.password`, which do decode.
    * `url.raw_path` is the raw bytes of path plus query, with no URL decoding
      applied. It is the request target used when building HTTP requests.
      Most callers want `url.path`.
    * `url.query` is raw bytes with no URL decoding applied. A query string
      can only be decoded correctly once it has been split into parameter
      names and values.
    """

    def __init__(self, url: URL | str = "", **kwargs: typing.Any) -> None:
        """
        Build a URL from a string or from another `URL`, optionally overriding
        individual components by keyword.

        Raises `TypeError` for an unknown keyword, for a keyword value of the
        wrong type, or when `url` is neither a `str` nor a `URL`.
        """
        if kwargs:
            # Every accepted keyword, mapped to the type its value must have.
            # "params" maps to `object`, so any value passes the type check.
            expected_types = {
                "scheme": str,
                "username": str,
                "password": str,
                "userinfo": bytes,
                "host": str,
                "port": int,
                "netloc": bytes,
                "path": str,
                "query": bytes,
                "raw_path": bytes,
                "fragment": str,
                "params": object,
            }

            for name, given in kwargs.items():
                if name not in expected_types:
                    raise TypeError(
                        f"{name!r} is an invalid keyword argument for URL()"
                    )
                wanted = expected_types[name]
                if not (given is None or isinstance(given, wanted)):
                    raise TypeError(
                        f"Argument {name!r} must be {wanted.__name__} "
                        f"but got {type(given).__name__}"
                    )
                if isinstance(given, bytes):
                    # Bytes components are handed on as ASCII text.
                    # Re-assigning an existing key is safe during iteration.
                    kwargs[name] = given.decode("ascii")

            if "params" in kwargs:
                # "params" is translated into the raw "query" component.
                # Empty params become `None` (not "") so the resulting URL
                # does not end in a bare "?".
                params = kwargs.pop("params")
                kwargs["query"] = str(QueryParams(params)) if params else None

        if isinstance(url, str):
            self._uri_reference = urlparse(url, **kwargs)
            return

        if isinstance(url, URL):
            self._uri_reference = url._uri_reference.copy_with(**kwargs)
            return

        raise TypeError(
            "Invalid type for url. Expected str or httpx.URL,"
            f" got {type(url)}: {url!r}"
        )

    @property
    def scheme(self) -> str:
        """
        The scheme of the URL, for instance "http" or "https".
        Always lowercase.
        """
        return self._uri_reference.scheme

    @property
    def raw_scheme(self) -> bytes:
        """
        The scheme as raw bytes, for instance b"http" or b"https".
        Always lowercase.
        """
        scheme_text = self._uri_reference.scheme
        return scheme_text.encode("ascii")

    @property
    def userinfo(self) -> bytes:
        """
        The userinfo portion as a raw bytestring, still URL encoded.
        For example: b"jo%40email.com:a%20secret".
        """
        userinfo_text = self._uri_reference.userinfo
        return userinfo_text.encode("ascii")

    @property
    def username(self) -> str:
        """
        The username as a string, URL decoded.
        For example: "jo@email.com"
        """
        encoded_name, _, _ = self._uri_reference.userinfo.partition(":")
        return unquote(encoded_name)

    @property
    def password(self) -> str:
        """
        The password as a string, URL decoded.
        For example: "a secret"
        """
        _, _, encoded_secret = self._uri_reference.userinfo.partition(":")
        return unquote(encoded_secret)

    @property
    def host(self) -> str:
        """
        The host as a string.
        Always lowercase, with IDNA hosts decoded back to unicode.

        Examples:

            url = httpx.URL("http://www.EXAMPLE.org")
            assert url.host == "www.example.org"

            url = httpx.URL("http://中国.icom.museum")
            assert url.host == "中国.icom.museum"

            url = httpx.URL("http://xn--fiqs8s.icom.museum")
            assert url.host == "中国.icom.museum"

            url = httpx.URL("https://[::ffff:192.168.0.1]")
            assert url.host == "::ffff:192.168.0.1"
        """
        hostname: str = self._uri_reference.host
        # Decoding is only attempted when the host begins with the IDNA
        # "xn--" prefix.
        return idna.decode(hostname) if hostname.startswith("xn--") else hostname

    @property
    def raw_host(self) -> bytes:
        """
        The host as raw bytes.
        Always lowercase, and IDNA encoded.

        Examples:

            url = httpx.URL("http://www.EXAMPLE.org")
            assert url.raw_host == b"www.example.org"

            url = httpx.URL("http://中国.icom.museum")
            assert url.raw_host == b"xn--fiqs8s.icom.museum"

            url = httpx.URL("http://xn--fiqs8s.icom.museum")
            assert url.raw_host == b"xn--fiqs8s.icom.museum"

            url = httpx.URL("https://[::ffff:192.168.0.1]")
            assert url.raw_host == b"::ffff:192.168.0.1"
        """
        hostname = self._uri_reference.host
        return hostname.encode("ascii")

    @property
    def port(self) -> int | None:
        """
        The port as an integer, or `None`.

        Ports are normalized as per the WHATWG spec: the default port for the
        "http", "https", "ws", "wss" and "ftp" schemes is always reported as
        `None`.

        For example:

            assert httpx.URL("http://www.example.com") == httpx.URL("http://www.example.com:80")
            assert httpx.URL("http://www.example.com:80").port is None
        """
        return self._uri_reference.port

    @property
    def netloc(self) -> bytes:
        """
        Either `host` or `host:port`, as bytes.
        Always lowercase, and IDNA encoded.

        Suitable for producing the value of a request "Host" header.
        """
        netloc_text = self._uri_reference.netloc
        return netloc_text.encode("ascii")

    @property
    def path(self) -> str:
        """
        The path as a string, URL decoded and without the query string.
        An empty path is reported as "/".

        For example:

            url = httpx.URL("https://example.com/pa%20th")
            assert url.path == "/pa th"
        """
        return unquote(self._uri_reference.path or "/")

    @property
    def query(self) -> bytes:
        """
        The query string as raw bytes, without the leading b"?".

        The interface is bytewise on purpose: URL decoding is only meaningful
        once the keys and values have been parsed into a QueryParams instance.

        For example:

            url = httpx.URL("https://example.com/?filter=some%20search%20terms")
            assert url.query == b"filter=some%20search%20terms"
        """
        return (self._uri_reference.query or "").encode("ascii")

    @property
    def params(self) -> QueryParams:
        """
        The query parameters, parsed into an immutable multidict.
        """
        raw_query = self._uri_reference.query
        return QueryParams(raw_query)

    @property
    def raw_path(self) -> bytes:
        """
        The full path plus query string, as raw bytes.
        This is the target used when constructing HTTP requests.

        For example:

            GET /users?search=some%20text HTTP/1.1
            Host: www.example.org
            Connection: close
        """
        reference = self._uri_reference
        target = reference.path or "/"
        if reference.query is not None:
            # An empty-but-present query still contributes its "?".
            target = target + "?" + reference.query
        return target.encode("ascii")

    @property
    def fragment(self) -> str:
        """
        The fragment, as used for HTML anchors.
        Returned as a string without the leading '#'.
        """
        return unquote(self._uri_reference.fragment or "")

    @property
    def is_absolute_url(self) -> bool:
        """
        `True` for absolute URLs such as 'http://example.com/path',
        `False` for relative URLs such as '/path'.
        """
        # `.is_absolute` from `rfc3986` is deliberately avoided: it reports
        # URLs carrying a fragment as not absolute. The question here is only
        # whether the URL names a scheme and a host to connect to.
        reference = self._uri_reference
        return bool(reference.scheme) and bool(reference.host)

    @property
    def is_relative_url(self) -> bool:
        """
        `True` for relative URLs such as '/path',
        `False` for absolute URLs such as 'http://example.com/path'.
        """
        return not self.is_absolute_url

    def copy_with(self, **kwargs: typing.Any) -> URL:
        """
        Return a new URL based on this one, with some components replaced.

        The accepted keywords are the same components that the `URL` class
        exposes as properties.

        For example:

            url = httpx.URL("https://www.example.com").copy_with(
                username="jo@email.com", password="a secret"
            )
            assert url == "https://jo%40email.com:a%20secret@www.example.com"
        """
        return URL(self, **kwargs)

    def copy_set_param(self, key: str, value: typing.Any = None) -> URL:
        """Return a copy of this URL with query parameter `key` set to `value`."""
        updated = self.params.set(key, value)
        return self.copy_with(params=updated)

    def copy_add_param(self, key: str, value: typing.Any = None) -> URL:
        """Return a copy of this URL with `value` appended to parameter `key`."""
        updated = self.params.add(key, value)
        return self.copy_with(params=updated)

    def copy_remove_param(self, key: str) -> URL:
        """Return a copy of this URL with query parameter `key` removed."""
        updated = self.params.remove(key)
        return self.copy_with(params=updated)

    def copy_merge_params(self, params: QueryParamTypes) -> URL:
        """Return a copy of this URL with `params` merged into its query."""
        updated = self.params.merge(params)
        return self.copy_with(params=updated)

    def join(self, url: URL | str) -> URL:
        """
        Resolve `url` against this URL, which acts as the base.

        Eg.

            url = httpx.URL("https://www.example.com/test")
            url = url.join("/new/path")
            assert url == "https://www.example.com/new/path"
        """
        from urllib.parse import urljoin

        base = str(self)
        reference = str(URL(url))
        return URL(urljoin(base, reference))

    def __hash__(self) -> int:
        return hash(str(self))

    def __eq__(self, other: typing.Any) -> bool:
        # Only URLs and strings are comparable. Strings are parsed first so
        # that both sides are compared in their string form after parsing.
        if not isinstance(other, (URL, str)):
            return False
        return str(self) == str(URL(other))

    def __str__(self) -> str:
        return str(self._uri_reference)

    def __repr__(self) -> str:
        scheme, userinfo, host, port, path, query, fragment = self._uri_reference

        if ":" in userinfo:
            # Never show the password component.
            visible_name = userinfo.split(":")[0]
            userinfo = f"{visible_name}:[secure]"

        authority = ""
        if userinfo:
            authority += f"{userinfo}@"
        # A host containing ":" is shown in brackets, as for IPv6 literals.
        authority += f"[{host}]" if ":" in host else host
        if port is not None:
            authority += f":{port}"

        rendered = ""
        if scheme:
            rendered += f"{self.scheme}:"
        if authority:
            rendered += f"//{authority}"
        rendered += path
        if query is not None:
            rendered += f"?{query}"
        if fragment is not None:
            rendered += f"#{fragment}"

        return f"{self.__class__.__name__}({rendered!r})"

    @property
    def raw(self) -> tuple[bytes, bytes, int, bytes]:  # pragma: nocover
        """
        Deprecated. A `(raw_scheme, raw_host, port, raw_path)` named tuple.
        Emits a warning each time it is accessed.
        """
        import collections
        import warnings

        warnings.warn("URL.raw is deprecated.")
        field_names = ["raw_scheme", "raw_host", "port", "raw_path"]
        RawURL = collections.namedtuple("RawURL", field_names)
        return RawURL(
            raw_scheme=self.raw_scheme,
            raw_host=self.raw_host,
            port=self.port,
            raw_path=self.raw_path,
        )
class QueryParams(typing.Mapping[str, str]):
    """
    URL query parameters, as an immutable multi-dict.

    Each key maps to a list of string values. The plain mapping interface
    (`q[key]`, `.get()`, `.values()`, `.items()`) exposes only the first value
    of each key; `.get_list()` and `.multi_items()` expose every value.

    Two instances are equal when they hold the same (key, value) pairs,
    regardless of order, and equal instances hash equal.
    """

    def __init__(self, *args: QueryParamTypes | None, **kwargs: typing.Any) -> None:
        """
        Build the parameters from at most one positional value, or from
        keyword arguments (not both).

        The positional value may be `None`, a `str` or ASCII `bytes` query
        string, another `QueryParams`, a list/tuple of `(key, value)` pairs,
        or a mapping whose values are single items or lists/tuples of items.
        """
        assert len(args) < 2, "Too many arguments."
        assert not (args and kwargs), "Cannot mix named and unnamed arguments."

        value = args[0] if args else kwargs

        if value is None or isinstance(value, (str, bytes)):
            value = value.decode("ascii") if isinstance(value, bytes) else value
            # Keep blank values, so "a=" is retained as {"a": [""]}.
            self._dict = parse_qs(value, keep_blank_values=True)
        elif isinstance(value, QueryParams):
            # Copy the value lists so the two instances share no state.
            self._dict = {k: list(v) for k, v in value._dict.items()}
        else:
            dict_value: dict[typing.Any, list[typing.Any]] = {}
            if isinstance(value, (list, tuple)):
                # Convert list inputs like:
                #     [("a", "123"), ("a", "456"), ("b", "789")]
                # To a dict representation, like:
                #     {"a": ["123", "456"], "b": ["789"]}
                for item in value:
                    dict_value.setdefault(item[0], []).append(item[1])
            else:
                # Convert dict inputs like:
                #     {"a": "123", "b": ["456", "789"]}
                # To dict inputs where values are always lists, like:
                #     {"a": ["123"], "b": ["456", "789"]}
                dict_value = {
                    k: list(v) if isinstance(v, (list, tuple)) else [v]
                    for k, v in value.items()
                }

            # Coerce keys with `str()` and every value with
            # `primitive_value_to_str()`, so the stored dict only ever holds
            # strings.
            self._dict = {
                str(k): [primitive_value_to_str(item) for item in v]
                for k, v in dict_value.items()
            }

    def keys(self) -> typing.KeysView[str]:
        """
        Return all the keys in the query params.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert list(q.keys()) == ["a", "b"]
        """
        return self._dict.keys()

    def values(self) -> typing.ValuesView[str]:
        """
        Return all the values in the query params. If a key occurs more than
        once only the first item for that key is returned.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert list(q.values()) == ["123", "789"]
        """
        return {k: v[0] for k, v in self._dict.items()}.values()

    def items(self) -> typing.ItemsView[str, str]:
        """
        Return all items in the query params. If a key occurs more than once
        only the first item for that key is returned.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert list(q.items()) == [("a", "123"), ("b", "789")]
        """
        return {k: v[0] for k, v in self._dict.items()}.items()

    def multi_items(self) -> list[tuple[str, str]]:
        """
        Return all items in the query params. Allow duplicate keys to occur.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert list(q.multi_items()) == [("a", "123"), ("a", "456"), ("b", "789")]
        """
        multi_items: list[tuple[str, str]] = []
        for k, v in self._dict.items():
            multi_items.extend([(k, i) for i in v])
        return multi_items

    def get(self, key: typing.Any, default: typing.Any = None) -> typing.Any:
        """
        Get a value from the query param for a given key. If the key occurs
        more than once, then only the first value is returned. Returns
        `default` when the key is absent.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert q.get("a") == "123"
        """
        if key in self._dict:
            return self._dict[str(key)][0]
        return default

    def get_list(self, key: str) -> list[str]:
        """
        Get all values from the query param for a given key, as a new list.
        Returns an empty list when the key is absent.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert q.get_list("a") == ["123", "456"]
        """
        return list(self._dict.get(str(key), []))

    def set(self, key: str, value: typing.Any = None) -> QueryParams:
        """
        Return a new QueryParams instance, setting the value of a key.
        Any existing values for that key are replaced.

        Usage:

            q = httpx.QueryParams("a=123")
            q = q.set("a", "456")
            assert q == httpx.QueryParams("a=456")
        """
        q = QueryParams()
        q._dict = dict(self._dict)
        q._dict[str(key)] = [primitive_value_to_str(value)]
        return q

    def add(self, key: str, value: typing.Any = None) -> QueryParams:
        """
        Return a new QueryParams instance, setting or appending the value of a key.

        Usage:

            q = httpx.QueryParams("a=123")
            q = q.add("a", "456")
            assert q == httpx.QueryParams("a=123&a=456")
        """
        q = QueryParams()
        q._dict = dict(self._dict)
        # `get_list()` returns a fresh list, so the list held by `self` is
        # never mutated.
        q._dict[str(key)] = q.get_list(key) + [primitive_value_to_str(value)]
        return q

    def remove(self, key: str) -> QueryParams:
        """
        Return a new QueryParams instance, removing the value of a key.
        Removing an absent key is not an error.

        Usage:

            q = httpx.QueryParams("a=123")
            q = q.remove("a")
            assert q == httpx.QueryParams("")
        """
        q = QueryParams()
        q._dict = dict(self._dict)
        q._dict.pop(str(key), None)
        return q

    def merge(self, params: QueryParamTypes | None = None) -> QueryParams:
        """
        Return a new QueryParams instance, updated with the given params.
        Where a key is present in both, the values from `params` replace the
        existing ones.

        Usage:

            q = httpx.QueryParams("a=123")
            q = q.merge({"b": "456"})
            assert q == httpx.QueryParams("a=123&b=456")

            q = httpx.QueryParams("a=123")
            q = q.merge({"a": "456", "b": "789"})
            assert q == httpx.QueryParams("a=456&b=789")
        """
        q = QueryParams(params)
        q._dict = {**self._dict, **q._dict}
        return q

    def __getitem__(self, key: typing.Any) -> str:
        return self._dict[key][0]

    def __contains__(self, key: typing.Any) -> bool:
        return key in self._dict

    def __iter__(self) -> typing.Iterator[typing.Any]:
        return iter(self.keys())

    def __len__(self) -> int:
        return len(self._dict)

    def __bool__(self) -> bool:
        return bool(self._dict)

    def __hash__(self) -> int:
        # Hash exactly what `__eq__` compares (the order-insensitive
        # multi-items), so that equal instances always hash equal. Hashing
        # `str(self)` would depend on key order, which `__eq__` ignores.
        return hash(tuple(sorted(self.multi_items())))

    def __eq__(self, other: typing.Any) -> bool:
        if not isinstance(other, self.__class__):
            return False
        return sorted(self.multi_items()) == sorted(other.multi_items())

    def __str__(self) -> str:
        return urlencode(self.multi_items())

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        query_string = str(self)
        return f"{class_name}({query_string!r})"

    def update(self, params: QueryParamTypes | None = None) -> None:
        """Always raises `RuntimeError`: instances are immutable."""
        raise RuntimeError(
            "QueryParams are immutable since 0.18.0. "
            "Use `q = q.merge(...)` to create an updated copy."
        )

    def __setitem__(self, key: str, value: str) -> None:
        """Always raises `RuntimeError`: instances are immutable."""
        raise RuntimeError(
            "QueryParams are immutable since 0.18.0. "
            "Use `q = q.set(key, value)` to create an updated copy."
        )
|