socks.py 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. """
  2. This module contains provisional support for SOCKS proxies from within
  3. urllib3. This module supports SOCKS4, SOCKS4A (an extension of SOCKS4), and
  4. SOCKS5. To enable its functionality, either install PySocks or install this
  5. module with the ``socks`` extra.
  6. The SOCKS implementation supports the full range of urllib3 features. It also
  7. supports the following SOCKS features:
  8. - SOCKS4A (``proxy_url='socks4a://...'``)
  9. - SOCKS4 (``proxy_url='socks4://...'``)
  10. - SOCKS5 with remote DNS (``proxy_url='socks5h://...'``)
  11. - SOCKS5 with local DNS (``proxy_url='socks5://...'``)
  12. - Usernames and passwords for the SOCKS proxy
  13. .. note::
  14. It is recommended to use ``socks5h://`` or ``socks4a://`` schemes in
  15. your ``proxy_url`` to ensure that DNS resolution is done from the remote
  16. server instead of client-side when connecting to a domain name.
  17. SOCKS4 supports IPv4 and domain names with the SOCKS4A extension. SOCKS5
  18. supports IPv4, IPv6, and domain names.
  19. When connecting to a SOCKS4 proxy the ``username`` portion of the ``proxy_url``
  20. will be sent as the ``userid`` section of the SOCKS request:
  21. .. code-block:: python
  22. proxy_url="socks4a://<userid>@proxy-host"
  23. When connecting to a SOCKS5 proxy the ``username`` and ``password`` portion
  24. of the ``proxy_url`` will be sent as the username/password to authenticate
  25. with the proxy:
  26. .. code-block:: python
  27. proxy_url="socks5h://<username>:<password>@proxy-host"
  28. """
  29. from __future__ import annotations
  30. try:
  31. import socks # type: ignore[import-not-found]
  32. except ImportError:
  33. import warnings
  34. from ..exceptions import DependencyWarning
  35. warnings.warn(
  36. (
  37. "SOCKS support in urllib3 requires the installation of optional "
  38. "dependencies: specifically, PySocks. For more information, see "
  39. "https://urllib3.readthedocs.io/en/latest/advanced-usage.html#socks-proxies"
  40. ),
  41. DependencyWarning,
  42. )
  43. raise
  44. import typing
  45. from socket import timeout as SocketTimeout
  46. from ..connection import HTTPConnection, HTTPSConnection
  47. from ..connectionpool import HTTPConnectionPool, HTTPSConnectionPool
  48. from ..exceptions import ConnectTimeoutError, NewConnectionError
  49. from ..poolmanager import PoolManager
  50. from ..util.url import parse_url
  51. try:
  52. import ssl
  53. except ImportError:
  54. ssl = None # type: ignore[assignment]
  55. class _TYPE_SOCKS_OPTIONS(typing.TypedDict):
  56. socks_version: int
  57. proxy_host: str | None
  58. proxy_port: str | None
  59. username: str | None
  60. password: str | None
  61. rdns: bool
  62. class SOCKSConnection(HTTPConnection):
  63. """
  64. A plain-text HTTP connection that connects via a SOCKS proxy.
  65. """
  66. def __init__(
  67. self,
  68. _socks_options: _TYPE_SOCKS_OPTIONS,
  69. *args: typing.Any,
  70. **kwargs: typing.Any,
  71. ) -> None:
  72. self._socks_options = _socks_options
  73. super().__init__(*args, **kwargs)
  74. def _new_conn(self) -> socks.socksocket:
  75. """
  76. Establish a new connection via the SOCKS proxy.
  77. """
  78. extra_kw: dict[str, typing.Any] = {}
  79. if self.source_address:
  80. extra_kw["source_address"] = self.source_address
  81. if self.socket_options:
  82. extra_kw["socket_options"] = self.socket_options
  83. try:
  84. conn = socks.create_connection(
  85. (self.host, self.port),
  86. proxy_type=self._socks_options["socks_version"],
  87. proxy_addr=self._socks_options["proxy_host"],
  88. proxy_port=self._socks_options["proxy_port"],
  89. proxy_username=self._socks_options["username"],
  90. proxy_password=self._socks_options["password"],
  91. proxy_rdns=self._socks_options["rdns"],
  92. timeout=self.timeout,
  93. **extra_kw,
  94. )
  95. except SocketTimeout as e:
  96. raise ConnectTimeoutError(
  97. self,
  98. f"Connection to {self.host} timed out. (connect timeout={self.timeout})",
  99. ) from e
  100. except socks.ProxyError as e:
  101. # This is fragile as hell, but it seems to be the only way to raise
  102. # useful errors here.
  103. if e.socket_err:
  104. error = e.socket_err
  105. if isinstance(error, SocketTimeout):
  106. raise ConnectTimeoutError(
  107. self,
  108. f"Connection to {self.host} timed out. (connect timeout={self.timeout})",
  109. ) from e
  110. else:
  111. # Adding `from e` messes with coverage somehow, so it's omitted.
  112. # See #2386.
  113. raise NewConnectionError(
  114. self, f"Failed to establish a new connection: {error}"
  115. )
  116. else:
  117. raise NewConnectionError(
  118. self, f"Failed to establish a new connection: {e}"
  119. ) from e
  120. except OSError as e: # Defensive: PySocks should catch all these.
  121. raise NewConnectionError(
  122. self, f"Failed to establish a new connection: {e}"
  123. ) from e
  124. return conn
  125. # We don't need to duplicate the Verified/Unverified distinction from
  126. # urllib3/connection.py here because the HTTPSConnection will already have been
  127. # correctly set to either the Verified or Unverified form by that module. This
  128. # means the SOCKSHTTPSConnection will automatically be the correct type.
  129. class SOCKSHTTPSConnection(SOCKSConnection, HTTPSConnection):
  130. pass
  131. class SOCKSHTTPConnectionPool(HTTPConnectionPool):
  132. ConnectionCls = SOCKSConnection
  133. class SOCKSHTTPSConnectionPool(HTTPSConnectionPool):
  134. ConnectionCls = SOCKSHTTPSConnection
  135. class SOCKSProxyManager(PoolManager):
  136. """
  137. A version of the urllib3 ProxyManager that routes connections via the
  138. defined SOCKS proxy.
  139. """
  140. pool_classes_by_scheme = {
  141. "http": SOCKSHTTPConnectionPool,
  142. "https": SOCKSHTTPSConnectionPool,
  143. }
  144. def __init__(
  145. self,
  146. proxy_url: str,
  147. username: str | None = None,
  148. password: str | None = None,
  149. num_pools: int = 10,
  150. headers: typing.Mapping[str, str] | None = None,
  151. **connection_pool_kw: typing.Any,
  152. ):
  153. parsed = parse_url(proxy_url)
  154. if username is None and password is None and parsed.auth is not None:
  155. split = parsed.auth.split(":")
  156. if len(split) == 2:
  157. username, password = split
  158. if parsed.scheme == "socks5":
  159. socks_version = socks.PROXY_TYPE_SOCKS5
  160. rdns = False
  161. elif parsed.scheme == "socks5h":
  162. socks_version = socks.PROXY_TYPE_SOCKS5
  163. rdns = True
  164. elif parsed.scheme == "socks4":
  165. socks_version = socks.PROXY_TYPE_SOCKS4
  166. rdns = False
  167. elif parsed.scheme == "socks4a":
  168. socks_version = socks.PROXY_TYPE_SOCKS4
  169. rdns = True
  170. else:
  171. raise ValueError(f"Unable to determine SOCKS version from {proxy_url}")
  172. self.proxy_url = proxy_url
  173. socks_options = {
  174. "socks_version": socks_version,
  175. "proxy_host": parsed.host,
  176. "proxy_port": parsed.port,
  177. "username": username,
  178. "password": password,
  179. "rdns": rdns,
  180. }
  181. connection_pool_kw["_socks_options"] = socks_options
  182. super().__init__(num_pools, headers, **connection_pool_kw)
  183. self.pool_classes_by_scheme = SOCKSProxyManager.pool_classes_by_scheme