python (3.11.7)
       1  from __future__ import absolute_import
       2  
       3  import collections
       4  import functools
       5  import logging
       6  
       7  from ._collections import RecentlyUsedContainer
       8  from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme
       9  from .exceptions import (
      10      LocationValueError,
      11      MaxRetryError,
      12      ProxySchemeUnknown,
      13      ProxySchemeUnsupported,
      14      URLSchemeUnknown,
      15  )
      16  from .packages import six
      17  from .packages.six.moves.urllib.parse import urljoin
      18  from .request import RequestMethods
      19  from .util.proxy import connection_requires_http_tunnel
      20  from .util.retry import Retry
      21  from .util.url import parse_url
      22  
# Public names exported by this module.
__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]


# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Keyword arguments that only make sense for TLS connections.
# ``PoolManager._new_pool`` removes each of these from the pool keyword
# arguments before building a pool for the plain ``"http"`` scheme.
SSL_KEYWORDS = (
    "key_file",
    "cert_file",
    "cert_reqs",
    "ca_certs",
    "ssl_version",
    "ca_cert_dir",
    "ssl_context",
    "key_password",
    "server_hostname",
)
      39  
# All known keyword arguments that could be provided to the pool manager, its
# pools, or the underlying connections. This is used to construct a pool key.
#
# Every entry is the keyword's name prefixed with ``key_``; the prefix is
# added by ``_default_key_normalizer`` because namedtuple fields can't start
# with an underscore (e.g. ``_proxy`` becomes ``key__proxy``).
_key_fields = (
    "key_scheme",  # str
    "key_host",  # str
    "key_port",  # int
    "key_timeout",  # int or float or Timeout
    "key_retries",  # int or Retry
    "key_strict",  # bool
    "key_block",  # bool
    "key_source_address",  # str
    "key_key_file",  # str
    "key_key_password",  # str
    "key_cert_file",  # str
    "key_cert_reqs",  # str
    "key_ca_certs",  # str
    "key_ssl_version",  # str
    "key_ca_cert_dir",  # str
    "key_ssl_context",  # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext
    "key_maxsize",  # int
    "key_headers",  # dict
    "key__proxy",  # parsed proxy url
    "key__proxy_headers",  # dict
    "key__proxy_config",  # class
    "key_socket_options",  # list of (level (int), optname (int), value (int or str)) tuples
    "key__socks_options",  # dict
    "key_assert_hostname",  # bool or string
    "key_assert_fingerprint",  # str
    "key_server_hostname",  # str
)

#: The namedtuple class used to construct keys for the connection pool.
#: All custom key schemes should include the fields in this key at a minimum.
PoolKey = collections.namedtuple("PoolKey", _key_fields)

# Proxy settings bundle. ``ProxyManager.__init__`` builds one from its
# ``proxy_ssl_context`` and ``use_forwarding_for_https`` arguments (in that
# order) and passes it to the pools as the ``_proxy_config`` keyword.
_proxy_config_fields = ("ssl_context", "use_forwarding_for_https")
ProxyConfig = collections.namedtuple("ProxyConfig", _proxy_config_fields)
      77  
      78  
      79  def _default_key_normalizer(key_class, request_context):
      80      """
      81      Create a pool key out of a request context dictionary.
      82  
      83      According to RFC 3986, both the scheme and host are case-insensitive.
      84      Therefore, this function normalizes both before constructing the pool
      85      key for an HTTPS request. If you wish to change this behaviour, provide
      86      alternate callables to ``key_fn_by_scheme``.
      87  
      88      :param key_class:
      89          The class to use when constructing the key. This should be a namedtuple
      90          with the ``scheme`` and ``host`` keys at a minimum.
      91      :type  key_class: namedtuple
      92      :param request_context:
      93          A dictionary-like object that contain the context for a request.
      94      :type  request_context: dict
      95  
      96      :return: A namedtuple that can be used as a connection pool key.
      97      :rtype:  PoolKey
      98      """
      99      # Since we mutate the dictionary, make a copy first
     100      context = request_context.copy()
     101      context["scheme"] = context["scheme"].lower()
     102      context["host"] = context["host"].lower()
     103  
     104      # These are both dictionaries and need to be transformed into frozensets
     105      for key in ("headers", "_proxy_headers", "_socks_options"):
     106          if key in context and context[key] is not None:
     107              context[key] = frozenset(context[key].items())
     108  
     109      # The socket_options key may be a list and needs to be transformed into a
     110      # tuple.
     111      socket_opts = context.get("socket_options")
     112      if socket_opts is not None:
     113          context["socket_options"] = tuple(socket_opts)
     114  
     115      # Map the kwargs to the names in the namedtuple - this is necessary since
     116      # namedtuples can't have fields starting with '_'.
     117      for key in list(context.keys()):
     118          context["key_" + key] = context.pop(key)
     119  
     120      # Default to ``None`` for keys missing from the context
     121      for field in key_class._fields:
     122          if field not in context:
     123              context[field] = None
     124  
     125      return key_class(**context)
     126  
     127  
#: A dictionary that maps a scheme to a callable that creates a pool key.
#: This can be used to alter the way pool keys are constructed, if desired.
#: Each PoolManager makes a copy of this dictionary so they can be configured
#: globally here, or individually on the instance.
#: Each callable takes the request context dictionary as its only argument.
key_fn_by_scheme = {
    "http": functools.partial(_default_key_normalizer, PoolKey),
    "https": functools.partial(_default_key_normalizer, PoolKey),
}

#: A dictionary that maps a scheme to the connection pool class used for it.
#: Unlike ``key_fn_by_scheme``, ``PoolManager.__init__`` stores a reference to
#: this same dictionary rather than a copy.
pool_classes_by_scheme = {"http": HTTPConnectionPool, "https": HTTPSConnectionPool}
     138  
     139  
     140  class ESC[4;38;5;81mPoolManager(ESC[4;38;5;149mRequestMethods):
     141      """
     142      Allows for arbitrary requests while transparently keeping track of
     143      necessary connection pools for you.
     144  
     145      :param num_pools:
     146          Number of connection pools to cache before discarding the least
     147          recently used pool.
     148  
     149      :param headers:
     150          Headers to include with all requests, unless other headers are given
     151          explicitly.
     152  
     153      :param \\**connection_pool_kw:
     154          Additional parameters are used to create fresh
     155          :class:`urllib3.connectionpool.ConnectionPool` instances.
     156  
     157      Example::
     158  
     159          >>> manager = PoolManager(num_pools=2)
     160          >>> r = manager.request('GET', 'http://google.com/')
     161          >>> r = manager.request('GET', 'http://google.com/mail')
     162          >>> r = manager.request('GET', 'http://yahoo.com/')
     163          >>> len(manager.pools)
     164          2
     165  
     166      """
     167  
     168      proxy = None
     169      proxy_config = None
     170  
     171      def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
     172          RequestMethods.__init__(self, headers)
     173          self.connection_pool_kw = connection_pool_kw
     174          self.pools = RecentlyUsedContainer(num_pools)
     175  
     176          # Locally set the pool classes and keys so other PoolManagers can
     177          # override them.
     178          self.pool_classes_by_scheme = pool_classes_by_scheme
     179          self.key_fn_by_scheme = key_fn_by_scheme.copy()
     180  
     181      def __enter__(self):
     182          return self
     183  
     184      def __exit__(self, exc_type, exc_val, exc_tb):
     185          self.clear()
     186          # Return False to re-raise any potential exceptions
     187          return False
     188  
     189      def _new_pool(self, scheme, host, port, request_context=None):
     190          """
     191          Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and
     192          any additional pool keyword arguments.
     193  
     194          If ``request_context`` is provided, it is provided as keyword arguments
     195          to the pool class used. This method is used to actually create the
     196          connection pools handed out by :meth:`connection_from_url` and
     197          companion methods. It is intended to be overridden for customization.
     198          """
     199          pool_cls = self.pool_classes_by_scheme[scheme]
     200          if request_context is None:
     201              request_context = self.connection_pool_kw.copy()
     202  
     203          # Although the context has everything necessary to create the pool,
     204          # this function has historically only used the scheme, host, and port
     205          # in the positional args. When an API change is acceptable these can
     206          # be removed.
     207          for key in ("scheme", "host", "port"):
     208              request_context.pop(key, None)
     209  
     210          if scheme == "http":
     211              for kw in SSL_KEYWORDS:
     212                  request_context.pop(kw, None)
     213  
     214          return pool_cls(host, port, **request_context)
     215  
     216      def clear(self):
     217          """
     218          Empty our store of pools and direct them all to close.
     219  
     220          This will not affect in-flight connections, but they will not be
     221          re-used after completion.
     222          """
     223          self.pools.clear()
     224  
     225      def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
     226          """
     227          Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme.
     228  
     229          If ``port`` isn't given, it will be derived from the ``scheme`` using
     230          ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
     231          provided, it is merged with the instance's ``connection_pool_kw``
     232          variable and used to create the new connection pool, if one is
     233          needed.
     234          """
     235  
     236          if not host:
     237              raise LocationValueError("No host specified.")
     238  
     239          request_context = self._merge_pool_kwargs(pool_kwargs)
     240          request_context["scheme"] = scheme or "http"
     241          if not port:
     242              port = port_by_scheme.get(request_context["scheme"].lower(), 80)
     243          request_context["port"] = port
     244          request_context["host"] = host
     245  
     246          return self.connection_from_context(request_context)
     247  
     248      def connection_from_context(self, request_context):
     249          """
     250          Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context.
     251  
     252          ``request_context`` must at least contain the ``scheme`` key and its
     253          value must be a key in ``key_fn_by_scheme`` instance variable.
     254          """
     255          scheme = request_context["scheme"].lower()
     256          pool_key_constructor = self.key_fn_by_scheme.get(scheme)
     257          if not pool_key_constructor:
     258              raise URLSchemeUnknown(scheme)
     259          pool_key = pool_key_constructor(request_context)
     260  
     261          return self.connection_from_pool_key(pool_key, request_context=request_context)
     262  
     263      def connection_from_pool_key(self, pool_key, request_context=None):
     264          """
     265          Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key.
     266  
     267          ``pool_key`` should be a namedtuple that only contains immutable
     268          objects. At a minimum it must have the ``scheme``, ``host``, and
     269          ``port`` fields.
     270          """
     271          with self.pools.lock:
     272              # If the scheme, host, or port doesn't match existing open
     273              # connections, open a new ConnectionPool.
     274              pool = self.pools.get(pool_key)
     275              if pool:
     276                  return pool
     277  
     278              # Make a fresh ConnectionPool of the desired type
     279              scheme = request_context["scheme"]
     280              host = request_context["host"]
     281              port = request_context["port"]
     282              pool = self._new_pool(scheme, host, port, request_context=request_context)
     283              self.pools[pool_key] = pool
     284  
     285          return pool
     286  
     287      def connection_from_url(self, url, pool_kwargs=None):
     288          """
     289          Similar to :func:`urllib3.connectionpool.connection_from_url`.
     290  
     291          If ``pool_kwargs`` is not provided and a new pool needs to be
     292          constructed, ``self.connection_pool_kw`` is used to initialize
     293          the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
     294          is provided, it is used instead. Note that if a new pool does not
     295          need to be created for the request, the provided ``pool_kwargs`` are
     296          not used.
     297          """
     298          u = parse_url(url)
     299          return self.connection_from_host(
     300              u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
     301          )
     302  
     303      def _merge_pool_kwargs(self, override):
     304          """
     305          Merge a dictionary of override values for self.connection_pool_kw.
     306  
     307          This does not modify self.connection_pool_kw and returns a new dict.
     308          Any keys in the override dictionary with a value of ``None`` are
     309          removed from the merged dictionary.
     310          """
     311          base_pool_kwargs = self.connection_pool_kw.copy()
     312          if override:
     313              for key, value in override.items():
     314                  if value is None:
     315                      try:
     316                          del base_pool_kwargs[key]
     317                      except KeyError:
     318                          pass
     319                  else:
     320                      base_pool_kwargs[key] = value
     321          return base_pool_kwargs
     322  
     323      def _proxy_requires_url_absolute_form(self, parsed_url):
     324          """
     325          Indicates if the proxy requires the complete destination URL in the
     326          request.  Normally this is only needed when not using an HTTP CONNECT
     327          tunnel.
     328          """
     329          if self.proxy is None:
     330              return False
     331  
     332          return not connection_requires_http_tunnel(
     333              self.proxy, self.proxy_config, parsed_url.scheme
     334          )
     335  
     336      def _validate_proxy_scheme_url_selection(self, url_scheme):
     337          """
     338          Validates that were not attempting to do TLS in TLS connections on
     339          Python2 or with unsupported SSL implementations.
     340          """
     341          if self.proxy is None or url_scheme != "https":
     342              return
     343  
     344          if self.proxy.scheme != "https":
     345              return
     346  
     347          if six.PY2 and not self.proxy_config.use_forwarding_for_https:
     348              raise ProxySchemeUnsupported(
     349                  "Contacting HTTPS destinations through HTTPS proxies "
     350                  "'via CONNECT tunnels' is not supported in Python 2"
     351              )
     352  
     353      def urlopen(self, method, url, redirect=True, **kw):
     354          """
     355          Same as :meth:`urllib3.HTTPConnectionPool.urlopen`
     356          with custom cross-host redirect logic and only sends the request-uri
     357          portion of the ``url``.
     358  
     359          The given ``url`` parameter must be absolute, such that an appropriate
     360          :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.
     361          """
     362          u = parse_url(url)
     363          self._validate_proxy_scheme_url_selection(u.scheme)
     364  
     365          conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)
     366  
     367          kw["assert_same_host"] = False
     368          kw["redirect"] = False
     369  
     370          if "headers" not in kw:
     371              kw["headers"] = self.headers.copy()
     372  
     373          if self._proxy_requires_url_absolute_form(u):
     374              response = conn.urlopen(method, url, **kw)
     375          else:
     376              response = conn.urlopen(method, u.request_uri, **kw)
     377  
     378          redirect_location = redirect and response.get_redirect_location()
     379          if not redirect_location:
     380              return response
     381  
     382          # Support relative URLs for redirecting.
     383          redirect_location = urljoin(url, redirect_location)
     384  
     385          # RFC 7231, Section 6.4.4
     386          if response.status == 303:
     387              method = "GET"
     388  
     389          retries = kw.get("retries")
     390          if not isinstance(retries, Retry):
     391              retries = Retry.from_int(retries, redirect=redirect)
     392  
     393          # Strip headers marked as unsafe to forward to the redirected location.
     394          # Check remove_headers_on_redirect to avoid a potential network call within
     395          # conn.is_same_host() which may use socket.gethostbyname() in the future.
     396          if retries.remove_headers_on_redirect and not conn.is_same_host(
     397              redirect_location
     398          ):
     399              headers = list(six.iterkeys(kw["headers"]))
     400              for header in headers:
     401                  if header.lower() in retries.remove_headers_on_redirect:
     402                      kw["headers"].pop(header, None)
     403  
     404          try:
     405              retries = retries.increment(method, url, response=response, _pool=conn)
     406          except MaxRetryError:
     407              if retries.raise_on_redirect:
     408                  response.drain_conn()
     409                  raise
     410              return response
     411  
     412          kw["retries"] = retries
     413          kw["redirect"] = redirect
     414  
     415          log.info("Redirecting %s -> %s", url, redirect_location)
     416  
     417          response.drain_conn()
     418          return self.urlopen(method, redirect_location, **kw)
     419  
     420  
     421  class ESC[4;38;5;81mProxyManager(ESC[4;38;5;149mPoolManager):
     422      """
     423      Behaves just like :class:`PoolManager`, but sends all requests through
     424      the defined proxy, using the CONNECT method for HTTPS URLs.
     425  
     426      :param proxy_url:
     427          The URL of the proxy to be used.
     428  
     429      :param proxy_headers:
     430          A dictionary containing headers that will be sent to the proxy. In case
     431          of HTTP they are being sent with each request, while in the
     432          HTTPS/CONNECT case they are sent only once. Could be used for proxy
     433          authentication.
     434  
     435      :param proxy_ssl_context:
     436          The proxy SSL context is used to establish the TLS connection to the
     437          proxy when using HTTPS proxies.
     438  
     439      :param use_forwarding_for_https:
     440          (Defaults to False) If set to True will forward requests to the HTTPS
     441          proxy to be made on behalf of the client instead of creating a TLS
     442          tunnel via the CONNECT method. **Enabling this flag means that request
     443          and response headers and content will be visible from the HTTPS proxy**
     444          whereas tunneling keeps request and response headers and content
     445          private.  IP address, target hostname, SNI, and port are always visible
     446          to an HTTPS proxy even when this flag is disabled.
     447  
     448      Example:
     449          >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
     450          >>> r1 = proxy.request('GET', 'http://google.com/')
     451          >>> r2 = proxy.request('GET', 'http://httpbin.org/')
     452          >>> len(proxy.pools)
     453          1
     454          >>> r3 = proxy.request('GET', 'https://httpbin.org/')
     455          >>> r4 = proxy.request('GET', 'https://twitter.com/')
     456          >>> len(proxy.pools)
     457          3
     458  
     459      """
     460  
     461      def __init__(
     462          self,
     463          proxy_url,
     464          num_pools=10,
     465          headers=None,
     466          proxy_headers=None,
     467          proxy_ssl_context=None,
     468          use_forwarding_for_https=False,
     469          **connection_pool_kw
     470      ):
     471  
     472          if isinstance(proxy_url, HTTPConnectionPool):
     473              proxy_url = "%s://%s:%i" % (
     474                  proxy_url.scheme,
     475                  proxy_url.host,
     476                  proxy_url.port,
     477              )
     478          proxy = parse_url(proxy_url)
     479  
     480          if proxy.scheme not in ("http", "https"):
     481              raise ProxySchemeUnknown(proxy.scheme)
     482  
     483          if not proxy.port:
     484              port = port_by_scheme.get(proxy.scheme, 80)
     485              proxy = proxy._replace(port=port)
     486  
     487          self.proxy = proxy
     488          self.proxy_headers = proxy_headers or {}
     489          self.proxy_ssl_context = proxy_ssl_context
     490          self.proxy_config = ProxyConfig(proxy_ssl_context, use_forwarding_for_https)
     491  
     492          connection_pool_kw["_proxy"] = self.proxy
     493          connection_pool_kw["_proxy_headers"] = self.proxy_headers
     494          connection_pool_kw["_proxy_config"] = self.proxy_config
     495  
     496          super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw)
     497  
     498      def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
     499          if scheme == "https":
     500              return super(ProxyManager, self).connection_from_host(
     501                  host, port, scheme, pool_kwargs=pool_kwargs
     502              )
     503  
     504          return super(ProxyManager, self).connection_from_host(
     505              self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs
     506          )
     507  
     508      def _set_proxy_headers(self, url, headers=None):
     509          """
     510          Sets headers needed by proxies: specifically, the Accept and Host
     511          headers. Only sets headers not provided by the user.
     512          """
     513          headers_ = {"Accept": "*/*"}
     514  
     515          netloc = parse_url(url).netloc
     516          if netloc:
     517              headers_["Host"] = netloc
     518  
     519          if headers:
     520              headers_.update(headers)
     521          return headers_
     522  
     523      def urlopen(self, method, url, redirect=True, **kw):
     524          "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
     525          u = parse_url(url)
     526          if not connection_requires_http_tunnel(self.proxy, self.proxy_config, u.scheme):
     527              # For connections using HTTP CONNECT, httplib sets the necessary
     528              # headers on the CONNECT to the proxy. If we're not using CONNECT,
     529              # we'll definitely need to set 'Host' at the very least.
     530              headers = kw.get("headers", self.headers)
     531              kw["headers"] = self._set_proxy_headers(url, headers)
     532  
     533          return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)
     534  
     535  
def proxy_from_url(url, **kw):
    """
    Create a :class:`ProxyManager` for the proxy at ``url``.

    :param url:
        Passed to :class:`ProxyManager` as ``proxy_url``.
    :param \\**kw:
        Forwarded unchanged to the :class:`ProxyManager` constructor.

    :return: The new :class:`ProxyManager` instance.
    """
    return ProxyManager(proxy_url=url, **kw)