Python-3.12.0/Lib/urllib/request.py
       1  """An extensible library for opening URLs using a variety of protocols
       2  
       3  The simplest way to use this module is to call the urlopen function,
       4  which accepts a string containing a URL or a Request object (described
       5  below).  It opens the URL and returns the results as a file-like
       6  object; the returned object has some extra methods described below.
       7  
       8  The OpenerDirector manages a collection of Handler objects that do
       9  all the actual work.  Each Handler implements a particular protocol or
      10  option.  The OpenerDirector is a composite object that invokes the
      11  Handlers needed to open the requested URL.  For example, the
      12  HTTPHandler performs HTTP GET and POST requests and deals with
      13  non-error returns.  The HTTPRedirectHandler automatically deals with
      14  HTTP 301, 302, 303, 307, and 308 redirect errors, and the
      15  HTTPDigestAuthHandler deals with digest authentication.
      16  
      17  urlopen(url, data=None) -- Basic usage is the same as the original
      18  urllib.  Pass the url and optionally data to post to an HTTP URL, and
      19  get a file-like object back.  One difference is that you can also pass
      20  a Request instance instead of a URL.  Raises a URLError (subclass of
      21  OSError); for HTTP errors, raises an HTTPError, which can also be
      22  treated as a valid response.
      23  
      24  build_opener -- Function that creates a new OpenerDirector instance.
      25  Will install the default handlers.  Accepts one or more Handlers as
      26  arguments, either instances or Handler classes that it will
      27  instantiate.  If one of the arguments is a subclass of a default
      28  handler, that argument will be installed instead of the default.
      29  
      30  install_opener -- Installs a new opener as the default opener.
      31  
      32  objects of interest:
      33  
      34  OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
      35  the Handler classes, while dealing with requests and responses.
      36  
      37  Request -- An object that encapsulates the state of a request.  The
      38  state can be as simple as the URL.  It can also include extra HTTP
      39  headers, e.g. a User-Agent.
      40  
      41  BaseHandler -- Parent class from which all Handler classes derive.
      42  
      43  internals:
      44  BaseHandler and parent
      45  _call_chain conventions
      46  
      47  Example usage:
      48  
      49  import urllib.request
      50  
      51  # set up authentication info
      52  authinfo = urllib.request.HTTPBasicAuthHandler()
      53  authinfo.add_password(realm='PDQ Application',
      54                        uri='https://mahler:8092/site-updates.py',
      55                        user='klem',
      56                        passwd='geheim$parole')
      57  
      58  proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
      59  
      60  # build a new opener that adds authentication and caching FTP handlers
      61  opener = urllib.request.build_opener(proxy_support, authinfo,
      62                                       urllib.request.CacheFTPHandler)
      63  
      64  # install it
      65  urllib.request.install_opener(opener)
      66  
      67  f = urllib.request.urlopen('https://www.python.org/')
      68  """
      69  
      70  # XXX issues:
      71  # If an authentication error handler tries to perform authentication
      72  # but fails, how should the error be signalled?  The client needs to
      73  # know the HTTP error code.  But if the handler knows what the problem
      74  # was, e.g. that it didn't recognize the hash algorithm requested in
      75  # the challenge, it would be good to pass that information along to
      76  # the client, too.
      77  # FTP errors aren't handled cleanly.
      78  # Check the digest implementation against a correct (i.e. non-Apache) one.
      79  
      80  # Possible extensions:
      81  # complex proxies  XXX not sure what exactly was meant by this
      82  # abstract factory for opener
      83  
      84  import base64
      85  import bisect
      86  import email
      87  import hashlib
      88  import http.client
      89  import io
      90  import os
      91  import re
      92  import socket
      93  import string
      94  import sys
      95  import time
      96  import tempfile
      97  import contextlib
      98  import warnings
      99  
     100  
     101  from urllib.error import URLError, HTTPError, ContentTooShortError
     102  from urllib.parse import (
     103      urlparse, urlsplit, urljoin, unwrap, quote, unquote,
     104      _splittype, _splithost, _splitport, _splituser, _splitpasswd,
     105      _splitattr, _splitquery, _splitvalue, _splittag, _to_bytes,
     106      unquote_to_bytes, urlunparse)
     107  from urllib.response import addinfourl, addclosehook
     108  
     109  # check for SSL
     110  try:
     111      import ssl
     112  except ImportError:
     113      _have_ssl = False
     114  else:
     115      _have_ssl = True
     116  
     117  __all__ = [
     118      # Classes
     119      'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
     120      'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
     121      'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
     122      'HTTPPasswordMgrWithPriorAuth', 'AbstractBasicAuthHandler',
     123      'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler',
     124      'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'HTTPHandler',
     125      'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
     126      'UnknownHandler', 'HTTPErrorProcessor',
     127      # Functions
     128      'urlopen', 'install_opener', 'build_opener',
     129      'pathname2url', 'url2pathname', 'getproxies',
     130      # Legacy interface
     131      'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
     132  ]
     133  
     134  # used in the User-Agent header sent to servers
     135  __version__ = '%d.%d' % sys.version_info[:2]
     136  
     137  _opener = None
     138  def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
     139              *, cafile=None, capath=None, cadefault=False, context=None):
     140      '''Open the URL url, which can be either a string or a Request object.
     141  
     142      *data* must be an object specifying additional data to be sent to
     143      the server, or None if no such data is needed.  See Request for
     144      details.
     145  
     146      The urllib.request module uses HTTP/1.1 and includes a "Connection: close"
     147      header in its HTTP requests.
     148  
     149      The optional *timeout* parameter specifies a timeout in seconds for
     150      blocking operations like the connection attempt (if not specified, the
     151      global default timeout setting will be used). This only works for HTTP,
     152      HTTPS and FTP connections.
     153  
     154      If *context* is specified, it must be an ssl.SSLContext instance describing
     155      the various SSL options. See HTTPSConnection for more details.
     156  
     157      The optional *cafile* and *capath* parameters specify a set of trusted CA
     158      certificates for HTTPS requests. cafile should point to a single file
     159      containing a bundle of CA certificates, whereas capath should point to a
     160      directory of hashed certificate files. More information can be found in
     161      ssl.SSLContext.load_verify_locations().
     162  
     163      The *cadefault* parameter is ignored.
     164  
     165  
     166      This function always returns an object which can work as a
     167      context manager and has the properties url, headers, and status.
     168      See urllib.response.addinfourl for more detail on these properties.
     169  
     170      For HTTP and HTTPS URLs, this function returns an http.client.HTTPResponse
     171      object, slightly modified.  In addition to the three properties above, the
     172      msg attribute contains the same information as the reason attribute ---
     173      the reason phrase returned by the server --- instead of the response
     174      headers, as specified in the documentation for HTTPResponse.
     175  
     176      For FTP, file, and data URLs and requests explicitly handled by legacy
     177      URLopener and FancyURLopener classes, this function returns a
     178      urllib.response.addinfourl object.
     179  
     180      Note that None may be returned if no handler handles the request (though
     181      the default installed global OpenerDirector uses UnknownHandler to ensure
     182      this never happens).
     183  
     184      In addition, if proxy settings are detected (for example, when a *_proxy
     185      environment variable like http_proxy is set), a ProxyHandler is installed
     186      by default and makes sure the requests are handled through the proxy.
     187  
     188      '''
     189      global _opener
     190      if cafile or capath or cadefault:
     191          import warnings
     192          warnings.warn("cafile, capath and cadefault are deprecated, use a "
     193                        "custom context instead.", DeprecationWarning, 2)
     194          if context is not None:
     195              raise ValueError(
     196                  "You can't pass both context and any of cafile, capath, and "
     197                  "cadefault"
     198              )
     199          if not _have_ssl:
     200              raise ValueError('SSL support not available')
     201          context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
     202                                               cafile=cafile,
     203                                               capath=capath)
     204          # send ALPN extension to indicate HTTP/1.1 protocol
     205          context.set_alpn_protocols(['http/1.1'])
     206          https_handler = HTTPSHandler(context=context)
     207          opener = build_opener(https_handler)
     208      elif context:
     209          https_handler = HTTPSHandler(context=context)
     210          opener = build_opener(https_handler)
     211      elif _opener is None:
     212          _opener = opener = build_opener()
     213      else:
     214          opener = _opener
     215      return opener.open(url, data, timeout)
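
# Illustrative sketch (not used by the module itself): opening an HTTPS URL
# with a caller-supplied SSLContext, as described in the docstring above.
# The URL is a placeholder.
def _example_urlopen_with_context():
    if not _have_ssl:
        raise RuntimeError("the ssl module is required for this example")
    context = ssl.create_default_context()
    with urlopen("https://www.python.org/", context=context) as resp:
        # the returned object exposes url, headers and status
        return resp.status, resp.read(100)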
     216  
     217  def install_opener(opener):
     218      global _opener
     219      _opener = opener
     220  
     221  _url_tempfiles = []
     222  def urlretrieve(url, filename=None, reporthook=None, data=None):
     223      """
     224      Retrieve a URL into a temporary location on disk.
     225  
     226      Requires a URL argument. If a filename is passed, it is used as
     227      the temporary file location. The reporthook argument should be
     228      a callable that accepts a block number, a read size, and the
     229      total file size of the URL target. The data argument should be
     230      valid URL-encoded data.
     231  
     232      If a filename is passed and the URL points to a local resource,
     233      the result is a copy from local file to new file.
     234  
     235      Returns a tuple containing the path to the newly created
     236      data file as well as the resulting HTTPMessage object.
     237      """
     238      url_type, path = _splittype(url)
     239  
     240      with contextlib.closing(urlopen(url, data)) as fp:
     241          headers = fp.info()
     242  
     243          # Just return the local path and the "headers" for file://
     244          # URLs. No sense in performing a copy unless requested.
     245          if url_type == "file" and not filename:
     246              return os.path.normpath(path), headers
     247  
     248          # Handle temporary file setup.
     249          if filename:
     250              tfp = open(filename, 'wb')
     251          else:
     252              tfp = tempfile.NamedTemporaryFile(delete=False)
     253              filename = tfp.name
     254              _url_tempfiles.append(filename)
     255  
     256          with tfp:
     257              result = filename, headers
     258              bs = 1024*8
     259              size = -1
     260              read = 0
     261              blocknum = 0
     262              if "content-length" in headers:
     263                  size = int(headers["Content-Length"])
     264  
     265              if reporthook:
     266                  reporthook(blocknum, bs, size)
     267  
     268              while block := fp.read(bs):
     269                  read += len(block)
     270                  tfp.write(block)
     271                  blocknum += 1
     272                  if reporthook:
     273                      reporthook(blocknum, bs, size)
     274  
     275      if size >= 0 and read < size:
     276          raise ContentTooShortError(
     277              "retrieval incomplete: got only %i out of %i bytes"
     278              % (read, size), result)
     279  
     280      return result
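
# Illustrative sketch (not used by the module itself): urlretrieve() with a
# reporthook matching the (block number, block size, total size) signature
# described in the docstring above.  The URL is a placeholder and the
# progress figure is approximate.
def _example_urlretrieve_progress():
    def hook(blocknum, blocksize, totalsize):
        received = blocknum * blocksize
        if totalsize > 0:
            print("%.1f%% downloaded" % (min(received, totalsize) * 100.0 / totalsize))
        else:
            print("%d bytes downloaded so far" % received)
    filename, headers = urlretrieve("https://www.python.org/", reporthook=hook)
    return filename, headers.get_content_type()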
     281  
     282  def urlcleanup():
     283      """Clean up temporary files from urlretrieve calls."""
     284      for temp_file in _url_tempfiles:
     285          try:
     286              os.unlink(temp_file)
     287          except OSError:
     288              pass
     289  
     290      del _url_tempfiles[:]
     291      global _opener
     292      if _opener:
     293          _opener = None
     294  
     295  # copied from cookielib.py
     296  _cut_port_re = re.compile(r":\d+$", re.ASCII)
     297  def request_host(request):
     298      """Return request-host, as defined by RFC 2965.
     299  
     300      Variation from RFC: returned value is lowercased, for convenient
     301      comparison.
     302  
     303      """
     304      url = request.full_url
     305      host = urlparse(url)[1]
     306      if host == "":
     307          host = request.get_header("Host", "")
     308  
     309      # remove port, if present
     310      host = _cut_port_re.sub("", host, 1)
     311      return host.lower()
     312  
     313  class Request:
     314  
     315      def __init__(self, url, data=None, headers={},
     316                   origin_req_host=None, unverifiable=False,
     317                   method=None):
     318          self.full_url = url
     319          self.headers = {}
     320          self.unredirected_hdrs = {}
     321          self._data = None
     322          self.data = data
     323          self._tunnel_host = None
     324          for key, value in headers.items():
     325              self.add_header(key, value)
     326          if origin_req_host is None:
     327              origin_req_host = request_host(self)
     328          self.origin_req_host = origin_req_host
     329          self.unverifiable = unverifiable
     330          if method:
     331              self.method = method
     332  
     333      @property
     334      def full_url(self):
     335          if self.fragment:
     336              return '{}#{}'.format(self._full_url, self.fragment)
     337          return self._full_url
     338  
     339      @full_url.setter
     340      def full_url(self, url):
     341          # unwrap('<URL:type://host/path>') --> 'type://host/path'
     342          self._full_url = unwrap(url)
     343          self._full_url, self.fragment = _splittag(self._full_url)
     344          self._parse()
     345  
     346      @full_url.deleter
     347      def full_url(self):
     348          self._full_url = None
     349          self.fragment = None
     350          self.selector = ''
     351  
     352      @property
     353      def data(self):
     354          return self._data
     355  
     356      @data.setter
     357      def data(self, data):
     358          if data != self._data:
     359              self._data = data
     360              # issue 16464
     361              # if we change data we need to remove the content-length header
     362              # (because it was most probably calculated for the previous value)
     363              if self.has_header("Content-length"):
     364                  self.remove_header("Content-length")
     365  
     366      @data.deleter
     367      def data(self):
     368          self.data = None
     369  
     370      def _parse(self):
     371          self.type, rest = _splittype(self._full_url)
     372          if self.type is None:
     373              raise ValueError("unknown url type: %r" % self.full_url)
     374          self.host, self.selector = _splithost(rest)
     375          if self.host:
     376              self.host = unquote(self.host)
     377  
     378      def get_method(self):
     379          """Return a string indicating the HTTP request method."""
     380          default_method = "POST" if self.data is not None else "GET"
     381          return getattr(self, 'method', default_method)
     382  
     383      def get_full_url(self):
     384          return self.full_url
     385  
     386      def set_proxy(self, host, type):
     387          if self.type == 'https' and not self._tunnel_host:
     388              self._tunnel_host = self.host
     389          else:
     390              self.type = type
     391              self.selector = self.full_url
     392          self.host = host
     393  
     394      def has_proxy(self):
     395          return self.selector == self.full_url
     396  
     397      def add_header(self, key, val):
     398          # useful for something like authentication
     399          self.headers[key.capitalize()] = val
     400  
     401      def add_unredirected_header(self, key, val):
     402          # will not be added to a redirected request
     403          self.unredirected_hdrs[key.capitalize()] = val
     404  
     405      def has_header(self, header_name):
     406          return (header_name in self.headers or
     407                  header_name in self.unredirected_hdrs)
     408  
     409      def get_header(self, header_name, default=None):
     410          return self.headers.get(
     411              header_name,
     412              self.unredirected_hdrs.get(header_name, default))
     413  
     414      def remove_header(self, header_name):
     415          self.headers.pop(header_name, None)
     416          self.unredirected_hdrs.pop(header_name, None)
     417  
     418      def header_items(self):
     419          hdrs = {**self.unredirected_hdrs, **self.headers}
     420          return list(hdrs.items())
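
# Illustrative sketch (not used by the module itself): the request method is
# derived from .data unless an explicit method= is given, and header names
# added through add_header() are normalized with str.capitalize().  The URL
# and header values are placeholders.
def _example_request_usage():
    req = Request("http://www.example.com/", headers={"X-Example": "1"})
    assert req.get_method() == "GET"          # no data, so GET
    req.data = b"payload"
    assert req.get_method() == "POST"         # data present, so POST
    req.add_header("USER-AGENT", "example-client")
    assert req.get_header("User-agent") == "example-client"
    return req.header_items()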
     421  
     422  class OpenerDirector:
     423      def __init__(self):
     424          client_version = "Python-urllib/%s" % __version__
     425          self.addheaders = [('User-agent', client_version)]
     426          # self.handlers is retained only for backward compatibility
     427          self.handlers = []
     428          # manage the individual handlers
     429          self.handle_open = {}
     430          self.handle_error = {}
     431          self.process_response = {}
     432          self.process_request = {}
     433  
     434      def add_handler(self, handler):
     435          if not hasattr(handler, "add_parent"):
     436              raise TypeError("expected BaseHandler instance, got %r" %
     437                              type(handler))
     438  
     439          added = False
     440          for meth in dir(handler):
     441              if meth in ["redirect_request", "do_open", "proxy_open"]:
     442                  # oops, coincidental match
     443                  continue
     444  
     445              i = meth.find("_")
     446              protocol = meth[:i]
     447              condition = meth[i+1:]
     448  
     449              if condition.startswith("error"):
     450                  j = condition.find("_") + i + 1
     451                  kind = meth[j+1:]
     452                  try:
     453                      kind = int(kind)
     454                  except ValueError:
     455                      pass
     456                  lookup = self.handle_error.get(protocol, {})
     457                  self.handle_error[protocol] = lookup
     458              elif condition == "open":
     459                  kind = protocol
     460                  lookup = self.handle_open
     461              elif condition == "response":
     462                  kind = protocol
     463                  lookup = self.process_response
     464              elif condition == "request":
     465                  kind = protocol
     466                  lookup = self.process_request
     467              else:
     468                  continue
     469  
     470              handlers = lookup.setdefault(kind, [])
     471              if handlers:
     472                  bisect.insort(handlers, handler)
     473              else:
     474                  handlers.append(handler)
     475              added = True
     476  
     477          if added:
     478              bisect.insort(self.handlers, handler)
     479              handler.add_parent(self)
     480  
     481      def close(self):
     482          # Only exists for backwards compatibility.
     483          pass
     484  
     485      def _call_chain(self, chain, kind, meth_name, *args):
     486          # Handlers raise an exception if no one else should try to handle
     487          # the request, or return None if they can't but another handler
     488          # could.  Otherwise, they return the response.
     489          handlers = chain.get(kind, ())
     490          for handler in handlers:
     491              func = getattr(handler, meth_name)
     492              result = func(*args)
     493              if result is not None:
     494                  return result
     495  
     496      def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
     497          # accept a URL or a Request object
     498          if isinstance(fullurl, str):
     499              req = Request(fullurl, data)
     500          else:
     501              req = fullurl
     502              if data is not None:
     503                  req.data = data
     504  
     505          req.timeout = timeout
     506          protocol = req.type
     507  
     508          # pre-process request
     509          meth_name = protocol+"_request"
     510          for processor in self.process_request.get(protocol, []):
     511              meth = getattr(processor, meth_name)
     512              req = meth(req)
     513  
     514          sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
     515          response = self._open(req, data)
     516  
     517          # post-process response
     518          meth_name = protocol+"_response"
     519          for processor in self.process_response.get(protocol, []):
     520              meth = getattr(processor, meth_name)
     521              response = meth(req, response)
     522  
     523          return response
     524  
     525      def _open(self, req, data=None):
     526          result = self._call_chain(self.handle_open, 'default',
     527                                    'default_open', req)
     528          if result:
     529              return result
     530  
     531          protocol = req.type
     532          result = self._call_chain(self.handle_open, protocol, protocol +
     533                                    '_open', req)
     534          if result:
     535              return result
     536  
     537          return self._call_chain(self.handle_open, 'unknown',
     538                                  'unknown_open', req)
     539  
     540      def error(self, proto, *args):
     541          if proto in ('http', 'https'):
     542              # XXX http[s] protocols are special-cased
     543              dict = self.handle_error['http'] # https is not different than http
     544              proto = args[2]  # YUCK!
     545              meth_name = 'http_error_%s' % proto
     546              http_err = 1
     547              orig_args = args
     548          else:
     549              dict = self.handle_error
     550              meth_name = proto + '_error'
     551              http_err = 0
     552          args = (dict, proto, meth_name) + args
     553          result = self._call_chain(*args)
     554          if result:
     555              return result
     556  
     557          if http_err:
     558              args = (dict, 'default', 'http_error_default') + orig_args
     559              return self._call_chain(*args)
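
# Illustrative sketch (not used by the module itself): add_handler() above
# discovers handler methods by name.  "<protocol>_open" opens requests,
# "<protocol>_request"/"<protocol>_response" pre-/post-process them, and
# "<protocol>_error_<code>" handles an HTTP error code.  The handler class
# and header below are hypothetical.
def _example_handler_naming():
    class ExampleHandler(BaseHandler):
        def http_request(self, req):
            # registered in process_request['http']
            req.add_header("X-example", "1")
            return req
        def http_error_418(self, req, fp, code, msg, hdrs):
            # registered in handle_error['http'][418]; returning None lets
            # another handler (or the default) deal with the error
            return None
    return build_opener(ExampleHandler())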
     560  
     561  # XXX probably also want an abstract factory that knows when it makes
     562  # sense to skip a superclass in favor of a subclass and when it might
     563  # make sense to include both
     564  
     565  def build_opener(*handlers):
     566      """Create an opener object from a list of handlers.
     567  
     568      The opener will use several default handlers, including support
     569      for HTTP, FTP and, when applicable, HTTPS.
     570  
     571      If any of the handlers passed as arguments are subclasses of the
     572      default handlers, the default handlers will not be used.
     573      """
     574      opener = OpenerDirector()
     575      default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
     576                         HTTPDefaultErrorHandler, HTTPRedirectHandler,
     577                         FTPHandler, FileHandler, HTTPErrorProcessor,
     578                         DataHandler]
     579      if hasattr(http.client, "HTTPSConnection"):
     580          default_classes.append(HTTPSHandler)
     581      skip = set()
     582      for klass in default_classes:
     583          for check in handlers:
     584              if isinstance(check, type):
     585                  if issubclass(check, klass):
     586                      skip.add(klass)
     587              elif isinstance(check, klass):
     588                  skip.add(klass)
     589      for klass in skip:
     590          default_classes.remove(klass)
     591  
     592      for klass in default_classes:
     593          opener.add_handler(klass())
     594  
     595      for h in handlers:
     596          if isinstance(h, type):
     597              h = h()
     598          opener.add_handler(h)
     599      return opener
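
# Illustrative sketch (not used by the module itself): because the class below
# subclasses HTTPHandler, build_opener() skips the default HTTPHandler and
# installs the subclass instead.  The class name is hypothetical.
def _example_build_opener_subclass():
    class DebugHTTPHandler(HTTPHandler):
        def __init__(self):
            # turn on http.client debug output for every HTTP request
            super().__init__(debuglevel=1)
    return build_opener(DebugHTTPHandler)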
     600  
     601  class BaseHandler:
     602      handler_order = 500
     603  
     604      def add_parent(self, parent):
     605          self.parent = parent
     606  
     607      def close(self):
     608          # Only exists for backwards compatibility
     609          pass
     610  
     611      def __lt__(self, other):
     612          if not hasattr(other, "handler_order"):
     613              # Try to preserve the old behavior of having custom classes
     614              # inserted after default ones (works only for custom user
     615              # classes which are not aware of handler_order).
     616              return True
     617          return self.handler_order < other.handler_order
     618  
     619  
     620  class HTTPErrorProcessor(BaseHandler):
     621      """Process HTTP error responses."""
     622      handler_order = 1000  # after all other processing
     623  
     624      def http_response(self, request, response):
     625          code, msg, hdrs = response.code, response.msg, response.info()
     626  
     627          # According to RFC 2616, a "2xx" code indicates that the client's
     628          # request was successfully received, understood, and accepted.
     629          if not (200 <= code < 300):
     630              response = self.parent.error(
     631                  'http', request, response, code, msg, hdrs)
     632  
     633          return response
     634  
     635      https_response = http_response
     636  
     637  class HTTPDefaultErrorHandler(BaseHandler):
     638      def http_error_default(self, req, fp, code, msg, hdrs):
     639          raise HTTPError(req.full_url, code, msg, hdrs, fp)
     640  
     641  class HTTPRedirectHandler(BaseHandler):
     642      # maximum number of redirections to any single URL
     643      # this is needed because of the state that cookies introduce
     644      max_repeats = 4
     645      # maximum total number of redirections (regardless of URL) before
     646      # assuming we're in a loop
     647      max_redirections = 10
     648  
     649      def redirect_request(self, req, fp, code, msg, headers, newurl):
     650          """Return a Request or None in response to a redirect.
     651  
     652          This is called by the http_error_30x methods when a
     653          redirection response is received.  If a redirection should
     654          take place, return a new Request to allow http_error_30x to
     655          perform the redirect.  Otherwise, raise HTTPError if no-one
     656          else should try to handle this url.  Return None if you can't
     657          but another Handler might.
     658          """
     659          m = req.get_method()
     660          if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
     661              or code in (301, 302, 303) and m == "POST")):
     662              raise HTTPError(req.full_url, code, msg, headers, fp)
     663  
     664          # Strictly (according to RFC 2616), 301 or 302 in response to
     665          # a POST MUST NOT cause a redirection without confirmation
     666          # from the user (of urllib.request, in this case).  In practice,
     667          # essentially all clients do redirect in this case, so we do
     668          # the same.
     669  
     670          # Be lenient with URIs containing a space.  This is mainly
     671          # redundant with the more complete encoding done in http_error_302(),
     672          # but it is kept for compatibility with other callers.
     673          newurl = newurl.replace(' ', '%20')
     674  
     675          CONTENT_HEADERS = ("content-length", "content-type")
     676          newheaders = {k: v for k, v in req.headers.items()
     677                        if k.lower() not in CONTENT_HEADERS}
     678          return Request(newurl,
     679                         headers=newheaders,
     680                         origin_req_host=req.origin_req_host,
     681                         unverifiable=True)
     682  
     683      # Implementation note: To avoid the server sending us into an
     684      # infinite loop, the request object needs to track what URLs we
     685      # have already seen.  Do this by adding a handler-specific
     686      # attribute to the Request object.
     687      def http_error_302(self, req, fp, code, msg, headers):
     688          # Some servers (incorrectly) return multiple Location headers
     689          # (so probably the same goes for URI).  Use the first header.
     690          if "location" in headers:
     691              newurl = headers["location"]
     692          elif "uri" in headers:
     693              newurl = headers["uri"]
     694          else:
     695              return
     696  
     697          # fix a possibly malformed URL
     698          urlparts = urlparse(newurl)
     699  
     700          # For security reasons we don't allow redirection to anything other
     701          # than http, https or ftp.
     702  
     703          if urlparts.scheme not in ('http', 'https', 'ftp', ''):
     704              raise HTTPError(
     705                  newurl, code,
     706                  "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
     707                  headers, fp)
     708  
     709          if not urlparts.path and urlparts.netloc:
     710              urlparts = list(urlparts)
     711              urlparts[2] = "/"
     712          newurl = urlunparse(urlparts)
     713  
     714          # http.client.parse_headers() decodes as ISO-8859-1.  Recover the
     715          # original bytes and percent-encode non-ASCII bytes, and any special
     716          # characters such as the space.
     717          newurl = quote(
     718              newurl, encoding="iso-8859-1", safe=string.punctuation)
     719          newurl = urljoin(req.full_url, newurl)
     720  
     721          # XXX Probably want to forget about the state of the current
     722          # request, although that might interact poorly with other
     723          # handlers that also use handler-specific request attributes
     724          new = self.redirect_request(req, fp, code, msg, headers, newurl)
     725          if new is None:
     726              return
     727  
     728          # loop detection
     729          # .redirect_dict has a key url if url was previously visited.
     730          if hasattr(req, 'redirect_dict'):
     731              visited = new.redirect_dict = req.redirect_dict
     732              if (visited.get(newurl, 0) >= self.max_repeats or
     733                  len(visited) >= self.max_redirections):
     734                  raise HTTPError(req.full_url, code,
     735                                  self.inf_msg + msg, headers, fp)
     736          else:
     737              visited = new.redirect_dict = req.redirect_dict = {}
     738          visited[newurl] = visited.get(newurl, 0) + 1
     739  
     740          # Don't close the fp until we are sure that we won't use it
     741          # with HTTPError.
     742          fp.read()
     743          fp.close()
     744  
     745          return self.parent.open(new, timeout=req.timeout)
     746  
     747      http_error_301 = http_error_303 = http_error_307 = http_error_308 = http_error_302
     748  
     749      inf_msg = "The HTTP server returned a redirect error that would " \
     750                "lead to an infinite loop.\n" \
     751                "The last 30x error message was:\n"
     752  
     753  
     754  def _parse_proxy(proxy):
     755      """Return (scheme, user, password, host/port) given a URL or an authority.
     756  
     757      If a URL is supplied, it must have an authority (host:port) component.
     758      According to RFC 3986, having an authority component means the URL must
     759      have two slashes after the scheme.
     760      """
     761      scheme, r_scheme = _splittype(proxy)
     762      if not r_scheme.startswith("/"):
     763          # authority
     764          scheme = None
     765          authority = proxy
     766      else:
     767          # URL
     768          if not r_scheme.startswith("//"):
     769              raise ValueError("proxy URL with no authority: %r" % proxy)
     770          # We have an authority, so for RFC 3986-compliant URLs (by ss 3.2
     771          # and 3.3), the path is empty or starts with '/'.
     772          if '@' in r_scheme:
     773              host_separator = r_scheme.find('@')
     774              end = r_scheme.find("/", host_separator)
     775          else:
     776              end = r_scheme.find("/", 2)
     777          if end == -1:
     778              end = None
     779          authority = r_scheme[2:end]
     780      userinfo, hostport = _splituser(authority)
     781      if userinfo is not None:
     782          user, password = _splitpasswd(userinfo)
     783      else:
     784          user = password = None
     785      return scheme, user, password, hostport
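
# Illustrative sketch (not used by the module itself): _parse_proxy() accepts
# either a bare authority or a full proxy URL.  Hosts and credentials are
# placeholders.
def _example_parse_proxy():
    assert _parse_proxy("proxy.example.com:3128") == (
        None, None, None, "proxy.example.com:3128")
    assert _parse_proxy("http://joe:secret@proxy.example.com:3128/") == (
        "http", "joe", "secret", "proxy.example.com:3128")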
     786  
     787  class ProxyHandler(BaseHandler):
     788      # Proxies must be in front
     789      handler_order = 100
     790  
     791      def __init__(self, proxies=None):
     792          if proxies is None:
     793              proxies = getproxies()
     794          assert hasattr(proxies, 'keys'), "proxies must be a mapping"
     795          self.proxies = proxies
     796          for type, url in proxies.items():
     797              type = type.lower()
     798              setattr(self, '%s_open' % type,
     799                      lambda r, proxy=url, type=type, meth=self.proxy_open:
     800                          meth(r, proxy, type))
     801  
     802      def proxy_open(self, req, proxy, type):
     803          orig_type = req.type
     804          proxy_type, user, password, hostport = _parse_proxy(proxy)
     805          if proxy_type is None:
     806              proxy_type = orig_type
     807  
     808          if req.host and proxy_bypass(req.host):
     809              return None
     810  
     811          if user and password:
     812              user_pass = '%s:%s' % (unquote(user),
     813                                     unquote(password))
     814              creds = base64.b64encode(user_pass.encode()).decode("ascii")
     815              req.add_header('Proxy-authorization', 'Basic ' + creds)
     816          hostport = unquote(hostport)
     817          req.set_proxy(hostport, proxy_type)
     818          if orig_type == proxy_type or orig_type == 'https':
     819              # let other handlers take care of it
     820              return None
     821          else:
     822              # need to start over, because the other handlers don't
     823              # grok the proxy's URL type
     824              # e.g. if we have a constructor arg proxies like so:
     825              # {'http': 'ftp://proxy.example.com'}, we may end up turning
     826              # a request for http://acme.example.com/a into one for
     827              # ftp://proxy.example.com/a
     828              return self.parent.open(req, timeout=req.timeout)
     829  
     830  class HTTPPasswordMgr:
     831  
     832      def __init__(self):
     833          self.passwd = {}
     834  
     835      def add_password(self, realm, uri, user, passwd):
     836          # uri could be a single URI or a sequence
     837          if isinstance(uri, str):
     838              uri = [uri]
     839          if realm not in self.passwd:
     840              self.passwd[realm] = {}
     841          for default_port in True, False:
     842              reduced_uri = tuple(
     843                  self.reduce_uri(u, default_port) for u in uri)
     844              self.passwd[realm][reduced_uri] = (user, passwd)
     845  
     846      def find_user_password(self, realm, authuri):
     847          domains = self.passwd.get(realm, {})
     848          for default_port in True, False:
     849              reduced_authuri = self.reduce_uri(authuri, default_port)
     850              for uris, authinfo in domains.items():
     851                  for uri in uris:
     852                      if self.is_suburi(uri, reduced_authuri):
     853                          return authinfo
     854          return None, None
     855  
     856      def reduce_uri(self, uri, default_port=True):
     857          """Accept authority or URI and extract only the authority and path."""
     858          # note HTTP URLs do not have a userinfo component
     859          parts = urlsplit(uri)
     860          if parts[1]:
     861              # URI
     862              scheme = parts[0]
     863              authority = parts[1]
     864              path = parts[2] or '/'
     865          else:
     866              # host or host:port
     867              scheme = None
     868              authority = uri
     869              path = '/'
     870          host, port = _splitport(authority)
     871          if default_port and port is None and scheme is not None:
     872              dport = {"http": 80,
     873                       "https": 443,
     874                       }.get(scheme)
     875              if dport is not None:
     876                  authority = "%s:%d" % (host, dport)
     877          return authority, path
     878  
     879      def is_suburi(self, base, test):
     880          """Check if test is below base in a URI tree
     881  
     882          Both args must be URIs in reduced form.
     883          """
     884          if base == test:
     885              return True
     886          if base[0] != test[0]:
     887              return False
     888          prefix = base[1]
     889          if prefix[-1:] != '/':
     890              prefix += '/'
     891          return test[1].startswith(prefix)
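
# Illustrative sketch (not used by the module itself): credentials registered
# for a URI also match URIs below it in the path hierarchy, through
# reduce_uri() and is_suburi() above.  Realm, hosts and credentials are
# placeholders.
def _example_password_mgr():
    mgr = HTTPPasswordMgr()
    mgr.add_password("Example Realm", "https://www.example.com/private/",
                     "klem", "secret")
    assert mgr.find_user_password(
        "Example Realm", "https://www.example.com/private/reports/") == ("klem", "secret")
    assert mgr.find_user_password(
        "Example Realm", "https://other.example.com/") == (None, None)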
     892  
     893  
     894  class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
     895  
     896      def find_user_password(self, realm, authuri):
     897          user, password = HTTPPasswordMgr.find_user_password(self, realm,
     898                                                              authuri)
     899          if user is not None:
     900              return user, password
     901          return HTTPPasswordMgr.find_user_password(self, None, authuri)
     902  
     903  
     904  class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm):
     905  
     906      def __init__(self, *args, **kwargs):
     907          self.authenticated = {}
     908          super().__init__(*args, **kwargs)
     909  
     910      def add_password(self, realm, uri, user, passwd, is_authenticated=False):
     911          self.update_authenticated(uri, is_authenticated)
     912          # Add a default for prior auth requests
     913          if realm is not None:
     914              super().add_password(None, uri, user, passwd)
     915          super().add_password(realm, uri, user, passwd)
     916  
     917      def update_authenticated(self, uri, is_authenticated=False):
     918          # uri could be a single URI or a sequence
     919          if isinstance(uri, str):
     920              uri = [uri]
     921  
     922          for default_port in True, False:
     923              for u in uri:
     924                  reduced_uri = self.reduce_uri(u, default_port)
     925                  self.authenticated[reduced_uri] = is_authenticated
     926  
     927      def is_authenticated(self, authuri):
     928          for default_port in True, False:
     929              reduced_authuri = self.reduce_uri(authuri, default_port)
     930              for uri in self.authenticated:
     931                  if self.is_suburi(uri, reduced_authuri):
     932                      return self.authenticated[uri]
     933  
     934  
     935  class AbstractBasicAuthHandler:
     936  
     937      # XXX this allows for multiple auth-schemes, but will stupidly pick
     938      # the last one with a realm specified.
     939  
     940      # allow for double- and single-quoted realm values
     941      # (single quotes are a violation of the RFC, but appear in the wild)
     942      rx = re.compile('(?:^|,)'   # start of the string or ','
     943                      '[ \t]*'    # optional whitespaces
     944                      '([^ \t,]+)' # scheme like "Basic"
     945                      '[ \t]+'    # mandatory whitespaces
     946                      # realm=xxx
     947                      # realm='xxx'
     948                      # realm="xxx"
     949                      'realm=(["\']?)([^"\']*)\\2',
     950                      re.I)
     951  
     952      # XXX could pre-emptively send auth info already accepted (RFC 2617,
     953      # end of section 2, and section 1.2 immediately after "credentials"
     954      # production).
     955  
     956      def __init__(self, password_mgr=None):
     957          if password_mgr is None:
     958              password_mgr = HTTPPasswordMgr()
     959          self.passwd = password_mgr
     960          self.add_password = self.passwd.add_password
     961  
     962      def _parse_realm(self, header):
     963          # parse WWW-Authenticate header: accept multiple challenges per header
     964          found_challenge = False
     965          for mo in AbstractBasicAuthHandler.rx.finditer(header):
     966              scheme, quote, realm = mo.groups()
     967              if quote not in ['"', "'"]:
     968                  warnings.warn("Basic Auth Realm was unquoted",
     969                                UserWarning, 3)
     970  
     971              yield (scheme, realm)
     972  
     973              found_challenge = True
     974  
     975          if not found_challenge:
     976              if header:
     977                  scheme = header.split()[0]
     978              else:
     979                  scheme = ''
     980              yield (scheme, None)
     981  
     982      def http_error_auth_reqed(self, authreq, host, req, headers):
     983          # host may be an authority (without userinfo) or a URL with an
     984          # authority
     985          headers = headers.get_all(authreq)
     986          if not headers:
     987              # no header found
     988              return
     989  
     990          unsupported = None
     991          for header in headers:
     992              for scheme, realm in self._parse_realm(header):
     993                  if scheme.lower() != 'basic':
     994                      unsupported = scheme
     995                      continue
     996  
     997                  if realm is not None:
     998                      # Use the first matching Basic challenge.
     999                      # Ignore following challenges even if they use the Basic
    1000                      # scheme.
    1001                      return self.retry_http_basic_auth(host, req, realm)
    1002  
    1003          if unsupported is not None:
    1004              raise ValueError("AbstractBasicAuthHandler does not "
    1005                               "support the following scheme: %r"
    1006                               % (scheme,))
    1007  
    1008      def retry_http_basic_auth(self, host, req, realm):
    1009          user, pw = self.passwd.find_user_password(realm, host)
    1010          if pw is not None:
    1011              raw = "%s:%s" % (user, pw)
    1012              auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
    1013              if req.get_header(self.auth_header, None) == auth:
    1014                  return None
    1015              req.add_unredirected_header(self.auth_header, auth)
    1016              return self.parent.open(req, timeout=req.timeout)
    1017          else:
    1018              return None
    1019  
    1020      def http_request(self, req):
    1021          if (not hasattr(self.passwd, 'is_authenticated') or
    1022             not self.passwd.is_authenticated(req.full_url)):
    1023              return req
    1024  
    1025          if not req.has_header('Authorization'):
    1026              user, passwd = self.passwd.find_user_password(None, req.full_url)
    1027              credentials = '{0}:{1}'.format(user, passwd).encode()
    1028              auth_str = base64.standard_b64encode(credentials).decode()
    1029              req.add_unredirected_header('Authorization',
    1030                                          'Basic {}'.format(auth_str.strip()))
    1031          return req
    1032  
    1033      def http_response(self, req, response):
    1034          if hasattr(self.passwd, 'is_authenticated'):
    1035              if 200 <= response.code < 300:
    1036                  self.passwd.update_authenticated(req.full_url, True)
    1037              else:
    1038                  self.passwd.update_authenticated(req.full_url, False)
    1039          return response
    1040  
    1041      https_request = http_request
    1042      https_response = http_response
    1043  
    1044  
    1045  
    1046  class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    1047  
    1048      auth_header = 'Authorization'
    1049  
    1050      def http_error_401(self, req, fp, code, msg, headers):
    1051          url = req.full_url
    1052          response = self.http_error_auth_reqed('www-authenticate',
    1053                                            url, req, headers)
    1054          return response
    1055  
    1056  
    1057  class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    1058  
    1059      auth_header = 'Proxy-authorization'
    1060  
    1061      def http_error_407(self, req, fp, code, msg, headers):
    1062          # http_error_auth_reqed requires that there is no userinfo component in
    1063          # authority.  Assume there isn't one, since urllib.request does not (and
    1064          # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
    1065          # userinfo.
    1066          authority = req.host
    1067          response = self.http_error_auth_reqed('proxy-authenticate',
    1068                                            authority, req, headers)
    1069          return response
    1070  
    1071  
    1072  # Return n random bytes.
    1073  _randombytes = os.urandom
    1074  
    1075  
    1076  class AbstractDigestAuthHandler:
    1077      # Digest authentication is specified in RFC 2617.
    1078  
    1079      # XXX The client does not inspect the Authentication-Info header
    1080      # in a successful response.
    1081  
    1082      # XXX It should be possible to test this implementation against
    1083      # a mock server that just generates a static set of challenges.
    1084  
    1085      # XXX qop="auth-int" support is shaky
    1086  
    1087      def __init__(self, passwd=None):
    1088          if passwd is None:
    1089              passwd = HTTPPasswordMgr()
    1090          self.passwd = passwd
    1091          self.add_password = self.passwd.add_password
    1092          self.retried = 0
    1093          self.nonce_count = 0
    1094          self.last_nonce = None
    1095  
    1096      def reset_retry_count(self):
    1097          self.retried = 0
    1098  
    1099      def http_error_auth_reqed(self, auth_header, host, req, headers):
    1100          authreq = headers.get(auth_header, None)
    1101          if self.retried > 5:
    1102              # Don't fail endlessly - if we failed once, we'll probably
    1103              # fail a second time. Hm. Unless the Password Manager is
    1104              # prompting for the information. Crap. This isn't great
    1105              # but it's better than the current 'repeat until recursion
    1106              # depth exceeded' approach <wink>
    1107              raise HTTPError(req.full_url, 401, "digest auth failed",
    1108                              headers, None)
    1109          else:
    1110              self.retried += 1
    1111          if authreq:
    1112              scheme = authreq.split()[0]
    1113              if scheme.lower() == 'digest':
    1114                  return self.retry_http_digest_auth(req, authreq)
    1115              elif scheme.lower() != 'basic':
    1116                  raise ValueError("AbstractDigestAuthHandler does not support"
    1117                                   " the following scheme: '%s'" % scheme)
    1118  
    1119      def retry_http_digest_auth(self, req, auth):
    1120          token, challenge = auth.split(' ', 1)
    1121          chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
    1122          auth = self.get_authorization(req, chal)
    1123          if auth:
    1124              auth_val = 'Digest %s' % auth
    1125              if req.headers.get(self.auth_header, None) == auth_val:
    1126                  return None
    1127              req.add_unredirected_header(self.auth_header, auth_val)
    1128              resp = self.parent.open(req, timeout=req.timeout)
    1129              return resp
    1130  
    1131      def get_cnonce(self, nonce):
    1132          # The cnonce-value is an opaque
    1133          # quoted string value provided by the client and used by both client
    1134          # and server to avoid chosen plaintext attacks, to provide mutual
    1135          # authentication, and to provide some message integrity protection.
    1136          # This isn't a fabulous effort, but it's probably Good Enough.
    1137          s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
    1138          b = s.encode("ascii") + _randombytes(8)
    1139          dig = hashlib.sha1(b).hexdigest()
    1140          return dig[:16]
    1141  
    1142      def get_authorization(self, req, chal):
    1143          try:
    1144              realm = chal['realm']
    1145              nonce = chal['nonce']
    1146              qop = chal.get('qop')
    1147              algorithm = chal.get('algorithm', 'MD5')
    1148              # mod_digest doesn't send an opaque, even though it isn't
    1149              # supposed to be optional
    1150              opaque = chal.get('opaque', None)
    1151          except KeyError:
    1152              return None
    1153  
    1154          H, KD = self.get_algorithm_impls(algorithm)
    1155          if H is None:
    1156              return None
    1157  
    1158          user, pw = self.passwd.find_user_password(realm, req.full_url)
    1159          if user is None:
    1160              return None
    1161  
    1162          # XXX not implemented yet
    1163          if req.data is not None:
    1164              entdig = self.get_entity_digest(req.data, chal)
    1165          else:
    1166              entdig = None
    1167  
    1168          A1 = "%s:%s:%s" % (user, realm, pw)
    1169          A2 = "%s:%s" % (req.get_method(),
    1170                          # XXX selector: what about proxies and full urls
    1171                          req.selector)
    1172          # NOTE: As per RFC 2617, when the server sends "auth,auth-int", the client
    1173          #     may respond with either `auth` or `auth-int`; we respond with `auth`.
    1174          if qop is None:
    1175              respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
    1176          elif 'auth' in qop.split(','):
    1177              if nonce == self.last_nonce:
    1178                  self.nonce_count += 1
    1179              else:
    1180                  self.nonce_count = 1
    1181                  self.last_nonce = nonce
    1182              ncvalue = '%08x' % self.nonce_count
    1183              cnonce = self.get_cnonce(nonce)
    1184              noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth', H(A2))
    1185              respdig = KD(H(A1), noncebit)
    1186          else:
    1187              # XXX handle auth-int.
    1188              raise URLError("qop '%s' is not supported." % qop)
    1189  
    1190          # XXX should the partial digests be encoded too?
    1191  
    1192          base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
    1193                 'response="%s"' % (user, realm, nonce, req.selector,
    1194                                    respdig)
    1195          if opaque:
    1196              base += ', opaque="%s"' % opaque
    1197          if entdig:
    1198              base += ', digest="%s"' % entdig
    1199          base += ', algorithm="%s"' % algorithm
    1200          if qop:
    1201              base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
    1202          return base
    1203  
    1204      def get_algorithm_impls(self, algorithm):
    1205          # lambdas assume digest modules are imported at the top level
    1206          if algorithm == 'MD5':
    1207              H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
    1208          elif algorithm == 'SHA':
    1209              H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
    1210          # XXX MD5-sess
    1211          else:
    1212              raise ValueError("Unsupported digest authentication "
    1213                               "algorithm %r" % algorithm)
    1214          KD = lambda s, d: H("%s:%s" % (s, d))
    1215          return H, KD
    1216  
    1217      def get_entity_digest(self, data, chal):
    1218          # XXX not implemented yet
    1219          return None
    1220  
    1221  
    1222  class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    1223      """An authentication protocol defined by RFC 2069
    1224  
    1225      Digest authentication improves on basic authentication because it
    1226      does not transmit passwords in the clear.
    1227      """
    1228  
    1229      auth_header = 'Authorization'
    1230      handler_order = 490  # before Basic auth
    1231  
    1232      def http_error_401(self, req, fp, code, msg, headers):
    1233          host = urlparse(req.full_url)[1]
    1234          retry = self.http_error_auth_reqed('www-authenticate',
    1235                                             host, req, headers)
    1236          self.reset_retry_count()
    1237          return retry
    1238  
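          # A minimal usage sketch for HTTPDigestAuthHandler (the host and credentials
          # below are placeholders, not part of this module): register a password and
          # let the handler answer 401 Digest challenges automatically.
          #
          #   import urllib.request
          #   mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
          #   mgr.add_password(None, 'http://www.example.com/protected/', 'alice', 'secret')
          #   opener = urllib.request.build_opener(urllib.request.HTTPDigestAuthHandler(mgr))
          #   opener.open('http://www.example.com/protected/')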
    1239  
    1240  class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    1241  
    1242      auth_header = 'Proxy-Authorization'
    1243      handler_order = 490  # before Basic auth
    1244  
    1245      def http_error_407(self, req, fp, code, msg, headers):
    1246          host = req.host
    1247          retry = self.http_error_auth_reqed('proxy-authenticate',
    1248                                             host, req, headers)
    1249          self.reset_retry_count()
    1250          return retry
    1251  
    1252  class AbstractHTTPHandler(BaseHandler):
    1253  
    1254      def __init__(self, debuglevel=None):
    1255          self._debuglevel = debuglevel if debuglevel is not None else http.client.HTTPConnection.debuglevel
    1256  
    1257      def set_http_debuglevel(self, level):
    1258          self._debuglevel = level
    1259  
    1260      def _get_content_length(self, request):
    1261          return http.client.HTTPConnection._get_content_length(
    1262              request.data,
    1263              request.get_method())
    1264  
    1265      def do_request_(self, request):
    1266          host = request.host
    1267          if not host:
    1268              raise URLError('no host given')
    1269  
    1270          if request.data is not None:  # POST
    1271              data = request.data
    1272              if isinstance(data, str):
    1273                  msg = "POST data should be bytes, an iterable of bytes, " \
    1274                        "or a file object. It cannot be of type str."
    1275                  raise TypeError(msg)
    1276              if not request.has_header('Content-type'):
    1277                  request.add_unredirected_header(
    1278                      'Content-type',
    1279                      'application/x-www-form-urlencoded')
    1280              if (not request.has_header('Content-length')
    1281                      and not request.has_header('Transfer-encoding')):
    1282                  content_length = self._get_content_length(request)
    1283                  if content_length is not None:
    1284                      request.add_unredirected_header(
    1285                              'Content-length', str(content_length))
    1286                  else:
    1287                      request.add_unredirected_header(
    1288                              'Transfer-encoding', 'chunked')
    1289  
    1290          sel_host = host
    1291          if request.has_proxy():
    1292              scheme, sel = _splittype(request.selector)
    1293              sel_host, sel_path = _splithost(sel)
    1294          if not request.has_header('Host'):
    1295              request.add_unredirected_header('Host', sel_host)
    1296          for name, value in self.parent.addheaders:
    1297              name = name.capitalize()
    1298              if not request.has_header(name):
    1299                  request.add_unredirected_header(name, value)
    1300  
    1301          return request
    1302  
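              # Framing sketch for POST bodies (the URL is a placeholder): when the opener
              # runs do_request_() on a request whose data is bytes, it adds Content-length
              # (and a default Content-type); for a file-like body whose size it cannot
              # determine, it falls back to Transfer-encoding: chunked.
              #
              #   import io, urllib.request
              #   r1 = urllib.request.Request('http://www.example.com/', data=b'key=value')
              #   # -> Content-length: 9 is added when the request is processed
              #   r2 = urllib.request.Request('http://www.example.com/', data=io.BytesIO(b'key=value'))
              #   # -> Transfer-encoding: chunked is added instead
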
    1303      def do_open(self, http_class, req, **http_conn_args):
    1304          """Return an HTTPResponse object for the request, using http_class.
    1305  
    1306          http_class must implement the HTTPConnection API from http.client.
    1307          """
    1308          host = req.host
    1309          if not host:
    1310              raise URLError('no host given')
    1311  
    1312          # will parse host:port
    1313          h = http_class(host, timeout=req.timeout, **http_conn_args)
    1314          h.set_debuglevel(self._debuglevel)
    1315  
    1316          headers = dict(req.unredirected_hdrs)
    1317          headers.update({k: v for k, v in req.headers.items()
    1318                          if k not in headers})
    1319  
    1320          # TODO(jhylton): Should this be redesigned to handle
    1321          # persistent connections?
    1322  
    1323          # We want to make an HTTP/1.1 request, but the addinfourl
    1324          # class isn't prepared to deal with a persistent connection.
    1325          # It will try to read all remaining data from the socket,
    1326          # which will block while the server waits for the next request.
    1327          # So make sure the connection gets closed after the (only)
    1328          # request.
    1329          headers["Connection"] = "close"
    1330          headers = {name.title(): val for name, val in headers.items()}
    1331  
    1332          if req._tunnel_host:
    1333              tunnel_headers = {}
    1334              proxy_auth_hdr = "Proxy-Authorization"
    1335              if proxy_auth_hdr in headers:
    1336                  tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
    1337                  # Proxy-Authorization should not be sent to origin
    1338                  # server.
    1339                  del headers[proxy_auth_hdr]
    1340              h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
    1341  
    1342          try:
    1343              try:
    1344                  h.request(req.get_method(), req.selector, req.data, headers,
    1345                            encode_chunked=req.has_header('Transfer-encoding'))
    1346              except OSError as err: # timeout error
    1347                  raise URLError(err)
    1348              r = h.getresponse()
    1349          except:
    1350              h.close()
    1351              raise
    1352  
    1353          # If the server does not send us a 'Connection: close' header,
    1354          # HTTPConnection assumes the socket should be left open. Manually
    1355          # mark the socket to be closed when this response object goes away.
    1356          if h.sock:
    1357              h.sock.close()
    1358              h.sock = None
    1359  
    1360          r.url = req.get_full_url()
    1361          # This line replaces the .msg attribute of the HTTPResponse
    1362          # with .headers, because urllib clients expect the response to
    1363          # have the reason in .msg.  It would be good to mark this
    1364          # attribute as deprecated and steer clients to use info() or
    1365          # .headers instead.
    1366          r.msg = r.reason
    1367          return r
    1368  
    1369  
    1370  class HTTPHandler(AbstractHTTPHandler):
    1371  
    1372      def http_open(self, req):
    1373          return self.do_open(http.client.HTTPConnection, req)
    1374  
    1375      http_request = AbstractHTTPHandler.do_request_
    1376  
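          # do_open() accepts any connection class implementing the HTTPConnection API,
          # so a handler can route requests through a customised connection.  A sketch
          # (the class names and address are assumptions): pin the local source address.
          #
          #   import http.client, urllib.request
          #   class PinnedSourceConnection(http.client.HTTPConnection):
          #       def __init__(self, host, **kwargs):
          #           super().__init__(host, source_address=('192.0.2.10', 0), **kwargs)
          #   class PinnedSourceHandler(urllib.request.HTTPHandler):
          #       def http_open(self, req):
          #           return self.do_open(PinnedSourceConnection, req)
          #   opener = urllib.request.build_opener(PinnedSourceHandler)
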
    1377  if hasattr(http.client, 'HTTPSConnection'):
    1378  
    1379      class HTTPSHandler(AbstractHTTPHandler):
    1380  
    1381          def __init__(self, debuglevel=None, context=None, check_hostname=None):
    1382              debuglevel = debuglevel if debuglevel is not None else http.client.HTTPSConnection.debuglevel
    1383              AbstractHTTPHandler.__init__(self, debuglevel)
    1384              if context is None:
    1385                  http_version = http.client.HTTPSConnection._http_vsn
    1386                  context = http.client._create_https_context(http_version)
    1387              if check_hostname is not None:
    1388                  context.check_hostname = check_hostname
    1389              self._context = context
    1390  
    1391          def https_open(self, req):
    1392              return self.do_open(http.client.HTTPSConnection, req,
    1393                                  context=self._context)
    1394  
    1395          https_request = AbstractHTTPHandler.do_request_
    1396  
    1397      __all__.append('HTTPSHandler')
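
              # A sketch of supplying a custom SSL configuration (the CA-bundle path is
              # hypothetical): pass an ssl.SSLContext to HTTPSHandler and build an opener
              # around it.
              #
              #   import ssl, urllib.request
              #   ctx = ssl.create_default_context(cafile='/path/to/internal-ca.pem')
              #   opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ctx))
              #   opener.open('https://intranet.example/')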
    1398  
    1399  class HTTPCookieProcessor(BaseHandler):
    1400      def __init__(self, cookiejar=None):
    1401          import http.cookiejar
    1402          if cookiejar is None:
    1403              cookiejar = http.cookiejar.CookieJar()
    1404          self.cookiejar = cookiejar
    1405  
    1406      def http_request(self, request):
    1407          self.cookiejar.add_cookie_header(request)
    1408          return request
    1409  
    1410      def http_response(self, request, response):
    1411          self.cookiejar.extract_cookies(response, request)
    1412          return response
    1413  
    1414      https_request = http_request
    1415      https_response = http_response
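
              # Usage sketch (URLs are placeholders): share one CookieJar across requests
              # so cookies set by the server are returned on subsequent calls.
              #
              #   import http.cookiejar, urllib.request
              #   jar = http.cookiejar.CookieJar()
              #   opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(jar))
              #   opener.open('http://www.example.com/login')    # response cookies land in jar
              #   opener.open('http://www.example.com/account')  # ...and are sent back here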
    1416  
    1417  class UnknownHandler(BaseHandler):
    1418      def unknown_open(self, req):
    1419          type = req.type
    1420          raise URLError('unknown url type: %s' % type)
    1421  
    1422  def parse_keqv_list(l):
    1423      """Parse list of key=value strings where keys are not duplicated."""
    1424      parsed = {}
    1425      for elt in l:
    1426          k, v = elt.split('=', 1)
    1427          if v[0] == '"' and v[-1] == '"':
    1428              v = v[1:-1]
    1429          parsed[k] = v
    1430      return parsed
    1431  
    1432  def parse_http_list(s):
    1433      """Parse lists as described by RFC 2068 Section 2.
    1434  
    1435      In particular, parse comma-separated lists where the elements of
    1436      the list may include quoted-strings.  A quoted-string could
    1437      contain a comma.  A non-quoted string could have quotes in the
    1438      middle.  Neither commas nor quotes count if they are escaped.
    1439      Only double-quotes count, not single-quotes.
    1440      """
    1441      res = []
    1442      part = ''
    1443  
    1444      escape = quote = False
    1445      for cur in s:
    1446          if escape:
    1447              part += cur
    1448              escape = False
    1449              continue
    1450          if quote:
    1451              if cur == '\\':
    1452                  escape = True
    1453                  continue
    1454              elif cur == '"':
    1455                  quote = False
    1456              part += cur
    1457              continue
    1458  
    1459          if cur == ',':
    1460              res.append(part)
    1461              part = ''
    1462              continue
    1463  
    1464          if cur == '"':
    1465              quote = True
    1466  
    1467          part += cur
    1468  
    1469      # append last part
    1470      if part:
    1471          res.append(part)
    1472  
    1473      return [part.strip() for part in res]
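
          # Example of how these two helpers are used together on a Digest challenge
          # (the values are illustrative):
          #
          #   >>> items = parse_http_list('realm="x@y", qop="auth,auth-int", nonce="abc", algorithm=MD5')
          #   >>> items
          #   ['realm="x@y"', 'qop="auth,auth-int"', 'nonce="abc"', 'algorithm=MD5']
          #   >>> parse_keqv_list(items)
          #   {'realm': 'x@y', 'qop': 'auth,auth-int', 'nonce': 'abc', 'algorithm': 'MD5'}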
    1474  
    1475  class FileHandler(BaseHandler):
    1476      # Use local file or FTP depending on form of URL
    1477      def file_open(self, req):
    1478          url = req.selector
    1479          if url[:2] == '//' and url[2:3] != '/' and (req.host and
    1480                  req.host != 'localhost'):
    1481              if req.host not in self.get_names():
    1482                  raise URLError("file:// scheme is supported only on localhost")
    1483          else:
    1484              return self.open_local_file(req)
    1485  
    1486      # names for the localhost
    1487      names = None
    1488      def get_names(self):
    1489          if FileHandler.names is None:
    1490              try:
    1491                  FileHandler.names = tuple(
    1492                      socket.gethostbyname_ex('localhost')[2] +
    1493                      socket.gethostbyname_ex(socket.gethostname())[2])
    1494              except socket.gaierror:
    1495                  FileHandler.names = (socket.gethostbyname('localhost'),)
    1496          return FileHandler.names
    1497  
    1498      # not entirely sure what the rules are here
    1499      def open_local_file(self, req):
    1500          import email.utils
    1501          import mimetypes
    1502          host = req.host
    1503          filename = req.selector
    1504          localfile = url2pathname(filename)
    1505          try:
    1506              stats = os.stat(localfile)
    1507              size = stats.st_size
    1508              modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
    1509              mtype = mimetypes.guess_type(filename)[0]
    1510              headers = email.message_from_string(
    1511                  'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
    1512                  (mtype or 'text/plain', size, modified))
    1513              if host:
    1514                  host, port = _splitport(host)
    1515              if not host or \
    1516                  (not port and _safe_gethostbyname(host) in self.get_names()):
    1517                  if host:
    1518                      origurl = 'file://' + host + filename
    1519                  else:
    1520                      origurl = 'file://' + filename
    1521                  return addinfourl(open(localfile, 'rb'), headers, origurl)
    1522          except OSError as exp:
    1523              raise URLError(exp)
    1524          raise URLError('file not on local host')
    1525  
    1526  def _safe_gethostbyname(host):
    1527      try:
    1528          return socket.gethostbyname(host)
    1529      except socket.gaierror:
    1530          return None
    1531  
    1532  class FTPHandler(BaseHandler):
    1533      def ftp_open(self, req):
    1534          import ftplib
    1535          import mimetypes
    1536          host = req.host
    1537          if not host:
    1538              raise URLError('ftp error: no host given')
    1539          host, port = _splitport(host)
    1540          if port is None:
    1541              port = ftplib.FTP_PORT
    1542          else:
    1543              port = int(port)
    1544  
    1545          # username/password handling
    1546          user, host = _splituser(host)
    1547          if user:
    1548              user, passwd = _splitpasswd(user)
    1549          else:
    1550              passwd = None
    1551          host = unquote(host)
    1552          user = user or ''
    1553          passwd = passwd or ''
    1554  
    1555          try:
    1556              host = socket.gethostbyname(host)
    1557          except OSError as msg:
    1558              raise URLError(msg)
    1559          path, attrs = _splitattr(req.selector)
    1560          dirs = path.split('/')
    1561          dirs = list(map(unquote, dirs))
    1562          dirs, file = dirs[:-1], dirs[-1]
    1563          if dirs and not dirs[0]:
    1564              dirs = dirs[1:]
    1565          try:
    1566              fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
    1567              type = 'I' if file else 'D'
    1568              for attr in attrs:
    1569                  attr, value = _splitvalue(attr)
    1570                  if attr.lower() == 'type' and \
    1571                     value in ('a', 'A', 'i', 'I', 'd', 'D'):
    1572                      type = value.upper()
    1573              fp, retrlen = fw.retrfile(file, type)
    1574              headers = ""
    1575              mtype = mimetypes.guess_type(req.full_url)[0]
    1576              if mtype:
    1577                  headers += "Content-type: %s\n" % mtype
    1578              if retrlen is not None and retrlen >= 0:
    1579                  headers += "Content-length: %d\n" % retrlen
    1580              headers = email.message_from_string(headers)
    1581              return addinfourl(fp, headers, req.full_url)
    1582          except ftplib.all_errors as exp:
    1583              raise URLError(exp) from exp
    1584  
    1585      def connect_ftp(self, user, passwd, host, port, dirs, timeout):
    1586          return ftpwrapper(user, passwd, host, port, dirs, timeout,
    1587                            persistent=False)
    1588  
    1589  class CacheFTPHandler(FTPHandler):
    1590      # XXX would be nice to have pluggable cache strategies
    1591      # XXX this stuff is definitely not thread safe
    1592      def __init__(self):
    1593          self.cache = {}
    1594          self.timeout = {}
    1595          self.soonest = 0
    1596          self.delay = 60
    1597          self.max_conns = 16
    1598  
    1599      def setTimeout(self, t):
    1600          self.delay = t
    1601  
    1602      def setMaxConns(self, m):
    1603          self.max_conns = m
    1604  
    1605      def connect_ftp(self, user, passwd, host, port, dirs, timeout):
    1606          key = user, host, port, '/'.join(dirs), timeout
    1607          if key in self.cache:
    1608              self.timeout[key] = time.time() + self.delay
    1609          else:
    1610              self.cache[key] = ftpwrapper(user, passwd, host, port,
    1611                                           dirs, timeout)
    1612              self.timeout[key] = time.time() + self.delay
    1613          self.check_cache()
    1614          return self.cache[key]
    1615  
    1616      def check_cache(self):
    1617          # first check for old ones
    1618          t = time.time()
    1619          if self.soonest <= t:
    1620              for k, v in list(self.timeout.items()):
    1621                  if v < t:
    1622                      self.cache[k].close()
    1623                      del self.cache[k]
    1624                      del self.timeout[k]
    1625          self.soonest = min(list(self.timeout.values()))
    1626  
    1627          # then check the size
    1628          if len(self.cache) == self.max_conns:
    1629              for k, v in list(self.timeout.items()):
    1630                  if v == self.soonest:
    1631                      del self.cache[k]
    1632                      del self.timeout[k]
    1633                      break
    1634              self.soonest = min(list(self.timeout.values()))
    1635  
    1636      def clear_cache(self):
    1637          for conn in self.cache.values():
    1638              conn.close()
    1639          self.cache.clear()
    1640          self.timeout.clear()
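
              # Tuning sketch (the values are arbitrary): adjust the idle timeout and the
              # connection cap before handing the handler to build_opener().
              #
              #   h = CacheFTPHandler()
              #   h.setTimeout(30)      # drop cached FTP connections idle for 30 seconds
              #   h.setMaxConns(8)      # keep at most 8 cached connections
              #   opener = build_opener(h)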
    1641  
    1642  class DataHandler(BaseHandler):
    1643      def data_open(self, req):
    1644          # data URLs as specified in RFC 2397.
    1645          #
    1646          # ignores POSTed data
    1647          #
    1648          # syntax:
    1649          # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
    1650          # mediatype := [ type "/" subtype ] *( ";" parameter )
    1651          # data      := *urlchar
    1652          # parameter := attribute "=" value
    1653          url = req.full_url
    1654  
    1655          scheme, data = url.split(":",1)
    1656          mediatype, data = data.split(",",1)
    1657  
    1658          # even base64 encoded data URLs might be quoted so unquote in any case:
    1659          data = unquote_to_bytes(data)
    1660          if mediatype.endswith(";base64"):
    1661              data = base64.decodebytes(data)
    1662              mediatype = mediatype[:-7]
    1663  
    1664          if not mediatype:
    1665              mediatype = "text/plain;charset=US-ASCII"
    1666  
    1667          headers = email.message_from_string("Content-type: %s\nContent-length: %d\n" %
    1668              (mediatype, len(data)))
    1669  
    1670          return addinfourl(io.BytesIO(data), headers, url)
    1671  
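          # Example (RFC 2397 data URL; the literal is just "Hello, world!" base64-encoded):
          #
          #   >>> import urllib.request
          #   >>> urllib.request.urlopen('data:text/plain;base64,SGVsbG8sIHdvcmxkIQ==').read()
          #   b'Hello, world!'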
    1672  
    1673  # Code moved from the old urllib module
    1674  
    1675  MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
    1676  
    1677  # Helper for non-unix systems
    1678  if os.name == 'nt':
    1679      from nturl2path import url2pathname, pathname2url
    1680  else:
    1681      def url2pathname(pathname):
    1682          """OS-specific conversion from a relative URL of the 'file' scheme
    1683          to a file system path; not recommended for general use."""
    1684          return unquote(pathname)
    1685  
    1686      def pathname2url(pathname):
    1687          """OS-specific conversion from a file system path to a relative URL
    1688          of the 'file' scheme; not recommended for general use."""
    1689          return quote(pathname)
    1690  
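          # Round-trip sketch on a POSIX build (the path is arbitrary); on Windows the
          # nturl2path implementations above are used instead:
          #
          #   >>> pathname2url('/tmp/some file.txt')
          #   '/tmp/some%20file.txt'
          #   >>> url2pathname('/tmp/some%20file.txt')
          #   '/tmp/some file.txt'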
    1691  
    1692  ftpcache = {}
    1693  
    1694  
    1695  class URLopener:
    1696      """Class to open URLs.
    1697      This is a class rather than just a subroutine because we may need
    1698      more than one set of global protocol-specific options.
    1699      Note -- this is a base class for those who don't want the
    1700      automatic handling of errors type 302 (relocated) and 401
    1701      (authorization needed)."""
    1702  
    1703      __tempfiles = None
    1704  
    1705      version = "Python-urllib/%s" % __version__
    1706  
    1707      # Constructor
    1708      def __init__(self, proxies=None, **x509):
    1709          msg = "%(class)s style of invoking requests is deprecated. " \
    1710                "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
    1711          warnings.warn(msg, DeprecationWarning, stacklevel=3)
    1712          if proxies is None:
    1713              proxies = getproxies()
    1714          assert hasattr(proxies, 'keys'), "proxies must be a mapping"
    1715          self.proxies = proxies
    1716          self.key_file = x509.get('key_file')
    1717          self.cert_file = x509.get('cert_file')
    1718          self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
    1719          self.__tempfiles = []
    1720          self.__unlink = os.unlink # See cleanup()
    1721          self.tempcache = None
    1722          # Undocumented feature: if you assign {} to tempcache,
    1723          # it is used to cache files retrieved with
    1724          # self.retrieve().  This is not enabled by default
    1725          # since it does not work for changing documents (and I
    1726          # haven't got the logic to check expiration headers
    1727          # yet).
    1728          self.ftpcache = ftpcache
    1729          # Undocumented feature: you can use a different
    1730          # ftp cache by assigning to the .ftpcache member;
    1731          # in case you want logically independent URL openers
    1732          # XXX This is not threadsafe.  Bah.
    1733  
    1734      def __del__(self):
    1735          self.close()
    1736  
    1737      def close(self):
    1738          self.cleanup()
    1739  
    1740      def cleanup(self):
    1741          # This code sometimes runs when the rest of this module
    1742          # has already been deleted, so it can't use any globals
    1743          # or import anything.
    1744          if self.__tempfiles:
    1745              for file in self.__tempfiles:
    1746                  try:
    1747                      self.__unlink(file)
    1748                  except OSError:
    1749                      pass
    1750              del self.__tempfiles[:]
    1751          if self.tempcache:
    1752              self.tempcache.clear()
    1753  
    1754      def addheader(self, *args):
    1755          """Add a header to be used by the HTTP interface only
    1756          e.g. u.addheader('Accept', 'sound/basic')"""
    1757          self.addheaders.append(args)
    1758  
    1759      # External interface
    1760      def open(self, fullurl, data=None):
    1761          """Use URLopener().open(file) instead of open(file, 'r')."""
    1762          fullurl = unwrap(_to_bytes(fullurl))
    1763          fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
    1764          if self.tempcache and fullurl in self.tempcache:
    1765              filename, headers = self.tempcache[fullurl]
    1766              fp = open(filename, 'rb')
    1767              return addinfourl(fp, headers, fullurl)
    1768          urltype, url = _splittype(fullurl)
    1769          if not urltype:
    1770              urltype = 'file'
    1771          if urltype in self.proxies:
    1772              proxy = self.proxies[urltype]
    1773              urltype, proxyhost = _splittype(proxy)
    1774              host, selector = _splithost(proxyhost)
    1775              url = (host, fullurl) # Signal special case to open_*()
    1776          else:
    1777              proxy = None
    1778          name = 'open_' + urltype
    1779          self.type = urltype
    1780          name = name.replace('-', '_')
    1781          if not hasattr(self, name) or name == 'open_local_file':
    1782              if proxy:
    1783                  return self.open_unknown_proxy(proxy, fullurl, data)
    1784              else:
    1785                  return self.open_unknown(fullurl, data)
    1786          try:
    1787              if data is None:
    1788                  return getattr(self, name)(url)
    1789              else:
    1790                  return getattr(self, name)(url, data)
    1791          except (HTTPError, URLError):
    1792              raise
    1793          except OSError as msg:
    1794              raise OSError('socket error', msg) from msg
    1795  
    1796      def open_unknown(self, fullurl, data=None):
    1797          """Overridable interface to open unknown URL type."""
    1798          type, url = _splittype(fullurl)
    1799          raise OSError('url error', 'unknown url type', type)
    1800  
    1801      def open_unknown_proxy(self, proxy, fullurl, data=None):
    1802          """Overridable interface to open unknown URL type."""
    1803          type, url = _splittype(fullurl)
    1804          raise OSError('url error', 'invalid proxy for %s' % type, proxy)
    1805  
    1806      # External interface
    1807      def retrieve(self, url, filename=None, reporthook=None, data=None):
    1808          """retrieve(url) returns (filename, headers) for a local object
    1809          or (tempfilename, headers) for a remote object."""
    1810          url = unwrap(_to_bytes(url))
    1811          if self.tempcache and url in self.tempcache:
    1812              return self.tempcache[url]
    1813          type, url1 = _splittype(url)
    1814          if filename is None and (not type or type == 'file'):
    1815              try:
    1816                  fp = self.open_local_file(url1)
    1817                  hdrs = fp.info()
    1818                  fp.close()
    1819                  return url2pathname(_splithost(url1)[1]), hdrs
    1820              except OSError:
    1821                  pass
    1822          fp = self.open(url, data)
    1823          try:
    1824              headers = fp.info()
    1825              if filename:
    1826                  tfp = open(filename, 'wb')
    1827              else:
    1828                  garbage, path = _splittype(url)
    1829                  garbage, path = _splithost(path or "")
    1830                  path, garbage = _splitquery(path or "")
    1831                  path, garbage = _splitattr(path or "")
    1832                  suffix = os.path.splitext(path)[1]
    1833                  (fd, filename) = tempfile.mkstemp(suffix)
    1834                  self.__tempfiles.append(filename)
    1835                  tfp = os.fdopen(fd, 'wb')
    1836              try:
    1837                  result = filename, headers
    1838                  if self.tempcache is not None:
    1839                      self.tempcache[url] = result
    1840                  bs = 1024*8
    1841                  size = -1
    1842                  read = 0
    1843                  blocknum = 0
    1844                  if "content-length" in headers:
    1845                      size = int(headers["Content-Length"])
    1846                  if reporthook:
    1847                      reporthook(blocknum, bs, size)
    1848                  while block := fp.read(bs):
    1849                      read += len(block)
    1850                      tfp.write(block)
    1851                      blocknum += 1
    1852                      if reporthook:
    1853                          reporthook(blocknum, bs, size)
    1854              finally:
    1855                  tfp.close()
    1856          finally:
    1857              fp.close()
    1858  
    1859          # raise exception if actual size does not match content-length header
    1860          if size >= 0 and read < size:
    1861              raise ContentTooShortError(
    1862                  "retrieval incomplete: got only %i out of %i bytes"
    1863                  % (read, size), result)
    1864  
    1865          return result
    1866  
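              # reporthook sketch (the URL is a placeholder; note that URLopener itself is
              # deprecated): retrieve() calls reporthook(blocknum, blocksize, totalsize),
              # where totalsize is -1 if the server sent no Content-Length.
              #
              #   def report(blocknum, blocksize, totalsize):
              #       if totalsize > 0:
              #           done = min(blocknum * blocksize, totalsize)
              #           print("%.1f%%" % (done * 100.0 / totalsize))
              #   filename, headers = URLopener().retrieve('http://www.example.com/big.bin',
              #                                            reporthook=report)
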
    1867      # Each method named open_<type> knows how to open that type of URL
    1868  
    1869      def _open_generic_http(self, connection_factory, url, data):
    1870          """Make an HTTP connection using connection_class.
    1871  
    1872          This is an internal method that should be called from
    1873          open_http() or open_https().
    1874  
    1875          Arguments:
    1876          - connection_factory should take a host name and return an
    1877            HTTPConnection instance.
    1878          - url is the URL to retrieve or a (host, relative-path) pair.
    1879          - data is payload for a POST request or None.
    1880          """
    1881  
    1882          user_passwd = None
    1883          proxy_passwd = None
    1884          if isinstance(url, str):
    1885              host, selector = _splithost(url)
    1886              if host:
    1887                  user_passwd, host = _splituser(host)
    1888                  host = unquote(host)
    1889              realhost = host
    1890          else:
    1891              host, selector = url
    1892              # check whether the proxy contains authorization information
    1893              proxy_passwd, host = _splituser(host)
    1894              # now we proceed with the url we want to obtain
    1895              urltype, rest = _splittype(selector)
    1896              url = rest
    1897              user_passwd = None
    1898              if urltype.lower() != 'http':
    1899                  realhost = None
    1900              else:
    1901                  realhost, rest = _splithost(rest)
    1902                  if realhost:
    1903                      user_passwd, realhost = _splituser(realhost)
    1904                  if user_passwd:
    1905                      selector = "%s://%s%s" % (urltype, realhost, rest)
    1906                  if proxy_bypass(realhost):
    1907                      host = realhost
    1908  
    1909          if not host: raise OSError('http error', 'no host given')
    1910  
    1911          if proxy_passwd:
    1912              proxy_passwd = unquote(proxy_passwd)
    1913              proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
    1914          else:
    1915              proxy_auth = None
    1916  
    1917          if user_passwd:
    1918              user_passwd = unquote(user_passwd)
    1919              auth = base64.b64encode(user_passwd.encode()).decode('ascii')
    1920          else:
    1921              auth = None
    1922          http_conn = connection_factory(host)
    1923          headers = {}
    1924          if proxy_auth:
    1925              headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
    1926          if auth:
    1927              headers["Authorization"] =  "Basic %s" % auth
    1928          if realhost:
    1929              headers["Host"] = realhost
    1930  
    1931          # Add Connection:close as we don't support persistent connections yet.
    1932          # This helps in closing the socket and avoiding ResourceWarning
    1933  
    1934          headers["Connection"] = "close"
    1935  
    1936          for header, value in self.addheaders:
    1937              headers[header] = value
    1938  
    1939          if data is not None:
    1940              headers["Content-Type"] = "application/x-www-form-urlencoded"
    1941              http_conn.request("POST", selector, data, headers)
    1942          else:
    1943              http_conn.request("GET", selector, headers=headers)
    1944  
    1945          try:
    1946              response = http_conn.getresponse()
    1947          except http.client.BadStatusLine:
    1948              # something went wrong with the HTTP status line
    1949              raise URLError("http protocol error: bad status line")
    1950  
    1951          # According to RFC 2616, "2xx" code indicates that the client's
    1952          # request was successfully received, understood, and accepted.
    1953          if 200 <= response.status < 300:
    1954              return addinfourl(response, response.msg, "http:" + url,
    1955                                response.status)
    1956          else:
    1957              return self.http_error(
    1958                  url, response.fp,
    1959                  response.status, response.reason, response.msg, data)
    1960  
    1961      def open_http(self, url, data=None):
    1962          """Use HTTP protocol."""
    1963          return self._open_generic_http(http.client.HTTPConnection, url, data)
    1964  
    1965      def http_error(self, url, fp, errcode, errmsg, headers, data=None):
    1966          """Handle http errors.
    1967  
    1968          Derived class can override this, or provide specific handlers
    1969          named http_error_DDD where DDD is the 3-digit error code."""
    1970          # First check if there's a specific handler for this error
    1971          name = 'http_error_%d' % errcode
    1972          if hasattr(self, name):
    1973              method = getattr(self, name)
    1974              if data is None:
    1975                  result = method(url, fp, errcode, errmsg, headers)
    1976              else:
    1977                  result = method(url, fp, errcode, errmsg, headers, data)
    1978              if result: return result
    1979          return self.http_error_default(url, fp, errcode, errmsg, headers)
    1980  
    1981      def http_error_default(self, url, fp, errcode, errmsg, headers):
    1982          """Default error handler: close the connection and raise OSError."""
    1983          fp.close()
    1984          raise HTTPError(url, errcode, errmsg, headers, None)
    1985  
    1986      if _have_ssl:
    1987          def _https_connection(self, host):
    1988              if self.key_file or self.cert_file:
    1989                  http_version = http.client.HTTPSConnection._http_vsn
    1990                  context = http.client._create_https_context(http_version)
    1991                  context.load_cert_chain(self.cert_file, self.key_file)
    1992                  # cert and key file means the user wants to authenticate.
    1993                  # enable TLS 1.3 PHA implicitly even for custom contexts.
    1994                  if context.post_handshake_auth is not None:
    1995                      context.post_handshake_auth = True
    1996              else:
    1997                  context = None
    1998              return http.client.HTTPSConnection(host, context=context)
    1999  
    2000          def open_https(self, url, data=None):
    2001              """Use HTTPS protocol."""
    2002              return self._open_generic_http(self._https_connection, url, data)
    2003  
    2004      def open_file(self, url):
    2005          """Use local file or FTP depending on form of URL."""
    2006          if not isinstance(url, str):
    2007              raise URLError('file error: proxy support for file protocol currently not implemented')
    2008          if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
    2009              raise ValueError("file:// scheme is supported only on localhost")
    2010          else:
    2011              return self.open_local_file(url)
    2012  
    2013      def open_local_file(self, url):
    2014          """Use local file."""
    2015          import email.utils
    2016          import mimetypes
    2017          host, file = _splithost(url)
    2018          localname = url2pathname(file)
    2019          try:
    2020              stats = os.stat(localname)
    2021          except OSError as e:
    2022              raise URLError(e.strerror, e.filename)
    2023          size = stats.st_size
    2024          modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
    2025          mtype = mimetypes.guess_type(url)[0]
    2026          headers = email.message_from_string(
    2027              'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
    2028              (mtype or 'text/plain', size, modified))
    2029          if not host:
    2030              urlfile = file
    2031              if file[:1] == '/':
    2032                  urlfile = 'file://' + file
    2033              return addinfourl(open(localname, 'rb'), headers, urlfile)
    2034          host, port = _splitport(host)
    2035          if (not port
    2036             and socket.gethostbyname(host) in ((localhost(),) + thishost())):
    2037              urlfile = file
    2038              if file[:1] == '/':
    2039                  urlfile = 'file://' + file
    2040              elif file[:2] == './':
    2041                  raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
    2042              return addinfourl(open(localname, 'rb'), headers, urlfile)
    2043          raise URLError('local file error: not on local host')
    2044  
    2045      def open_ftp(self, url):
    2046          """Use FTP protocol."""
    2047          if not isinstance(url, str):
    2048              raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
    2049          import mimetypes
    2050          host, path = _splithost(url)
    2051          if not host: raise URLError('ftp error: no host given')
    2052          host, port = _splitport(host)
    2053          user, host = _splituser(host)
    2054          if user: user, passwd = _splitpasswd(user)
    2055          else: passwd = None
    2056          host = unquote(host)
    2057          user = unquote(user or '')
    2058          passwd = unquote(passwd or '')
    2059          host = socket.gethostbyname(host)
    2060          if not port:
    2061              import ftplib
    2062              port = ftplib.FTP_PORT
    2063          else:
    2064              port = int(port)
    2065          path, attrs = _splitattr(path)
    2066          path = unquote(path)
    2067          dirs = path.split('/')
    2068          dirs, file = dirs[:-1], dirs[-1]
    2069          if dirs and not dirs[0]: dirs = dirs[1:]
    2070          if dirs and not dirs[0]: dirs[0] = '/'
    2071          key = user, host, port, '/'.join(dirs)
    2072          # XXX thread unsafe!
    2073          if len(self.ftpcache) > MAXFTPCACHE:
    2074              # Prune the cache, rather arbitrarily
    2075              for k in list(self.ftpcache):
    2076                  if k != key:
    2077                      v = self.ftpcache[k]
    2078                      del self.ftpcache[k]
    2079                      v.close()
    2080          try:
    2081              if key not in self.ftpcache:
    2082                  self.ftpcache[key] = \
    2083                      ftpwrapper(user, passwd, host, port, dirs)
    2084              if not file: type = 'D'
    2085              else: type = 'I'
    2086              for attr in attrs:
    2087                  attr, value = _splitvalue(attr)
    2088                  if attr.lower() == 'type' and \
    2089                     value in ('a', 'A', 'i', 'I', 'd', 'D'):
    2090                      type = value.upper()
    2091              (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
    2092              mtype = mimetypes.guess_type("ftp:" + url)[0]
    2093              headers = ""
    2094              if mtype:
    2095                  headers += "Content-Type: %s\n" % mtype
    2096              if retrlen is not None and retrlen >= 0:
    2097                  headers += "Content-Length: %d\n" % retrlen
    2098              headers = email.message_from_string(headers)
    2099              return addinfourl(fp, headers, "ftp:" + url)
    2100          except ftperrors() as exp:
    2101              raise URLError(f'ftp error: {exp}') from exp
    2102  
    2103      def open_data(self, url, data=None):
    2104          """Use "data" URL."""
    2105          if not isinstance(url, str):
    2106              raise URLError('data error: proxy support for data protocol currently not implemented')
    2107          # ignore POSTed data
    2108          #
    2109          # syntax of data URLs:
    2110          # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
    2111          # mediatype := [ type "/" subtype ] *( ";" parameter )
    2112          # data      := *urlchar
    2113          # parameter := attribute "=" value
    2114          try:
    2115              [type, data] = url.split(',', 1)
    2116          except ValueError:
    2117              raise OSError('data error', 'bad data URL')
    2118          if not type:
    2119              type = 'text/plain;charset=US-ASCII'
    2120          semi = type.rfind(';')
    2121          if semi >= 0 and '=' not in type[semi:]:
    2122              encoding = type[semi+1:]
    2123              type = type[:semi]
    2124          else:
    2125              encoding = ''
    2126          msg = []
    2127          msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
    2128                                              time.gmtime(time.time())))
    2129          msg.append('Content-type: %s' % type)
    2130          if encoding == 'base64':
    2131              # XXX is this encoding/decoding ok?
    2132              data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
    2133          else:
    2134              data = unquote(data)
    2135          msg.append('Content-Length: %d' % len(data))
    2136          msg.append('')
    2137          msg.append(data)
    2138          msg = '\n'.join(msg)
    2139          headers = email.message_from_string(msg)
    2140          f = io.StringIO(msg)
    2141          #f.fileno = None     # needed for addinfourl
    2142          return addinfourl(f, headers, url)
    2143  
    2144  
    2145  class FancyURLopener(URLopener):
    2146      """Derived class with handlers for errors we can handle (perhaps)."""
    2147  
    2148      def __init__(self, *args, **kwargs):
    2149          URLopener.__init__(self, *args, **kwargs)
    2150          self.auth_cache = {}
    2151          self.tries = 0
    2152          self.maxtries = 10
    2153  
    2154      def http_error_default(self, url, fp, errcode, errmsg, headers):
    2155          """Default error handling -- don't raise an exception."""
    2156          return addinfourl(fp, headers, "http:" + url, errcode)
    2157  
    2158      def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
    2159          """Error 302 -- relocated (temporarily)."""
    2160          self.tries += 1
    2161          try:
    2162              if self.maxtries and self.tries >= self.maxtries:
    2163                  if hasattr(self, "http_error_500"):
    2164                      meth = self.http_error_500
    2165                  else:
    2166                      meth = self.http_error_default
    2167                  return meth(url, fp, 500,
    2168                              "Internal Server Error: Redirect Recursion",
    2169                              headers)
    2170              result = self.redirect_internal(url, fp, errcode, errmsg,
    2171                                              headers, data)
    2172              return result
    2173          finally:
    2174              self.tries = 0
    2175  
    2176      def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
    2177          if 'location' in headers:
    2178              newurl = headers['location']
    2179          elif 'uri' in headers:
    2180              newurl = headers['uri']
    2181          else:
    2182              return
    2183          fp.close()
    2184  
    2185          # In case the server sent a relative URL, join with original:
    2186          newurl = urljoin(self.type + ":" + url, newurl)
    2187  
    2188          urlparts = urlparse(newurl)
    2189  
    2190          # For security reasons, we don't allow redirection to anything other
    2191          # than http, https and ftp.
    2192  
    2193          # We are using the newer HTTPError with the legacy redirect_internal
    2194          # method; the URLopener machinery has been deprecated since Python 3.3.
    2195  
    2196          if urlparts.scheme not in ('http', 'https', 'ftp', ''):
    2197              raise HTTPError(newurl, errcode,
    2198                              errmsg +
    2199                              " Redirection to url '%s' is not allowed." % newurl,
    2200                              headers, fp)
    2201  
    2202          return self.open(newurl)
    2203  
    2204      def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
    2205          """Error 301 -- also relocated (permanently)."""
    2206          return self.http_error_302(url, fp, errcode, errmsg, headers, data)
    2207  
    2208      def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
    2209          """Error 303 -- also relocated (essentially identical to 302)."""
    2210          return self.http_error_302(url, fp, errcode, errmsg, headers, data)
    2211  
    2212      def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
    2213          """Error 307 -- relocated, but turn POST into error."""
    2214          if data is None:
    2215              return self.http_error_302(url, fp, errcode, errmsg, headers, data)
    2216          else:
    2217              return self.http_error_default(url, fp, errcode, errmsg, headers)
    2218  
    2219      def http_error_308(self, url, fp, errcode, errmsg, headers, data=None):
    2220          """Error 308 -- relocated, but turn POST into error."""
    2221          if data is None:
    2222              return self.http_error_301(url, fp, errcode, errmsg, headers, data)
    2223          else:
    2224              return self.http_error_default(url, fp, errcode, errmsg, headers)
    2225  
    2226      def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
    2227              retry=False):
    2228          """Error 401 -- authentication required.
    2229          This function supports Basic authentication only."""
    2230          if 'www-authenticate' not in headers:
    2231              URLopener.http_error_default(self, url, fp,
    2232                                           errcode, errmsg, headers)
    2233          stuff = headers['www-authenticate']
    2234          match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
    2235          if not match:
    2236              URLopener.http_error_default(self, url, fp,
    2237                                           errcode, errmsg, headers)
    2238          scheme, realm = match.groups()
    2239          if scheme.lower() != 'basic':
    2240              URLopener.http_error_default(self, url, fp,
    2241                                           errcode, errmsg, headers)
    2242          if not retry:
    2243              URLopener.http_error_default(self, url, fp, errcode, errmsg,
    2244                      headers)
    2245          name = 'retry_' + self.type + '_basic_auth'
    2246          if data is None:
    2247              return getattr(self,name)(url, realm)
    2248          else:
    2249              return getattr(self,name)(url, realm, data)
    2250  
    2251      def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
    2252              retry=False):
    2253          """Error 407 -- proxy authentication required.
    2254          This function supports Basic authentication only."""
    2255          if 'proxy-authenticate' not in headers:
    2256              URLopener.http_error_default(self, url, fp,
    2257                                           errcode, errmsg, headers)
    2258          stuff = headers['proxy-authenticate']
    2259          match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
    2260          if not match:
    2261              URLopener.http_error_default(self, url, fp,
    2262                                           errcode, errmsg, headers)
    2263          scheme, realm = match.groups()
    2264          if scheme.lower() != 'basic':
    2265              URLopener.http_error_default(self, url, fp,
    2266                                           errcode, errmsg, headers)
    2267          if not retry:
    2268              URLopener.http_error_default(self, url, fp, errcode, errmsg,
    2269                      headers)
    2270          name = 'retry_proxy_' + self.type + '_basic_auth'
    2271          if data is None:
    2272              return getattr(self,name)(url, realm)
    2273          else:
    2274              return getattr(self,name)(url, realm, data)
    2275  
    2276      def retry_proxy_http_basic_auth(self, url, realm, data=None):
    2277          host, selector = _splithost(url)
    2278          newurl = 'http://' + host + selector
    2279          proxy = self.proxies['http']
    2280          urltype, proxyhost = _splittype(proxy)
    2281          proxyhost, proxyselector = _splithost(proxyhost)
    2282          i = proxyhost.find('@') + 1
    2283          proxyhost = proxyhost[i:]
    2284          user, passwd = self.get_user_passwd(proxyhost, realm, i)
    2285          if not (user or passwd): return None
    2286          proxyhost = "%s:%s@%s" % (quote(user, safe=''),
    2287                                    quote(passwd, safe=''), proxyhost)
    2288          self.proxies['http'] = 'http://' + proxyhost + proxyselector
    2289          if data is None:
    2290              return self.open(newurl)
    2291          else:
    2292              return self.open(newurl, data)
    2293  
    2294      def retry_proxy_https_basic_auth(self, url, realm, data=None):
    2295          host, selector = _splithost(url)
    2296          newurl = 'https://' + host + selector
    2297          proxy = self.proxies['https']
    2298          urltype, proxyhost = _splittype(proxy)
    2299          proxyhost, proxyselector = _splithost(proxyhost)
    2300          i = proxyhost.find('@') + 1
    2301          proxyhost = proxyhost[i:]
    2302          user, passwd = self.get_user_passwd(proxyhost, realm, i)
    2303          if not (user or passwd): return None
    2304          proxyhost = "%s:%s@%s" % (quote(user, safe=''),
    2305                                    quote(passwd, safe=''), proxyhost)
    2306          self.proxies['https'] = 'https://' + proxyhost + proxyselector
    2307          if data is None:
    2308              return self.open(newurl)
    2309          else:
    2310              return self.open(newurl, data)
    2311  
    2312      def retry_http_basic_auth(self, url, realm, data=None):
    2313          host, selector = _splithost(url)
    2314          i = host.find('@') + 1
    2315          host = host[i:]
    2316          user, passwd = self.get_user_passwd(host, realm, i)
    2317          if not (user or passwd): return None
    2318          host = "%s:%s@%s" % (quote(user, safe=''),
    2319                               quote(passwd, safe=''), host)
    2320          newurl = 'http://' + host + selector
    2321          if data is None:
    2322              return self.open(newurl)
    2323          else:
    2324              return self.open(newurl, data)
    2325  
    2326      def retry_https_basic_auth(self, url, realm, data=None):
    2327          host, selector = _splithost(url)
    2328          i = host.find('@') + 1
    2329          host = host[i:]
    2330          user, passwd = self.get_user_passwd(host, realm, i)
    2331          if not (user or passwd): return None
    2332          host = "%s:%s@%s" % (quote(user, safe=''),
    2333                               quote(passwd, safe=''), host)
    2334          newurl = 'https://' + host + selector
    2335          if data is None:
    2336              return self.open(newurl)
    2337          else:
    2338              return self.open(newurl, data)
    2339  
    2340      def get_user_passwd(self, host, realm, clear_cache=0):
    2341          key = realm + '@' + host.lower()
    2342          if key in self.auth_cache:
    2343              if clear_cache:
    2344                  del self.auth_cache[key]
    2345              else:
    2346                  return self.auth_cache[key]
    2347          user, passwd = self.prompt_user_passwd(host, realm)
    2348          if user or passwd: self.auth_cache[key] = (user, passwd)
    2349          return user, passwd
    2350  
    2351      def prompt_user_passwd(self, host, realm):
    2352          """Override this in a GUI environment!"""
    2353          import getpass
    2354          try:
    2355              user = input("Enter username for %s at %s: " % (realm, host))
    2356              passwd = getpass.getpass("Enter password for %s in %s at %s: " %
    2357                  (user, realm, host))
    2358              return user, passwd
    2359          except KeyboardInterrupt:
    2360              print()
    2361              return None, None
    2362  
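          # Sketch of a non-interactive FancyURLopener (credentials are placeholders):
          # override prompt_user_passwd() so Basic-auth retries never block on stdin.
          #
          #   class ScriptedOpener(FancyURLopener):
          #       def prompt_user_passwd(self, host, realm):
          #           return 'alice', 'secret'
          #   f = ScriptedOpener().open('http://www.example.com/protected/')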
    2363  
    2364  # Utility functions
    2365  
    2366  _localhost = None
    2367  def localhost():
    2368      """Return the IP address of the magic hostname 'localhost'."""
    2369      global _localhost
    2370      if _localhost is None:
    2371          _localhost = socket.gethostbyname('localhost')
    2372      return _localhost
    2373  
    2374  _thishost = None
    2375  def thishost():
    2376      """Return the IP addresses of the current host."""
    2377      global _thishost
    2378      if _thishost is None:
    2379          try:
    2380              _thishost = tuple(socket.gethostbyname_ex(socket.gethostname())[2])
    2381          except socket.gaierror:
    2382              _thishost = tuple(socket.gethostbyname_ex('localhost')[2])
    2383      return _thishost
    2384  
    2385  _ftperrors = None
    2386  def ftperrors():
    2387      """Return the set of errors raised by the FTP class."""
    2388      global _ftperrors
    2389      if _ftperrors is None:
    2390          import ftplib
    2391          _ftperrors = ftplib.all_errors
    2392      return _ftperrors
    2393  
    2394  _noheaders = None
    2395  def noheaders():
    2396      """Return an empty email Message object."""
    2397      global _noheaders
    2398      if _noheaders is None:
    2399          _noheaders = email.message_from_string("")
    2400      return _noheaders
    2401  
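          # Illustrative sketch of the lazily cached helpers above (the results
          # depend on the local machine, so the values shown are made up):
          #
          #     localhost()    # e.g. '127.0.0.1'
          #     thishost()     # e.g. ('192.168.1.5', '127.0.1.1')
          #     ftperrors()    # the ftplib.all_errors exception tuple
          #     noheaders()    # an empty email.message.Message instance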
    2402  
    2403  # Utility classes
    2404  
    2405  class ftpwrapper:
    2406      """Class used by open_ftp() to cache open FTP connections."""
    2407  
    2408      def __init__(self, user, passwd, host, port, dirs, timeout=None,
    2409                   persistent=True):
    2410          self.user = user
    2411          self.passwd = passwd
    2412          self.host = host
    2413          self.port = port
    2414          self.dirs = dirs
    2415          self.timeout = timeout
    2416          self.refcount = 0
    2417          self.keepalive = persistent
    2418          try:
    2419              self.init()
    2420          except:
    2421              self.close()
    2422              raise
    2423  
    2424      def init(self):
    2425          import ftplib
    2426          self.busy = 0
    2427          self.ftp = ftplib.FTP()
    2428          self.ftp.connect(self.host, self.port, self.timeout)
    2429          self.ftp.login(self.user, self.passwd)
    2430          _target = '/'.join(self.dirs)
    2431          self.ftp.cwd(_target)
    2432  
    2433      def retrfile(self, file, type):
    2434          import ftplib
    2435          self.endtransfer()
    2436          if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
    2437          else: cmd = 'TYPE ' + type; isdir = 0
    2438          try:
    2439              self.ftp.voidcmd(cmd)
    2440          except ftplib.all_errors:
    2441              self.init()
    2442              self.ftp.voidcmd(cmd)
    2443          conn = None
    2444          if file and not isdir:
    2445              # Try to retrieve as a file
    2446              try:
    2447                  cmd = 'RETR ' + file
    2448                  conn, retrlen = self.ftp.ntransfercmd(cmd)
    2449              except ftplib.error_perm as reason:
    2450                  if str(reason)[:3] != '550':
    2451                      raise URLError(f'ftp error: {reason}') from reason
    2452          if not conn:
    2453              # Set transfer mode to ASCII!
    2454              self.ftp.voidcmd('TYPE A')
    2455              # Try a directory listing. Verify that directory exists.
    2456              if file:
    2457                  pwd = self.ftp.pwd()
    2458                  try:
    2459                      try:
    2460                          self.ftp.cwd(file)
    2461                      except ftplib.error_perm as reason:
    2462                          raise URLError('ftp error: %r' % reason) from reason
    2463                  finally:
    2464                      self.ftp.cwd(pwd)
    2465                  cmd = 'LIST ' + file
    2466              else:
    2467                  cmd = 'LIST'
    2468              conn, retrlen = self.ftp.ntransfercmd(cmd)
    2469          self.busy = 1
    2470  
    2471          ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
    2472          self.refcount += 1
    2473          conn.close()
    2474          # Pass back both a suitably decorated object and a retrieval length
    2475          return (ftpobj, retrlen)
    2476  
    2477      def endtransfer(self):
    2478          if not self.busy:
    2479              return
    2480          self.busy = 0
    2481          try:
    2482              self.ftp.voidresp()
    2483          except ftperrors():
    2484              pass
    2485  
    2486      def close(self):
    2487          self.keepalive = False
    2488          if self.refcount <= 0:
    2489              self.real_close()
    2490  
    2491      def file_close(self):
    2492          self.endtransfer()
    2493          self.refcount -= 1
    2494          if self.refcount <= 0 and not self.keepalive:
    2495              self.real_close()
    2496  
    2497      def real_close(self):
    2498          self.endtransfer()
    2499          try:
    2500              self.ftp.close()
    2501          except ftperrors():
    2502              pass
    2503  
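          # Illustrative sketch of how a caller uses ftpwrapper (host, file and
          # credentials are made up; a real FTP server is needed to run this):
          #
          #     fw = ftpwrapper('anonymous', 'anonymous@', 'ftp.example.com', 21,
          #                     ['pub'], timeout=30)
          #     fp, retrlen = fw.retrfile('README', 'I')   # 'I' = binary transfer
          #     data = fp.read()
          #     fp.close()     # runs file_close() and decrements the refcount
          #     fw.close()     # really closes once no retrieved files remain open
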
    2504  # Proxy handling
    2505  def getproxies_environment():
    2506      """Return a dictionary of scheme -> proxy server URL mappings.
    2507  
    2508      Scan the environment for variables named <scheme>_proxy;
    2509      this seems to be the standard convention.  If you need a
    2510      different way, you can pass a proxies dictionary to the
    2511      [Fancy]URLopener constructor.
    2512      """
    2513      # In order to prefer lowercase variables, process the environment in
    2514      # two passes: the first pass matches any case, the second lowercase only.
    2515
    2516      # Select only environment variables whose name, lowercased, ends in '_proxy'.
    2517      proxies = {}
    2518      environment = []
    2519      for name in os.environ.keys():
    2520          # cheap check of the underscore position before the more expensive case-folding
    2521          if len(name) > 5 and name[-6] == "_" and name[-5:].lower() == "proxy":
    2522              value = os.environ[name]
    2523              proxy_name = name[:-6].lower()
    2524              environment.append((name, value, proxy_name))
    2525              if value:
    2526                  proxies[proxy_name] = value
    2527      # CVE-2016-1000110 - If we are running as a CGI script, forget HTTP_PROXY
    2528      # (non-all-lowercase), as it may have been set by the web server from a
    2529      # "Proxy:" header sent by the client.
    2530      # An all-lowercase "http_proxy" will still be used thanks to the next block.
    2531      if 'REQUEST_METHOD' in os.environ:
    2532          proxies.pop('http', None)
    2533      for name, value, proxy_name in environment:
    2534          # not case-folded, checking here for lower-case env vars only
    2535          if name[-6:] == '_proxy':
    2536              if value:
    2537                  proxies[proxy_name] = value
    2538              else:
    2539                  proxies.pop(proxy_name, None)
    2540      return proxies
    2541  
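          # Illustrative sketch (assumes no other *_proxy variables are set; the
          # proxy URL is made up).  Note that under a CGI script (REQUEST_METHOD
          # set) an 'http' entry coming only from uppercase HTTP_PROXY is dropped:
          #
          #     import os
          #     os.environ['http_proxy'] = 'http://proxy.example.com:3128'
          #     os.environ['NO_PROXY'] = 'localhost,.internal.example.com'
          #     getproxies_environment()
          #     # -> {'http': 'http://proxy.example.com:3128',
          #     #     'no': 'localhost,.internal.example.com'}
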
    2542  def proxy_bypass_environment(host, proxies=None):
    2543      """Test if proxies should not be used for a particular host.
    2544  
    2545      Checks the proxy dict for the value of no_proxy, which should
    2546      be a comma-separated list of DNS suffixes, or '*' for all hosts.
    2547  
    2548      """
    2549      if proxies is None:
    2550          proxies = getproxies_environment()
    2551      # don't bypass if no_proxy isn't specified
    2552      try:
    2553          no_proxy = proxies['no']
    2554      except KeyError:
    2555          return False
    2556      # '*' is a special case meaning "always bypass"
    2557      if no_proxy == '*':
    2558          return True
    2559      host = host.lower()
    2560      # strip port off host
    2561      hostonly, port = _splitport(host)
    2562      # check if the host ends with any of the DNS suffixes
    2563      for name in no_proxy.split(','):
    2564          name = name.strip()
    2565          if name:
    2566              name = name.lstrip('.')  # ignore leading dots
    2567              name = name.lower()
    2568              if hostonly == name or host == name:
    2569                  return True
    2570              name = '.' + name
    2571              if hostonly.endswith(name) or host.endswith(name):
    2572                  return True
    2573      # otherwise, don't bypass
    2574      return False
    2575  
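          # Illustrative sketch with an explicit proxies dict (host names made up):
          #
          #     proxies = {'http': 'http://proxy.example.com:3128',
          #                'no': 'localhost, .corp.example.com'}
          #     proxy_bypass_environment('db.corp.example.com', proxies)   # True
          #     proxy_bypass_environment('www.python.org', proxies)        # False
          #     proxy_bypass_environment('anything.example', {'no': '*'})  # True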
    2576  
    2577  # This code examines a macOS-specific data structure but is testable on all
    2578  # platforms
    2579  def _proxy_bypass_macosx_sysconf(host, proxy_settings):
    2580      """
    2581      Return True iff this host shouldn't be accessed using a proxy.
    2582
    2583      The proxy information is fetched via the macOS SystemConfiguration
    2584      framework by the caller and passed in as proxy_settings.
    2585
    2586      proxy_settings comes from _scproxy._get_proxy_settings or is mocked in tests, e.g.:
    2587      { 'exclude_simple': bool,
    2588        'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
    2589      }
    2590      """
    2591      from fnmatch import fnmatch
    2592  
    2593      hostonly, port = _splitport(host)
    2594  
    2595      def ip2num(ipAddr):
    2596          parts = ipAddr.split('.')
    2597          parts = list(map(int, parts))
    2598          if len(parts) != 4:
    2599              parts = (parts + [0, 0, 0, 0])[:4]
    2600          return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
    2601  
    2602      # Check for simple host names:
    2603      if '.' not in host:
    2604          if proxy_settings['exclude_simple']:
    2605              return True
    2606  
    2607      hostIP = None
    2608  
    2609      for value in proxy_settings.get('exceptions', ()):
    2610          # Items in the list are strings like these: *.local, 169.254/16
    2611          if not value: continue
    2612  
    2613          m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
    2614          if m is not None:
    2615              if hostIP is None:
    2616                  try:
    2617                      hostIP = socket.gethostbyname(hostonly)
    2618                      hostIP = ip2num(hostIP)
    2619                  except OSError:
    2620                      continue
    2621  
    2622              base = ip2num(m.group(1))
    2623              mask = m.group(2)
    2624              if mask is None:
    2625                  mask = 8 * (m.group(1).count('.') + 1)
    2626              else:
    2627                  mask = int(mask[1:])
    2628  
    2629              if mask < 0 or mask > 32:
    2630                  # System libraries ignore invalid prefix lengths
    2631                  continue
    2632  
    2633              mask = 32 - mask
    2634  
    2635              if (hostIP >> mask) == (base >> mask):
    2636                  return True
    2637  
    2638          elif fnmatch(host, value):
    2639              return True
    2640  
    2641      return False
    2642  
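          # Illustrative sketch with a mocked settings dict (addresses made up;
          # IP-literal hosts need no DNS lookup, so this is safe to run offline):
          #
          #     settings = {'exclude_simple': True,
          #                 'exceptions': ['*.local', '10.0/16']}
          #     _proxy_bypass_macosx_sysconf('printer', settings)         # True (no dot)
          #     _proxy_bypass_macosx_sysconf('10.0.1.1', settings)        # True (10.0/16)
          #     _proxy_bypass_macosx_sysconf('www.python.org', settings)  # False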
    2643  
    2644  if sys.platform == 'darwin':
    2645      from _scproxy import _get_proxy_settings, _get_proxies
    2646  
    2647      def proxy_bypass_macosx_sysconf(host):
    2648          proxy_settings = _get_proxy_settings()
    2649          return _proxy_bypass_macosx_sysconf(host, proxy_settings)
    2650  
    2651      def getproxies_macosx_sysconf():
    2652          """Return a dictionary of scheme -> proxy server URL mappings.
    2653  
    2654          This function uses the MacOSX framework SystemConfiguration
    2655          to fetch the proxy information.
    2656          """
    2657          return _get_proxies()
    2658  
    2659  
    2660  
    2661      def proxy_bypass(host):
    2662          """Return True if the host should be bypassed.
    2663  
    2664          Checks proxy settings gathered from the environment, if specified,
    2665          or from the MacOSX framework SystemConfiguration.
    2666  
    2667          """
    2668          proxies = getproxies_environment()
    2669          if proxies:
    2670              return proxy_bypass_environment(host, proxies)
    2671          else:
    2672              return proxy_bypass_macosx_sysconf(host)
    2673  
    2674      def getproxies():
    2675          return getproxies_environment() or getproxies_macosx_sysconf()
    2676  
    2677  
    2678  elif os.name == 'nt':
    2679      def getproxies_registry():
    2680          """Return a dictionary of scheme -> proxy server URL mappings.
    2681  
    2682          Win32 uses the registry to store proxies.
    2683  
    2684          """
    2685          proxies = {}
    2686          try:
    2687              import winreg
    2688          except ImportError:
    2689              # Std module, so should be around - but you never know!
    2690              return proxies
    2691          try:
    2692              internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
    2693                  r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
    2694              proxyEnable = winreg.QueryValueEx(internetSettings,
    2695                                                 'ProxyEnable')[0]
    2696              if proxyEnable:
    2697                  # Returned as Unicode but problems if not converted to ASCII
    2698                  proxyServer = str(winreg.QueryValueEx(internetSettings,
    2699                                                         'ProxyServer')[0])
    2700                  if '=' not in proxyServer and ';' not in proxyServer:
    2701                      # Use one setting for all protocols.
    2702                      proxyServer = 'http={0};https={0};ftp={0}'.format(proxyServer)
    2703                  for p in proxyServer.split(';'):
    2704                      protocol, address = p.split('=', 1)
    2705                      # See if address has a type:// prefix
    2706                      if not re.match('(?:[^/:]+)://', address):
    2707                          # Add type:// prefix to address without specifying type
    2708                          if protocol in ('http', 'https', 'ftp'):
    2709                              # The default proxy type of Windows is HTTP
    2710                              address = 'http://' + address
    2711                          elif protocol == 'socks':
    2712                              address = 'socks://' + address
    2713                      proxies[protocol] = address
    2714                  # Use SOCKS proxy for HTTP(S) protocols
    2715                  if proxies.get('socks'):
    2716                      # The default SOCKS proxy type of Windows is SOCKS4
    2717                      address = re.sub(r'^socks://', 'socks4://', proxies['socks'])
    2718                      proxies['http'] = proxies.get('http') or address
    2719                      proxies['https'] = proxies.get('https') or address
    2720              internetSettings.Close()
    2721          except (OSError, ValueError, TypeError):
    2722              # Either registry key not found etc, or the value in an
    2723              # unexpected format.
    2724              # proxies already set up to be empty so nothing to do
    2725              pass
    2726          return proxies
    2727  
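              # Illustrative sketch of the ProxyServer parsing above (registry
              # values are made up):
              #
              #     ProxyServer = 'proxy.example.com:8080'
              #       -> {'http': 'http://proxy.example.com:8080',
              #           'https': 'http://proxy.example.com:8080',
              #           'ftp': 'http://proxy.example.com:8080'}
              #     ProxyServer = 'http=127.0.0.1:3128;socks=127.0.0.1:1080'
              #       -> {'http': 'http://127.0.0.1:3128',
              #           'socks': 'socks://127.0.0.1:1080',
              #           'https': 'socks4://127.0.0.1:1080'}
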
    2728      def getproxies():
    2729          """Return a dictionary of scheme -> proxy server URL mappings.
    2730  
    2731          Returns settings gathered from the environment, if specified,
    2732          or the registry.
    2733  
    2734          """
    2735          return getproxies_environment() or getproxies_registry()
    2736  
    2737      def proxy_bypass_registry(host):
    2738          try:
    2739              import winreg
    2740          except ImportError:
    2741              # Std module, so should be around - but you never know!
    2742              return 0
    2743          try:
    2744              internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
    2745                  r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
    2746              proxyEnable = winreg.QueryValueEx(internetSettings,
    2747                                                 'ProxyEnable')[0]
    2748              proxyOverride = str(winreg.QueryValueEx(internetSettings,
    2749                                                       'ProxyOverride')[0])
    2750              # ^^^^ Returned as Unicode but problems if not converted to ASCII
    2751          except OSError:
    2752              return 0
    2753          if not proxyEnable or not proxyOverride:
    2754              return 0
    2755          # try to make a host list from name and IP address.
    2756          rawHost, port = _splitport(host)
    2757          host = [rawHost]
    2758          try:
    2759              addr = socket.gethostbyname(rawHost)
    2760              if addr != rawHost:
    2761                  host.append(addr)
    2762          except OSError:
    2763              pass
    2764          try:
    2765              fqdn = socket.getfqdn(rawHost)
    2766              if fqdn != rawHost:
    2767                  host.append(fqdn)
    2768          except OSError:
    2769              pass
    2770          # make a list of values to check from the registry entry; the
    2771          # special '<local>' entry matches any host name that does not
    2772          # contain a dot (i.e. an unqualified local name).
    2773          proxyOverride = proxyOverride.split(';')
    2774          # now check if we match one of the registry values.
    2775          for test in proxyOverride:
    2776              if test == '<local>':
    2777                  if '.' not in rawHost:
    2778                      return 1
    2779              test = test.replace(".", r"\.")     # mask dots
    2780              test = test.replace("*", r".*")     # change glob sequence
    2781              test = test.replace("?", r".")      # change glob char
    2782              for val in host:
    2783                  if re.match(test, val, re.I):
    2784                      return 1
    2785          return 0
    2786  
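              # Illustrative sketch of the ProxyOverride matching above (registry
              # values and host names are made up):
              #
              #     ProxyOverride = '<local>;*.internal.example.com;192.168.*'
              #     proxy_bypass_registry('fileserver')                  # 1 ('<local>', no dot)
              #     proxy_bypass_registry('wiki.internal.example.com')   # 1 (glob match)
              #     proxy_bypass_registry('www.python.org')              # 0
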
    2787      def proxy_bypass(host):
    2788          """Return True if the host should be bypassed.
    2789  
    2790          Checks proxy settings gathered from the environment, if specified,
    2791          or the registry.
    2792  
    2793          """
    2794          proxies = getproxies_environment()
    2795          if proxies:
    2796              return proxy_bypass_environment(host, proxies)
    2797          else:
    2798              return proxy_bypass_registry(host)
    2799  
    2800  else:
    2801      # By default use environment variables
    2802      getproxies = getproxies_environment
    2803      proxy_bypass = proxy_bypass_environment