1 """An extensible library for opening URLs using a variety of protocols
2
3 The simplest way to use this module is to call the urlopen function,
4 which accepts a string containing a URL or a Request object (described
5 below). It opens the URL and returns the results as file-like
6 object; the returned object has some extra methods described below.
7
8 The OpenerDirector manages a collection of Handler objects that do
9 all the actual work. Each Handler implements a particular protocol or
10 option. The OpenerDirector is a composite object that invokes the
11 Handlers needed to open the requested URL. For example, the
12 HTTPHandler performs HTTP GET and POST requests and deals with
13 non-error returns. The HTTPRedirectHandler automatically deals with
14 HTTP 301, 302, 303, 307, and 308 redirect errors, and the
15 HTTPDigestAuthHandler deals with digest authentication.
16
17 urlopen(url, data=None) -- Basic usage is the same as original
18 urllib. pass the url and optionally data to post to an HTTP URL, and
19 get a file-like object back. One difference is that you can also pass
20 a Request instance instead of URL. Raises a URLError (subclass of
21 OSError); for HTTP errors, raises an HTTPError, which can also be
22 treated as a valid response.
23
24 build_opener -- Function that creates a new OpenerDirector instance.
25 Will install the default handlers. Accepts one or more Handlers as
26 arguments, either instances or Handler classes that it will
instantiate. If one of the arguments is a subclass of the default
handler, the argument will be installed instead of the default.
29
30 install_opener -- Installs a new opener as the default opener.
31
32 objects of interest:
33
34 OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
35 the Handler classes, while dealing with requests and responses.
36
37 Request -- An object that encapsulates the state of a request. The
38 state can be as simple as the URL. It can also include extra HTTP
39 headers, e.g. a User-Agent.
40
41 BaseHandler --
42
43 internals:
44 BaseHandler and parent
45 _call_chain conventions
46
47 Example usage:
48
49 import urllib.request
50
51 # set up authentication info
52 authinfo = urllib.request.HTTPBasicAuthHandler()
53 authinfo.add_password(realm='PDQ Application',
54 uri='https://mahler:8092/site-updates.py',
55 user='klem',
56 passwd='geheim$parole')
57
58 proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
59
60 # build a new opener that adds authentication and caching FTP handlers
61 opener = urllib.request.build_opener(proxy_support, authinfo,
62 urllib.request.CacheFTPHandler)
63
64 # install it
65 urllib.request.install_opener(opener)
66
67 f = urllib.request.urlopen('https://www.python.org/')
68 """
69
70 # XXX issues:
71 # If an authentication error handler that tries to perform
72 # authentication for some reason but fails, how should the error be
73 # signalled? The client needs to know the HTTP error code. But if
# the handler knows what the problem was, e.g., that it didn't know
# the hash algorithm that was requested in the challenge, it would be
# good to pass that information along to the client, too.
77 # ftp errors aren't handled cleanly
78 # check digest against correct (i.e. non-apache) implementation
79
80 # Possible extensions:
81 # complex proxies XXX not sure what exactly was meant by this
82 # abstract factory for opener
83
84 import base64
85 import bisect
86 import email
87 import hashlib
88 import http.client
89 import io
90 import os
91 import re
92 import socket
93 import string
94 import sys
95 import time
96 import tempfile
97 import contextlib
98 import warnings
99
100
101 from urllib.error import URLError, HTTPError, ContentTooShortError
102 from urllib.parse import (
103 urlparse, urlsplit, urljoin, unwrap, quote, unquote,
104 _splittype, _splithost, _splitport, _splituser, _splitpasswd,
105 _splitattr, _splitquery, _splitvalue, _splittag, _to_bytes,
106 unquote_to_bytes, urlunparse)
107 from urllib.response import addinfourl, addclosehook
108
109 # check for SSL
# Record whether the ssl module is importable; urlopen() refuses the
# deprecated cafile/capath/cadefault arguments when it is missing.
try:
    import ssl
    _have_ssl = True
except ImportError:
    _have_ssl = False
116
# Public API of this module.
__all__ = [
    # Classes
    'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
    'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
    'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
    'HTTPPasswordMgrWithPriorAuth', 'AbstractBasicAuthHandler',
    'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler',
    'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'HTTPHandler',
    'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
    'UnknownHandler', 'HTTPErrorProcessor',
    # Functions
    'urlopen', 'install_opener', 'build_opener',
    'pathname2url', 'url2pathname', 'getproxies',
    # Legacy interface
    'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
]

# used in User-Agent header sent
__version__ = '%d.%d' % sys.version_info[:2]

# Global opener installed by install_opener(); urlopen() creates a
# default one lazily while this is still None.
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False, context=None):
    '''Open the URL url, which can be either a string or a Request object.

    *data* must be an object specifying additional data to be sent to
    the server, or None if no such data is needed. See Request for
    details.

    urllib.request module uses HTTP/1.1 and includes a "Connection:close"
    header in its HTTP requests.

    The optional *timeout* parameter specifies a timeout in seconds for
    blocking operations like the connection attempt (if not specified, the
    global default timeout setting will be used). This only works for HTTP,
    HTTPS and FTP connections.

    If *context* is specified, it must be a ssl.SSLContext instance describing
    the various SSL options. See HTTPSConnection for more details.

    The optional *cafile* and *capath* parameters specify a set of trusted CA
    certificates for HTTPS requests. cafile should point to a single file
    containing a bundle of CA certificates, whereas capath should point to a
    directory of hashed certificate files. More information can be found in
    ssl.SSLContext.load_verify_locations().

    The *cadefault* parameter is ignored.

    This function always returns an object which can work as a
    context manager and has the properties url, headers, and status.
    See urllib.response.addinfourl for more detail on these properties.

    For HTTP and HTTPS URLs, this function returns a http.client.HTTPResponse
    object slightly modified. In addition to the three new methods above, the
    msg attribute contains the same information as the reason attribute ---
    the reason phrase returned by the server --- instead of the response
    headers as it is specified in the documentation for HTTPResponse.

    For FTP, file, and data URLs and requests explicitly handled by legacy
    URLopener and FancyURLopener classes, this function returns a
    urllib.response.addinfourl object.

    Note that None may be returned if no handler handles the request (though
    the default installed global OpenerDirector uses UnknownHandler to ensure
    this never happens).

    In addition, if proxy settings are detected (for example, when a *_proxy
    environment variable like http_proxy is set), ProxyHandler is default
    installed and makes sure the requests are handled through the proxy.

    '''
    global _opener
    if cafile or capath or cadefault:
        # warnings is already imported at module level; no local import
        # needed.  stacklevel 2 points the warning at our caller.
        warnings.warn("cafile, capath and cadefault are deprecated, use a "
                      "custom context instead.", DeprecationWarning, 2)
        if context is not None:
            raise ValueError(
                "You can't pass both context and any of cafile, capath, and "
                "cadefault"
            )
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
                                             cafile=cafile,
                                             capath=capath)
        # send ALPN extension to indicate HTTP/1.1 protocol
        context.set_alpn_protocols(['http/1.1'])

    if context:
        # Explicit (or freshly built) SSL context: use a one-off opener
        # instead of the installed global one.
        opener = build_opener(HTTPSHandler(context=context))
    elif _opener is None:
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
216
def install_opener(opener):
    """Install *opener* as the global default opener used by urlopen()."""
    global _opener
    _opener = opener
220
# Paths of temporary files created by urlretrieve(); removed by urlcleanup().
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve *url* into a file on the local filesystem.

    If *filename* is given the data is written there; otherwise a named
    temporary file is created (and remembered so urlcleanup() can delete
    it).  *reporthook*, when supplied, is called with (block number,
    block size, total size) once before the first read and again after
    every block.  *data* is passed through to urlopen().

    If the URL uses the file: scheme and no *filename* was given, the
    local path is returned directly without copying.

    Returns a (filename, headers) tuple.  Raises ContentTooShortError
    when fewer bytes arrive than the Content-Length header promised.
    """
    url_type, path = _splittype(url)

    with contextlib.closing(urlopen(url, data)) as source:
        headers = source.info()

        # Local file:// URL and no explicit destination: no copy needed.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Pick the output file: caller-supplied path, or a temp file
        # registered for later cleanup.
        if filename:
            target = open(filename, 'wb')
        else:
            target = tempfile.NamedTemporaryFile(delete=False)
            filename = target.name
            _url_tempfiles.append(filename)

        with target:
            result = filename, headers
            blocksize = 1024 * 8
            size = -1
            bytes_read = 0
            block_count = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])

            if reporthook:
                reporthook(block_count, blocksize, size)

            while block := source.read(blocksize):
                bytes_read += len(block)
                target.write(block)
                block_count += 1
                if reporthook:
                    reporthook(block_count, blocksize, size)

    if size >= 0 and bytes_read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (bytes_read, size), result)

    return result
281
def urlcleanup():
    """Delete the temporary files urlretrieve() created and drop the
    installed global opener."""
    global _opener
    for path in _url_tempfiles:
        # Best effort: a file may already be gone or be undeletable.
        with contextlib.suppress(OSError):
            os.unlink(path)
    _url_tempfiles.clear()

    if _opener:
        _opener = None
294
295 # copied from cookielib.py
# Matches a trailing :port on an authority (copied from cookielib.py).
_cut_port_re = re.compile(r":\d+$", re.ASCII)

def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.
    """
    host = urlsplit(request.full_url).netloc
    if not host:
        # Relative URL: fall back to the Host header, if any.
        host = request.get_header("Host", "")
    # Strip any :port suffix and normalize case.
    return _cut_port_re.sub("", host, 1).lower()
312
class Request:
    """Encapsulate the state of a single URL request.

    Tracks the target URL (with any fragment split off), the optional
    request body (*data*), regular and unredirected headers, proxy
    tunnelling state and an optional explicit HTTP *method*.
    """

    def __init__(self, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False,
                 method=None):
        """Create a request for *url*.

        headers: optional mapping of initial headers.  (This previously
            defaulted to a shared mutable ``{}``; it now defaults to
            None to avoid the mutable-default pitfall.  Behavior for
            callers is unchanged, since the mapping was only read.)
        origin_req_host/unverifiable: cookie-related bookkeeping;
            origin_req_host defaults to request_host(self).
        method: explicit HTTP method; otherwise get_method() infers
            GET or POST from the presence of *data*.
        """
        self.full_url = url
        self.headers = {}
        self.unredirected_hdrs = {}
        self._data = None
        self.data = data
        self._tunnel_host = None
        if headers is not None:
            for key, value in headers.items():
                self.add_header(key, value)
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        if method:
            self.method = method

    @property
    def full_url(self):
        """The complete URL, with any fragment re-attached."""
        if self.fragment:
            return '{}#{}'.format(self._full_url, self.fragment)
        return self._full_url

    @full_url.setter
    def full_url(self, url):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self._full_url = unwrap(url)
        self._full_url, self.fragment = _splittag(self._full_url)
        self._parse()

    @full_url.deleter
    def full_url(self):
        self._full_url = None
        self.fragment = None
        self.selector = ''

    @property
    def data(self):
        """The request body, or None when there is none."""
        return self._data

    @data.setter
    def data(self, data):
        if data != self._data:
            self._data = data
            # issue 16464
            # if we change data we need to remove content-length header
            # (cause it's most probably calculated for previous value)
            if self.has_header("Content-length"):
                self.remove_header("Content-length")

    @data.deleter
    def data(self):
        self.data = None

    def _parse(self):
        # Split the stored URL into type (scheme), host and selector.
        self.type, rest = _splittype(self._full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = _splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        default_method = "POST" if self.data is not None else "GET"
        return getattr(self, 'method', default_method)

    def get_full_url(self):
        """Compatibility accessor for the full_url property."""
        return self.full_url

    def set_proxy(self, host, type):
        """Direct this request at proxy *host* of the given *type*."""
        if self.type == 'https' and not self._tunnel_host:
            # https requests are tunnelled: remember the real host and
            # leave the selector untouched.
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True once set_proxy() pointed the selector at the
        full URL (the non-tunnelled proxy case)."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if *header_name* is in either header store."""
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return the header value; regular headers take precedence."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def remove_header(self, header_name):
        """Remove *header_name* from both header stores, if present."""
        self.headers.pop(header_name, None)
        self.unredirected_hdrs.pop(header_name, None)

    def header_items(self):
        """Return all headers as (name, value) tuples; regular headers
        override unredirected ones with the same name."""
        hdrs = {**self.unredirected_hdrs, **self.headers}
        return list(hdrs.items())
421
class OpenerDirector:
    """Manage a chain of BaseHandler instances and use them to open URLs.

    Handlers register themselves via add_handler(), which classifies
    their methods by name; open() then runs request pre-processors,
    dispatches to a protocol's *_open method, and runs response
    post-processors, while error() dispatches to *_error handlers.
    """
    def __init__(self):
        # Headers added to every request (User-agent identifies us).
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        # Each maps a protocol (or error kind) to a list of handlers
        # kept sorted by handler_order (see BaseHandler.__lt__).
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler*, classifying its methods by naming pattern:
        "<proto>_open", "<proto>_error_<code>", "<proto>_request" and
        "<proto>_response"."""
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            # Split at the first underscore: protocol vs condition
            # (e.g. "http" + "error_404").
            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # kind is whatever follows "error_": an int status code
                # when numeric, otherwise left as a string.
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            # Insert keeping the per-kind list ordered by handler_order.
            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could. Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (URL string or Request) and return the response."""
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Dispatch: default_open, then <protocol>_open, then unknown_open."""
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered <proto>_error handlers."""
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http'] # https is not different than http
            # When called from HTTPErrorProcessor, args is
            # (request, response, code, msg, hdrs), so args[2] is the
            # HTTP status code.
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            # Nothing handled the specific code; fall back to the
            # http_error_default chain.
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
560
561 # XXX probably also want an abstract factory that knows when it makes
562 # sense to skip a superclass in favor of a subclass and when it might
563 # make sense to include both
564
def build_opener(*handlers):
    """Create an OpenerDirector from a list of handlers.

    The opener is pre-loaded with the standard handlers (proxy, HTTP,
    FTP, file:, data:, redirects, error processing, and HTTPS when
    available).  A default handler is omitted whenever one of *handlers*
    is an instance or a subclass of it; bare classes in *handlers* are
    instantiated automatically.
    """
    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor,
                       DataHandler]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    def _superseded(default):
        # True when the caller supplied a replacement for this default.
        for supplied in handlers:
            if isinstance(supplied, type):
                if issubclass(supplied, default):
                    return True
            elif isinstance(supplied, default):
                return True
        return False

    for klass in default_classes:
        if not _superseded(klass):
            opener.add_handler(klass())

    for supplied in handlers:
        if isinstance(supplied, type):
            supplied = supplied()
        opener.add_handler(supplied)
    return opener
600
class BaseHandler:
    """Common base class for OpenerDirector handlers.

    Handlers are kept sorted by ``handler_order`` (see __lt__); lower
    values are consulted earlier in the chain.
    """
    handler_order = 500

    def add_parent(self, parent):
        """Remember the OpenerDirector this handler was added to."""
        self.parent = parent

    def close(self):
        """No-op; only exists for backwards compatibility."""
        pass

    def __lt__(self, other):
        try:
            other_order = other.handler_order
        except AttributeError:
            # Preserve the old behavior of sorting custom classes that
            # are unaware of handler_order after the default handlers.
            return True
        return self.handler_order < other_order
618
619
class HTTPErrorProcessor(BaseHandler):
    """Route non-2xx HTTP responses through the opener's error chain."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        status, reason, response_headers = (response.code, response.msg,
                                            response.info())
        # RFC 2616: a 2xx status means the request was successfully
        # received, understood, and accepted; anything else is handed
        # to OpenerDirector.error() for the error handlers to resolve.
        if not 200 <= status < 300:
            response = self.parent.error(
                'http', request, response, status, reason, response_headers)
        return response

    https_response = http_response
636
class HTTPDefaultErrorHandler(BaseHandler):
    """Last resort: raise HTTPError for any HTTP error nobody handled."""
    def http_error_default(self, req, fp, code, msg, hdrs):
        # HTTPError carries the response body (fp), so callers may also
        # treat the exception as a (non-successful) response.
        raise HTTPError(req.full_url, code, msg, hdrs, fp)
640
class HTTPRedirectHandler(BaseHandler):
    """Follow 301/302/303/307/308 redirects, with loop protection."""
    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # GET/HEAD may follow any of these codes; POST only 301/302/303.
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # Drop body-describing headers: the new request carries no data.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = {k: v for k, v in req.headers.items()
                      if k.lower() not in CONTENT_HEADERS}
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen. Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Validate the redirect target, build the new request via
        redirect_request() and re-open it through the parent opener."""
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        if not urlparts.path and urlparts.netloc:
            urlparts = list(urlparts)
            urlparts[2] = "/"
            newurl = urlunparse(urlparts)

        # http.client.parse_headers() decodes as ISO-8859-1. Recover the
        # original bytes and percent-encode non-ASCII bytes, and any special
        # characters such as the space.
        newurl = quote(
            newurl, encoding="iso-8859-1", safe=string.punctuation)
        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
752
753
754 def _parse_proxy(proxy):
755 """Return (scheme, user, password, host/port) given a URL or an authority.
756
757 If a URL is supplied, it must have an authority (host:port) component.
758 According to RFC 3986, having an authority component means the URL must
759 have two slashes after the scheme.
760 """
761 scheme, r_scheme = _splittype(proxy)
762 if not r_scheme.startswith("/"):
763 # authority
764 scheme = None
765 authority = proxy
766 else:
767 # URL
768 if not r_scheme.startswith("//"):
769 raise ValueError("proxy URL with no authority: %r" % proxy)
770 # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
771 # and 3.3.), path is empty or starts with '/'
772 if '@' in r_scheme:
773 host_separator = r_scheme.find('@')
774 end = r_scheme.find("/", host_separator)
775 else:
776 end = r_scheme.find("/", 2)
777 if end == -1:
778 end = None
779 authority = r_scheme[2:end]
780 userinfo, hostport = _splituser(authority)
781 if userinfo is not None:
782 user, password = _splitpasswd(userinfo)
783 else:
784 user = password = None
785 return scheme, user, password, hostport
786
class ProxyHandler(BaseHandler):
    """Rewrite requests to go through the proxies in a {scheme: url} map."""
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        # Default to environment/platform proxy settings.
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # Synthesize a "<scheme>_open" method per configured scheme so
        # OpenerDirector.add_handler() registers us for it.  Default
        # arguments bind url/type/method now, avoiding the late-binding
        # closure pitfall.
        for type, url in proxies.items():
            type = type.lower()
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open:
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Point *req* at *proxy*; may re-dispatch via the parent opener."""
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type

        # Skip the proxy entirely when proxy_bypass() exempts this host.
        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            # Pre-emptively attach Basic proxy credentials from the URL.
            user_pass = '%s:%s' % (unquote(user),
                                   unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)
829
class HTTPPasswordMgr:
    """Map (realm, URI) pairs to (user, password) credentials."""

    def __init__(self):
        # realm -> {tuple-of-reduced-uris: (user, password)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for *realm* at *uri* (a URI or sequence)."""
        uris = [uri] if isinstance(uri, str) else uri
        by_realm = self.passwd.setdefault(realm, {})
        # Store the reduced URIs both with and without the scheme's
        # default port so later lookups match either spelling.
        for default_port in (True, False):
            key = tuple(self.reduce_uri(u, default_port) for u in uris)
            by_realm[key] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, password) for realm/authuri, or (None, None)."""
        candidates = self.passwd.get(realm, {})
        for default_port in (True, False):
            target = self.reduce_uri(authuri, default_port)
            for uri_group, authinfo in candidates.items():
                if any(self.is_suburi(u, target) for u in uri_group):
                    return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # Full URI: take scheme, authority and path.
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # Bare host or host:port.
            scheme = None
            authority = uri
            path = '/'
        host, port = _splitport(authority)
        if default_port and port is None and scheme is not None:
            # Normalize by appending the scheme's well-known port.
            dport = {"http": 80, "https": 443}.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        prefix = base[1]
        if not prefix.endswith('/'):
            prefix += '/'
        return test[1].startswith(prefix)
892
893
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to a catch-all (None) realm."""

    def find_user_password(self, realm, authuri):
        """Look up *realm* first; fall back to the default-realm entry."""
        user, password = HTTPPasswordMgr.find_user_password(self, realm,
                                                            authuri)
        if user is None:
            # No realm-specific match: credentials registered under
            # realm=None act as the default for any realm.
            user, password = HTTPPasswordMgr.find_user_password(self, None,
                                                                authuri)
        return user, password
902
903
class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm):
    """Password manager that also tracks URIs eligible for preemptive
    authentication (sending credentials without waiting for a 401)."""

    def __init__(self, *args, **kwargs):
        # reduced-uri -> bool flag for preemptive authentication
        self.authenticated = {}
        super().__init__(*args, **kwargs)

    def add_password(self, realm, uri, user, passwd, is_authenticated=False):
        """Register credentials, optionally marking *uri* pre-authenticated."""
        self.update_authenticated(uri, is_authenticated)
        # Add a default for prior auth requests
        if realm is not None:
            super().add_password(None, uri, user, passwd)
        super().add_password(realm, uri, user, passwd)

    def update_authenticated(self, uri, is_authenticated=False):
        """Set the preemptive-auth flag for *uri* (a URI or sequence)."""
        uris = [uri] if isinstance(uri, str) else uri
        # Record both port spellings, mirroring add_password().
        for default_port in (True, False):
            for u in uris:
                reduced = self.reduce_uri(u, default_port)
                self.authenticated[reduced] = is_authenticated

    def is_authenticated(self, authuri):
        """Return the stored flag for *authuri*, or None if unknown."""
        for default_port in (True, False):
            target = self.reduce_uri(authuri, default_port)
            for uri in self.authenticated:
                if self.is_suburi(uri, target):
                    return self.authenticated[uri]
        return None
933
934
935 class ESC[4;38;5;81mAbstractBasicAuthHandler:
936
937 # XXX this allows for multiple auth-schemes, but will stupidly pick
938 # the last one with a realm specified.
939
940 # allow for double- and single-quoted realm values
941 # (single quotes are a violation of the RFC, but appear in the wild)
942 rx = re.compile('(?:^|,)' # start of the string or ','
943 '[ \t]*' # optional whitespaces
944 '([^ \t,]+)' # scheme like "Basic"
945 '[ \t]+' # mandatory whitespaces
946 # realm=xxx
947 # realm='xxx'
948 # realm="xxx"
949 'realm=(["\']?)([^"\']*)\\2',
950 re.I)
951
952 # XXX could pre-emptively send auth info already accepted (RFC 2617,
953 # end of section 2, and section 1.2 immediately after "credentials"
954 # production).
955
    def __init__(self, password_mgr=None):
        """Store *password_mgr* (default: a fresh HTTPPasswordMgr)."""
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        # Expose the manager's add_password directly on the handler.
        self.add_password = self.passwd.add_password
961
962 def _parse_realm(self, header):
963 # parse WWW-Authenticate header: accept multiple challenges per header
964 found_challenge = False
965 for mo in AbstractBasicAuthHandler.rx.finditer(header):
966 scheme, quote, realm = mo.groups()
967 if quote not in ['"', "'"]:
968 warnings.warn("Basic Auth Realm was unquoted",
969 UserWarning, 3)
970
971 yield (scheme, realm)
972
973 found_challenge = True
974
975 if not found_challenge:
976 if header:
977 scheme = header.split()[0]
978 else:
979 scheme = ''
980 yield (scheme, None)
981
982 def http_error_auth_reqed(self, authreq, host, req, headers):
983 # host may be an authority (without userinfo) or a URL with an
984 # authority
985 headers = headers.get_all(authreq)
986 if not headers:
987 # no header found
988 return
989
990 unsupported = None
991 for header in headers:
992 for scheme, realm in self._parse_realm(header):
993 if scheme.lower() != 'basic':
994 unsupported = scheme
995 continue
996
997 if realm is not None:
998 # Use the first matching Basic challenge.
999 # Ignore following challenges even if they use the Basic
1000 # scheme.
1001 return self.retry_http_basic_auth(host, req, realm)
1002
1003 if unsupported is not None:
1004 raise ValueError("AbstractBasicAuthHandler does not "
1005 "support the following scheme: %r"
1006 % (scheme,))
1007
1008 def retry_http_basic_auth(self, host, req, realm):
1009 user, pw = self.passwd.find_user_password(realm, host)
1010 if pw is not None:
1011 raw = "%s:%s" % (user, pw)
1012 auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
1013 if req.get_header(self.auth_header, None) == auth:
1014 return None
1015 req.add_unredirected_header(self.auth_header, auth)
1016 return self.parent.open(req, timeout=req.timeout)
1017 else:
1018 return None
1019
1020 def http_request(self, req):
1021 if (not hasattr(self.passwd, 'is_authenticated') or
1022 not self.passwd.is_authenticated(req.full_url)):
1023 return req
1024
1025 if not req.has_header('Authorization'):
1026 user, passwd = self.passwd.find_user_password(None, req.full_url)
1027 credentials = '{0}:{1}'.format(user, passwd).encode()
1028 auth_str = base64.standard_b64encode(credentials).decode()
1029 req.add_unredirected_header('Authorization',
1030 'Basic {}'.format(auth_str.strip()))
1031 return req
1032
1033 def http_response(self, req, response):
1034 if hasattr(self.passwd, 'is_authenticated'):
1035 if 200 <= response.code < 300:
1036 self.passwd.update_authenticated(req.full_url, True)
1037 else:
1038 self.passwd.update_authenticated(req.full_url, False)
1039 return response
1040
1041 https_request = http_request
1042 https_response = http_response
1043
1044
1045
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Handle 401 responses by retrying with HTTP Basic credentials."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # The request's full URL doubles as the authority handed to the
        # shared base-class machinery.
        return self.http_error_auth_reqed('www-authenticate',
                                          req.full_url, req, headers)
1055
1056
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Handle 407 responses by retrying with Basic proxy credentials."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # http_error_auth_reqed requires that there is no userinfo component in
        # authority. Assume there isn't one, since urllib.request does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        return self.http_error_auth_reqed('proxy-authenticate',
                                          req.host, req, headers)
1070
1071
# Return n random bytes.
# os.urandom draws from the OS CSPRNG, which is adequate for cnonces.
_randombytes = os.urandom
1074
1075
class AbstractDigestAuthHandler:
    """Shared machinery for HTTP Digest authentication (RFC 2617).

    Subclasses supply ``auth_header`` (the request header to fill in)
    and route the relevant HTTP error into ``http_error_auth_reqed``.
    """

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        # State for the nc (nonce count) parameter: number of requests
        # sent so far with the current server nonce.
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Dispatch a 401/407 challenge to the Digest retry logic.

        Raises HTTPError after too many failed retries, ValueError for
        unsupported schemes other than Basic.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                # Basic is silently ignored here so that a Basic handler
                # further down the chain can deal with it.
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-issue *req* with a Digest Authorization header built from
        the server challenge *auth*; return the new response or None."""
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                # The same credentials already failed once; don't loop.
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the value of the Digest Authorization header for *req*
        from challenge dict *chal*, or return None when it can't be built
        (missing challenge fields or no stored credentials)."""
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        # NOTE: As per RFC 2617, when server sends "auth,auth-int", the client could use either `auth`
        # or `auth-int` to the response back. we use `auth` to send the response back.
        # Tokens are stripped because servers may legally put optional
        # whitespace after the comma ("auth, auth-int"); a plain
        # split(',') would then fail to match 'auth'.
        if qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        elif 'auth' in [q.strip() for q in qop.split(',')]:
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth', H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return (H, KD) hash helpers for *algorithm* ('MD5' or 'SHA').

        Raises ValueError for any other algorithm name.
        """
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        # XXX MD5-sess
        else:
            raise ValueError("Unsupported digest authentication "
                             "algorithm %r" % algorithm)
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
1220
1221
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        # Authenticate against the netloc component of the request URL.
        # The retry counter is reset only after the retry completes
        # without raising.
        authority = urlparse(req.full_url).netloc
        result = self.http_error_auth_reqed('www-authenticate',
                                            authority, req, headers)
        self.reset_retry_count()
        return result
1238
1239
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Handle 407 responses by retrying with Digest proxy credentials."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        # The retry counter is reset only after the retry completes
        # without raising.
        result = self.http_error_auth_reqed('proxy-authenticate',
                                            req.host, req, headers)
        self.reset_retry_count()
        return result
1251
class AbstractHTTPHandler(BaseHandler):
    """Base class shared by HTTPHandler and HTTPSHandler: fills in the
    headers a well-formed request needs and drives the http.client
    connection."""

    def __init__(self, debuglevel=None):
        # Fall back to http.client's class-wide default when no explicit
        # debug level is given.
        self._debuglevel = debuglevel if debuglevel is not None else http.client.HTTPConnection.debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def _get_content_length(self, request):
        # Delegate to http.client, which knows how to size bytes,
        # file-like objects and iterables (returns None when unknown).
        return http.client.HTTPConnection._get_content_length(
            request.data,
            request.get_method())

    def do_request_(self, request):
        """Prepare *request* for sending: add Content-type,
        Content-length/Transfer-encoding, Host and the opener's default
        headers as needed, then return it.

        Raises URLError when the request has no host.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes, an iterable of bytes, " \
                      "or a file object. It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if (not request.has_header('Content-length')
                    and not request.has_header('Transfer-encoding')):
                content_length = self._get_content_length(request)
                if content_length is not None:
                    request.add_unredirected_header(
                            'Content-length', str(content_length))
                else:
                    # Size of the payload is unknown: stream it chunked.
                    request.add_unredirected_header(
                            'Transfer-encoding', 'chunked')

        sel_host = host
        if request.has_proxy():
            # When proxied, the selector is a full URL; Host must name
            # the origin server, not the proxy.
            scheme, sel = _splittype(request.selector)
            sel_host, sel_path = _splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        h.set_debuglevel(self._debuglevel)

        # Unredirected headers take precedence over normal headers with
        # the same name.
        headers = dict(req.unredirected_hdrs)
        headers.update({k: v for k, v in req.headers.items()
                        if k not in headers})

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = {name.title(): val for name, val in headers.items()}

        if req._tunnel_host:
            # HTTPS via a proxy: issue CONNECT first, carrying only the
            # proxy's own auth header.
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers,
                          encode_chunked=req.has_header('Transfer-encoding'))
            except OSError as err: # timeout error
                raise URLError(err)
            r = h.getresponse()
        except:
            h.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open. Manually
        # mark the socket to be closed when this response object goes away.
        if h.sock:
            h.sock.close()
            h.sock = None

        r.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with .headers, because urllib clients expect the response to
        # have the reason in .msg. It would be good to mark this
        # attribute is deprecated and get then to use info() or
        # .headers.
        r.msg = r.reason
        return r
1368
1369
class HTTPHandler(AbstractHTTPHandler):
    """Open http:// URLs through http.client.HTTPConnection."""

    def http_open(self, req):
        # All the real work (headers, tunnel, response) happens in the
        # shared base-class do_open().
        return self.do_open(http.client.HTTPConnection, req)

    http_request = AbstractHTTPHandler.do_request_
1376
# Only define HTTPSHandler when Python was built with SSL support.
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Open https:// URLs through http.client.HTTPSConnection."""

        def __init__(self, debuglevel=None, context=None, check_hostname=None):
            # Fall back to http.client's class-wide default debug level.
            debuglevel = debuglevel if debuglevel is not None else http.client.HTTPSConnection.debuglevel
            AbstractHTTPHandler.__init__(self, debuglevel)
            if context is None:
                # Build a default TLS context appropriate for the HTTP
                # protocol version in use (private http.client helper).
                http_version = http.client.HTTPSConnection._http_vsn
                context = http.client._create_https_context(http_version)
            if check_hostname is not None:
                context.check_hostname = check_hostname
            self._context = context

        def https_open(self, req):
            return self.do_open(http.client.HTTPSConnection, req,
                                context=self._context)

        https_request = AbstractHTTPHandler.do_request_

    # Advertise the handler only when it actually exists.
    __all__.append('HTTPSHandler')
1398
class HTTPCookieProcessor(BaseHandler):
    """Attach stored cookies to outgoing requests and harvest
    Set-Cookie headers from responses into a CookieJar."""

    def __init__(self, cookiejar=None):
        import http.cookiejar
        self.cookiejar = (http.cookiejar.CookieJar()
                          if cookiejar is None else cookiejar)

    def http_request(self, request):
        # Add any matching stored cookies to the outgoing request.
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        # Record any cookies the server set on this response.
        self.cookiejar.extract_cookies(response, request)
        return response

    https_request = http_request
    https_response = http_response
1416
class UnknownHandler(BaseHandler):
    """Last-resort handler: any scheme nobody else claimed is an error."""

    def unknown_open(self, req):
        raise URLError('unknown url type: %s' % req.type)
1421
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Surrounding double quotes are stripped from values.  Later
    occurrences of a key overwrite earlier ones.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Guard against an empty value ("k="), which would otherwise
        # raise IndexError on v[0].
        if v and v[0] == '"' and v[-1] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1431
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.
    """
    results = []
    current = []
    in_quotes = False
    pending_escape = False

    for ch in s:
        if pending_escape:
            # The preceding backslash protects this char verbatim.
            current.append(ch)
            pending_escape = False
        elif in_quotes:
            if ch == '\\':
                # Drop the backslash itself; keep only the escaped char.
                pending_escape = True
            else:
                if ch == '"':
                    in_quotes = False
                current.append(ch)
        elif ch == ',':
            # Unquoted comma: element boundary.
            results.append(''.join(current))
            current = []
        else:
            if ch == '"':
                in_quotes = True
            current.append(ch)

    # Flush the final element, if any.
    if current:
        results.append(''.join(current))

    return [elem.strip() for elem in results]
1474
class FileHandler(BaseHandler):
    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open a file:// URL, restricted to the local host.

        NOTE(review): when the URL names a remote host that *is* in
        get_names() (i.e. resolves to this machine), this method falls
        through and returns None, deferring to other handlers —
        confirm that is intended.
        """
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            if not req.host in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        # Lazily compute, and cache on the class, the set of IP
        # addresses that count as "this machine".
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for a local file, with Content-type,
        Content-length and Last-modified headers synthesized from file
        system metadata.  Raises URLError when the file is unreadable
        or not on the local host."""
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = _splitport(host)
            # Accept an empty host, or a host (without an explicit port)
            # that resolves to one of this machine's addresses.
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            raise URLError(exp)
        raise URLError('file not on local host')
1525
def _safe_gethostbyname(host):
    """Resolve *host* to an IP address; return None instead of raising
    when the name cannot be resolved."""
    try:
        return socket.gethostbyname(host)
    except socket.gaierror:
        return None
1531
class FTPHandler(BaseHandler):
    def ftp_open(self, req):
        """Fetch a file or directory listing over FTP and return it as
        an addinfourl.  Raises URLError on any FTP or resolution error."""
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = _splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = _splituser(host)
        if user:
            user, passwd = _splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except OSError as msg:
            raise URLError(msg)
        # Split the path into directory components plus the final file
        # name; an empty final component means a directory listing.
        path, attrs = _splitattr(req.selector)
        dirs = path.split('/')
        dirs = list(map(unquote, dirs))
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # 'I' (binary) transfer for a file, 'D' (listing) for a
            # directory; a ;type= attribute on the URL overrides this.
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = _splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            headers = ""
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            raise URLError(exp) from exp

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        # One fresh, non-persistent connection per request;
        # CacheFTPHandler overrides this to reuse connections.
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
1588
class CacheFTPHandler(FTPHandler):
    """FTPHandler variant that keeps a bounded cache of live FTP
    connections, keyed on (user, host, port, path, timeout)."""
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe

    def __init__(self):
        self.cache = {}       # key -> ftpwrapper connection
        self.timeout = {}     # key -> absolute expiry time
        self.soonest = 0      # earliest expiry among cached entries
        self.delay = 60       # seconds an idle connection stays cached
        self.max_conns = 16   # cap on the number of cached connections

    def setTimeout(self, t):
        self.delay = t

    def setMaxConns(self, m):
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a cached connection for this key, creating (and
        caching) one if necessary, and refresh its expiry time."""
        key = user, host, port, '/'.join(dirs), timeout
        if key in self.cache:
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
            # min() over an empty dict raises ValueError when every
            # entry just expired; default to 0 so the next call simply
            # re-scans.
            self.soonest = min(self.timeout.values(), default=0)

        # then check the size
        if len(self.cache) == self.max_conns:
            # NOTE(review): the size-based eviction drops the entry
            # without close(); confirm whether the connection should be
            # closed here as in the expiry path above.
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(self.timeout.values(), default=0)

    def clear_cache(self):
        """Close every cached connection and empty the cache."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1641
class DataHandler(BaseHandler):
    """Handle data: URLs as specified in RFC 2397.  POSTed data is
    ignored."""

    def data_open(self, req):
        # syntax:
        #   dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
        #   mediatype := [ type "/" subtype ] *( ";" parameter )
        #   data := *urlchar
        #   parameter := attribute "=" value
        url = req.full_url

        scheme, rest = url.split(":", 1)
        mediatype, payload = rest.split(",", 1)

        # even base64 encoded data URLs might be quoted so unquote in any case:
        payload = unquote_to_bytes(payload)
        if mediatype.endswith(";base64"):
            payload = base64.decodebytes(payload)
            mediatype = mediatype[:-7]

        if not mediatype:
            mediatype = "text/plain;charset=US-ASCII"

        headers = email.message_from_string(
            "Content-type: %s\nContent-length: %d\n" % (mediatype,
                                                        len(payload)))

        return addinfourl(io.BytesIO(payload), headers, url)
1671
1672
# Code move from the old urllib module

MAXFTPCACHE = 10 # Trim the ftp cache beyond this size

# Helper for non-unix systems
if os.name == 'nt':
    # Windows needs drive-letter and backslash handling.
    from nturl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)


# Module-wide cache of ftpwrapper connections shared by URLopener
# instances (see URLopener.__init__).
ftpcache = {}
1693
1694
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level fallback keeps __del__/cleanup() safe even when
    # __init__ never ran; filled with a list per instance in __init__.
    __tempfiles = None

    # User-Agent string sent with every request.
    version = "Python-urllib/%s" % __version__
1706
    # Constructor
    def __init__(self, proxies=None, **x509):
        # The whole URLopener API is deprecated in favor of urlopen()
        # and OpenerDirector; warn at the caller's call site.
        msg = "%(class)s style of invoking requests is deprecated. " \
              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # Optional TLS client-auth material (legacy x509 keyword API).
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve(). This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe. Bah.
1733
    def __del__(self):
        # Best-effort cleanup of temp files when the opener is collected.
        self.close()
1736
    def close(self):
        # Public alias for cleanup(): delete temp files created by
        # retrieve() and clear the temp cache.
        self.cleanup()
1739
1740 def cleanup(self):
1741 # This code sometimes runs when the rest of this module
1742 # has already been deleted, so it can't use any globals
1743 # or import anything.
1744 if self.__tempfiles:
1745 for file in self.__tempfiles:
1746 try:
1747 self.__unlink(file)
1748 except OSError:
1749 pass
1750 del self.__tempfiles[:]
1751 if self.tempcache:
1752 self.tempcache.clear()
1753
    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        # Stored as a (name, value) tuple, sent with every HTTP request.
        self.addheaders.append(args)
1758
    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(_to_bytes(fullurl))
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = _splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = _splittype(proxy)
            host, selector = _splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        # Dispatch on an open_<scheme> method ('-' mangled to '_').
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        # Refuse direct dispatch to open_local_file: because of the
        # '-'/'_' mangling a crafted scheme could otherwise reach it and
        # bypass the file:// handling.
        if not hasattr(self, name) or name == 'open_local_file':
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except (HTTPError, URLError):
            raise
        except OSError as msg:
            raise OSError('socket error', msg) from msg
1795
1796 def open_unknown(self, fullurl, data=None):
1797 """Overridable interface to open unknown URL type."""
1798 type, url = _splittype(fullurl)
1799 raise OSError('url error', 'unknown url type', type)
1800
1801 def open_unknown_proxy(self, proxy, fullurl, data=None):
1802 """Overridable interface to open unknown URL type."""
1803 type, url = _splittype(fullurl)
1804 raise OSError('url error', 'invalid proxy for %s' % type, proxy)
1805
    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object."""
        url = unwrap(_to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = _splittype(url)
        # A local file needs no copying: hand back its own path.
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(_splithost(url1)[1]), hdrs
            except OSError:
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                # Derive a suffix from the URL path so the temp file
                # keeps a recognizable extension; the temp file is
                # deleted later by cleanup().
                garbage, path = _splittype(url)
                garbage, path = _splithost(path or "")
                path, garbage = _splitquery(path or "")
                path, garbage = _splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                # reporthook is called once up front and then once per
                # block with (block number, block size, total size).
                if reporthook:
                    reporthook(blocknum, bs, size)
                while block := fp.read(bs):
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result
1866
    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieval or a host, relative-path pair.
        - data is payload for a POST request or None.
        """

        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            # Direct request: split authority and userinfo out of the URL.
            host, selector = _splithost(url)
            if host:
                user_passwd, host = _splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxied request: open() passed a (proxyhost, fullurl) pair.
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = _splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = _splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = _splithost(rest)
                if realhost:
                    user_passwd, realhost = _splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    # Host is exempt from proxying: talk to it directly.
                    host = realhost

        if not host: raise OSError('http error', 'no host given')

        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)
1960
1961 def open_http(self, url, data=None):
1962 """Use HTTP protocol."""
1963 return self._open_generic_http(http.client.HTTPConnection, url, data)
1964
1965 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
1966 """Handle http errors.
1967
1968 Derived class can override this, or provide specific handlers
1969 named http_error_DDD where DDD is the 3-digit error code."""
1970 # First check if there's a specific handler for this error
1971 name = 'http_error_%d' % errcode
1972 if hasattr(self, name):
1973 method = getattr(self, name)
1974 if data is None:
1975 result = method(url, fp, errcode, errmsg, headers)
1976 else:
1977 result = method(url, fp, errcode, errmsg, headers, data)
1978 if result: return result
1979 return self.http_error_default(url, fp, errcode, errmsg, headers)
1980
1981 def http_error_default(self, url, fp, errcode, errmsg, headers):
1982 """Default error handler: close the connection and raise OSError."""
1983 fp.close()
1984 raise HTTPError(url, errcode, errmsg, headers, None)
1985
    if _have_ssl:
        def _https_connection(self, host):
            # Build an HTTPSConnection for `host`.  When the user supplied a
            # client key/cert pair (self.key_file / self.cert_file are
            # presumably set by URLopener.__init__ -- defined above this
            # chunk), create a context matching http.client's defaults and
            # load the pair; otherwise let HTTPSConnection use its default.
            if self.key_file or self.cert_file:
                http_version = http.client.HTTPSConnection._http_vsn
                context = http.client._create_https_context(http_version)
                context.load_cert_chain(self.cert_file, self.key_file)
                # cert and key file means the user wants to authenticate.
                # enable TLS 1.3 PHA implicitly even for custom contexts.
                if context.post_handshake_auth is not None:
                    context.post_handshake_auth = True
            else:
                context = None
            return http.client.HTTPSConnection(host, context=context)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)
2003
2004 def open_file(self, url):
2005 """Use local file or FTP depending on form of URL."""
2006 if not isinstance(url, str):
2007 raise URLError('file error: proxy support for file protocol currently not implemented')
2008 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
2009 raise ValueError("file:// scheme is supported only on localhost")
2010 else:
2011 return self.open_local_file(url)
2012
2013 def open_local_file(self, url):
2014 """Use local file."""
2015 import email.utils
2016 import mimetypes
2017 host, file = _splithost(url)
2018 localname = url2pathname(file)
2019 try:
2020 stats = os.stat(localname)
2021 except OSError as e:
2022 raise URLError(e.strerror, e.filename)
2023 size = stats.st_size
2024 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
2025 mtype = mimetypes.guess_type(url)[0]
2026 headers = email.message_from_string(
2027 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
2028 (mtype or 'text/plain', size, modified))
2029 if not host:
2030 urlfile = file
2031 if file[:1] == '/':
2032 urlfile = 'file://' + file
2033 return addinfourl(open(localname, 'rb'), headers, urlfile)
2034 host, port = _splitport(host)
2035 if (not port
2036 and socket.gethostbyname(host) in ((localhost(),) + thishost())):
2037 urlfile = file
2038 if file[:1] == '/':
2039 urlfile = 'file://' + file
2040 elif file[:2] == './':
2041 raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
2042 return addinfourl(open(localname, 'rb'), headers, urlfile)
2043 raise URLError('local file error: not on local host')
2044
    def open_ftp(self, url):
        """Use FTP protocol."""
        if not isinstance(url, str):
            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = _splithost(url)
        if not host: raise URLError('ftp error: no host given')
        host, port = _splitport(host)
        # Credentials may be embedded as user[:password]@host.
        user, host = _splituser(host)
        if user: user, passwd = _splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = _splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        # Open connections are cached in self.ftpcache keyed by
        # (user, host, port, directory) so later requests can reuse them.
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Default transfer type: 'D' (directory listing) when no file
            # is named, binary image otherwise; a ;type= URL attribute may
            # override this below.
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = _splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as exp:
            raise URLError(f'ftp error: {exp}') from exp
2102
2103 def open_data(self, url, data=None):
2104 """Use "data" URL."""
2105 if not isinstance(url, str):
2106 raise URLError('data error: proxy support for data protocol currently not implemented')
2107 # ignore POSTed data
2108 #
2109 # syntax of data URLs:
2110 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
2111 # mediatype := [ type "/" subtype ] *( ";" parameter )
2112 # data := *urlchar
2113 # parameter := attribute "=" value
2114 try:
2115 [type, data] = url.split(',', 1)
2116 except ValueError:
2117 raise OSError('data error', 'bad data URL')
2118 if not type:
2119 type = 'text/plain;charset=US-ASCII'
2120 semi = type.rfind(';')
2121 if semi >= 0 and '=' not in type[semi:]:
2122 encoding = type[semi+1:]
2123 type = type[:semi]
2124 else:
2125 encoding = ''
2126 msg = []
2127 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
2128 time.gmtime(time.time())))
2129 msg.append('Content-type: %s' % type)
2130 if encoding == 'base64':
2131 # XXX is this encoding/decoding ok?
2132 data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
2133 else:
2134 data = unquote(data)
2135 msg.append('Content-Length: %d' % len(data))
2136 msg.append('')
2137 msg.append(data)
2138 msg = '\n'.join(msg)
2139 headers = email.message_from_string(msg)
2140 f = io.StringIO(msg)
2141 #f.fileno = None # needed for addinfourl
2142 return addinfourl(f, headers, url)
2143
2144
2145 class ESC[4;38;5;81mFancyURLopener(ESC[4;38;5;149mURLopener):
2146 """Derived class with handlers for errors we can handle (perhaps)."""
2147
2148 def __init__(self, *args, **kwargs):
2149 URLopener.__init__(self, *args, **kwargs)
2150 self.auth_cache = {}
2151 self.tries = 0
2152 self.maxtries = 10
2153
2154 def http_error_default(self, url, fp, errcode, errmsg, headers):
2155 """Default error handling -- don't raise an exception."""
2156 return addinfourl(fp, headers, "http:" + url, errcode)
2157
2158 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
2159 """Error 302 -- relocated (temporarily)."""
2160 self.tries += 1
2161 try:
2162 if self.maxtries and self.tries >= self.maxtries:
2163 if hasattr(self, "http_error_500"):
2164 meth = self.http_error_500
2165 else:
2166 meth = self.http_error_default
2167 return meth(url, fp, 500,
2168 "Internal Server Error: Redirect Recursion",
2169 headers)
2170 result = self.redirect_internal(url, fp, errcode, errmsg,
2171 headers, data)
2172 return result
2173 finally:
2174 self.tries = 0
2175
2176 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
2177 if 'location' in headers:
2178 newurl = headers['location']
2179 elif 'uri' in headers:
2180 newurl = headers['uri']
2181 else:
2182 return
2183 fp.close()
2184
2185 # In case the server sent a relative URL, join with original:
2186 newurl = urljoin(self.type + ":" + url, newurl)
2187
2188 urlparts = urlparse(newurl)
2189
2190 # For security reasons, we don't allow redirection to anything other
2191 # than http, https and ftp.
2192
2193 # We are using newer HTTPError with older redirect_internal method
2194 # This older method will get deprecated in 3.3
2195
2196 if urlparts.scheme not in ('http', 'https', 'ftp', ''):
2197 raise HTTPError(newurl, errcode,
2198 errmsg +
2199 " Redirection to url '%s' is not allowed." % newurl,
2200 headers, fp)
2201
2202 return self.open(newurl)
2203
2204 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
2205 """Error 301 -- also relocated (permanently)."""
2206 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2207
2208 def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
2209 """Error 303 -- also relocated (essentially identical to 302)."""
2210 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2211
2212 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
2213 """Error 307 -- relocated, but turn POST into error."""
2214 if data is None:
2215 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2216 else:
2217 return self.http_error_default(url, fp, errcode, errmsg, headers)
2218
2219 def http_error_308(self, url, fp, errcode, errmsg, headers, data=None):
2220 """Error 308 -- relocated, but turn POST into error."""
2221 if data is None:
2222 return self.http_error_301(url, fp, errcode, errmsg, headers, data)
2223 else:
2224 return self.http_error_default(url, fp, errcode, errmsg, headers)
2225
2226 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
2227 retry=False):
2228 """Error 401 -- authentication required.
2229 This function supports Basic authentication only."""
2230 if 'www-authenticate' not in headers:
2231 URLopener.http_error_default(self, url, fp,
2232 errcode, errmsg, headers)
2233 stuff = headers['www-authenticate']
2234 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
2235 if not match:
2236 URLopener.http_error_default(self, url, fp,
2237 errcode, errmsg, headers)
2238 scheme, realm = match.groups()
2239 if scheme.lower() != 'basic':
2240 URLopener.http_error_default(self, url, fp,
2241 errcode, errmsg, headers)
2242 if not retry:
2243 URLopener.http_error_default(self, url, fp, errcode, errmsg,
2244 headers)
2245 name = 'retry_' + self.type + '_basic_auth'
2246 if data is None:
2247 return getattr(self,name)(url, realm)
2248 else:
2249 return getattr(self,name)(url, realm, data)
2250
2251 def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
2252 retry=False):
2253 """Error 407 -- proxy authentication required.
2254 This function supports Basic authentication only."""
2255 if 'proxy-authenticate' not in headers:
2256 URLopener.http_error_default(self, url, fp,
2257 errcode, errmsg, headers)
2258 stuff = headers['proxy-authenticate']
2259 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
2260 if not match:
2261 URLopener.http_error_default(self, url, fp,
2262 errcode, errmsg, headers)
2263 scheme, realm = match.groups()
2264 if scheme.lower() != 'basic':
2265 URLopener.http_error_default(self, url, fp,
2266 errcode, errmsg, headers)
2267 if not retry:
2268 URLopener.http_error_default(self, url, fp, errcode, errmsg,
2269 headers)
2270 name = 'retry_proxy_' + self.type + '_basic_auth'
2271 if data is None:
2272 return getattr(self,name)(url, realm)
2273 else:
2274 return getattr(self,name)(url, realm, data)
2275
2276 def retry_proxy_http_basic_auth(self, url, realm, data=None):
2277 host, selector = _splithost(url)
2278 newurl = 'http://' + host + selector
2279 proxy = self.proxies['http']
2280 urltype, proxyhost = _splittype(proxy)
2281 proxyhost, proxyselector = _splithost(proxyhost)
2282 i = proxyhost.find('@') + 1
2283 proxyhost = proxyhost[i:]
2284 user, passwd = self.get_user_passwd(proxyhost, realm, i)
2285 if not (user or passwd): return None
2286 proxyhost = "%s:%s@%s" % (quote(user, safe=''),
2287 quote(passwd, safe=''), proxyhost)
2288 self.proxies['http'] = 'http://' + proxyhost + proxyselector
2289 if data is None:
2290 return self.open(newurl)
2291 else:
2292 return self.open(newurl, data)
2293
2294 def retry_proxy_https_basic_auth(self, url, realm, data=None):
2295 host, selector = _splithost(url)
2296 newurl = 'https://' + host + selector
2297 proxy = self.proxies['https']
2298 urltype, proxyhost = _splittype(proxy)
2299 proxyhost, proxyselector = _splithost(proxyhost)
2300 i = proxyhost.find('@') + 1
2301 proxyhost = proxyhost[i:]
2302 user, passwd = self.get_user_passwd(proxyhost, realm, i)
2303 if not (user or passwd): return None
2304 proxyhost = "%s:%s@%s" % (quote(user, safe=''),
2305 quote(passwd, safe=''), proxyhost)
2306 self.proxies['https'] = 'https://' + proxyhost + proxyselector
2307 if data is None:
2308 return self.open(newurl)
2309 else:
2310 return self.open(newurl, data)
2311
2312 def retry_http_basic_auth(self, url, realm, data=None):
2313 host, selector = _splithost(url)
2314 i = host.find('@') + 1
2315 host = host[i:]
2316 user, passwd = self.get_user_passwd(host, realm, i)
2317 if not (user or passwd): return None
2318 host = "%s:%s@%s" % (quote(user, safe=''),
2319 quote(passwd, safe=''), host)
2320 newurl = 'http://' + host + selector
2321 if data is None:
2322 return self.open(newurl)
2323 else:
2324 return self.open(newurl, data)
2325
2326 def retry_https_basic_auth(self, url, realm, data=None):
2327 host, selector = _splithost(url)
2328 i = host.find('@') + 1
2329 host = host[i:]
2330 user, passwd = self.get_user_passwd(host, realm, i)
2331 if not (user or passwd): return None
2332 host = "%s:%s@%s" % (quote(user, safe=''),
2333 quote(passwd, safe=''), host)
2334 newurl = 'https://' + host + selector
2335 if data is None:
2336 return self.open(newurl)
2337 else:
2338 return self.open(newurl, data)
2339
2340 def get_user_passwd(self, host, realm, clear_cache=0):
2341 key = realm + '@' + host.lower()
2342 if key in self.auth_cache:
2343 if clear_cache:
2344 del self.auth_cache[key]
2345 else:
2346 return self.auth_cache[key]
2347 user, passwd = self.prompt_user_passwd(host, realm)
2348 if user or passwd: self.auth_cache[key] = (user, passwd)
2349 return user, passwd
2350
2351 def prompt_user_passwd(self, host, realm):
2352 """Override this in a GUI environment!"""
2353 import getpass
2354 try:
2355 user = input("Enter username for %s at %s: " % (realm, host))
2356 passwd = getpass.getpass("Enter password for %s in %s at %s: " %
2357 (user, realm, host))
2358 return user, passwd
2359 except KeyboardInterrupt:
2360 print()
2361 return None, None
2362
2363
2364 # Utility functions
2365
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The address is resolved once and cached in the module-global
    _localhost.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
2373
_thishost = None
def thishost():
    """Return the IP addresses of the current host.

    Resolved once and cached in the module-global _thishost; falls back
    to the addresses of 'localhost' when the machine's own hostname does
    not resolve.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        addresses = socket.gethostbyname_ex(socket.gethostname())[2]
    except socket.gaierror:
        addresses = socket.gethostbyname_ex('localhost')[2]
    _thishost = tuple(addresses)
    return _thishost
2384
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported lazily on first use; the result is cached in the
    module-global _ftperrors.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
2393
_noheaders = None
def noheaders():
    """Return an empty email Message object (a cached singleton)."""
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2401
2402
2403 # Utility classes
2404
2405 class ESC[4;38;5;81mftpwrapper:
2406 """Class used by open_ftp() for cache of open FTP connections."""
2407
2408 def __init__(self, user, passwd, host, port, dirs, timeout=None,
2409 persistent=True):
2410 self.user = user
2411 self.passwd = passwd
2412 self.host = host
2413 self.port = port
2414 self.dirs = dirs
2415 self.timeout = timeout
2416 self.refcount = 0
2417 self.keepalive = persistent
2418 try:
2419 self.init()
2420 except:
2421 self.close()
2422 raise
2423
2424 def init(self):
2425 import ftplib
2426 self.busy = 0
2427 self.ftp = ftplib.FTP()
2428 self.ftp.connect(self.host, self.port, self.timeout)
2429 self.ftp.login(self.user, self.passwd)
2430 _target = '/'.join(self.dirs)
2431 self.ftp.cwd(_target)
2432
2433 def retrfile(self, file, type):
2434 import ftplib
2435 self.endtransfer()
2436 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
2437 else: cmd = 'TYPE ' + type; isdir = 0
2438 try:
2439 self.ftp.voidcmd(cmd)
2440 except ftplib.all_errors:
2441 self.init()
2442 self.ftp.voidcmd(cmd)
2443 conn = None
2444 if file and not isdir:
2445 # Try to retrieve as a file
2446 try:
2447 cmd = 'RETR ' + file
2448 conn, retrlen = self.ftp.ntransfercmd(cmd)
2449 except ftplib.error_perm as reason:
2450 if str(reason)[:3] != '550':
2451 raise URLError(f'ftp error: {reason}') from reason
2452 if not conn:
2453 # Set transfer mode to ASCII!
2454 self.ftp.voidcmd('TYPE A')
2455 # Try a directory listing. Verify that directory exists.
2456 if file:
2457 pwd = self.ftp.pwd()
2458 try:
2459 try:
2460 self.ftp.cwd(file)
2461 except ftplib.error_perm as reason:
2462 raise URLError('ftp error: %r' % reason) from reason
2463 finally:
2464 self.ftp.cwd(pwd)
2465 cmd = 'LIST ' + file
2466 else:
2467 cmd = 'LIST'
2468 conn, retrlen = self.ftp.ntransfercmd(cmd)
2469 self.busy = 1
2470
2471 ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
2472 self.refcount += 1
2473 conn.close()
2474 # Pass back both a suitably decorated object and a retrieval length
2475 return (ftpobj, retrlen)
2476
2477 def endtransfer(self):
2478 if not self.busy:
2479 return
2480 self.busy = 0
2481 try:
2482 self.ftp.voidresp()
2483 except ftperrors():
2484 pass
2485
2486 def close(self):
2487 self.keepalive = False
2488 if self.refcount <= 0:
2489 self.real_close()
2490
2491 def file_close(self):
2492 self.endtransfer()
2493 self.refcount -= 1
2494 if self.refcount <= 0 and not self.keepalive:
2495 self.real_close()
2496
2497 def real_close(self):
2498 self.endtransfer()
2499 try:
2500 self.ftp.close()
2501 except ftperrors():
2502 pass
2503
2504 # Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Lowercase variables are preferred: the environment is processed in
    two passes, the first matching any capitalization, the second
    (overriding) pass matching lowercase names only.
    """
    proxies = {}
    candidates = []
    for var, value in os.environ.items():
        # '_' has no case, so this reproduces the suffix test exactly.
        if var.lower().endswith("_proxy"):
            scheme = var[:-6].lower()
            candidates.append((var, value, scheme))
            if value:
                proxies[scheme] = value
    # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY
    # (non-all-lowercase) as it may be set from the web server by a "Proxy:"
    # header from the client
    # If "proxy" is lowercase, it will still be used thanks to the next block
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    # Second pass: lowercase names win, and an empty lowercase value
    # removes the scheme entirely.
    for var, value, scheme in candidates:
        if var.endswith('_proxy'):
            if value:
                proxies[scheme] = value
            else:
                proxies.pop(scheme, None)
    return proxies
2541
def proxy_bypass_environment(host, proxies=None):
    """Test if proxies should not be used for a particular host.

    Checks the proxy dict for the value of no_proxy, which should
    be a list of comma separated DNS suffixes, or '*' for all hosts.

    """
    if proxies is None:
        proxies = getproxies_environment()
    if 'no' not in proxies:
        # don't bypass, if no_proxy isn't specified
        return False
    no_proxy = proxies['no']
    # '*' is special case for always bypass
    if no_proxy == '*':
        return True
    host = host.lower()
    # strip port off host
    hostonly, port = _splitport(host)
    # check if the host ends with any of the DNS suffixes
    for entry in no_proxy.split(','):
        entry = entry.strip()
        if not entry:
            continue
        entry = entry.lstrip('.').lower()  # ignore leading dots
        if entry in (hostonly, host):
            return True
        suffix = '.' + entry
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return True
    # otherwise, don't bypass
    return False
2575
2576
2577 # This code tests an OSX specific data structure but is testable on all
2578 # platforms
def _proxy_bypass_macosx_sysconf(host, proxy_settings):
    """
    Return True iff this host shouldn't be accessed using a proxy

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
    { 'exclude_simple': bool,
      'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
    }
    """
    from fnmatch import fnmatch

    hostonly, port = _splitport(host)

    def ip2num(ipAddr):
        # Pack a (possibly partial) dotted-decimal string into a 32-bit
        # int; missing trailing components are treated as zero.
        parts = ipAddr.split('.')
        parts = list(map(int, parts))
        if len(parts) != 4:
            parts = (parts + [0, 0, 0, 0])[:4]
        return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

    # Check for simple host names:
    if '.' not in host:
        if proxy_settings['exclude_simple']:
            return True

    hostIP = None  # resolved lazily, only when a numeric exception appears

    for value in proxy_settings.get('exceptions', ()):
        # Items in the list are strings like these: *.local, 169.254/16
        if not value: continue

        m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
        if m is not None:
            # Numeric entry: compare host address against address/prefix.
            if hostIP is None:
                try:
                    hostIP = socket.gethostbyname(hostonly)
                    hostIP = ip2num(hostIP)
                except OSError:
                    continue

            base = ip2num(m.group(1))
            mask = m.group(2)
            if mask is None:
                # No explicit prefix length: infer one from the number of
                # components given, e.g. '10.1' -> /16.
                mask = 8 * (m.group(1).count('.') + 1)
            else:
                mask = int(mask[1:])

            if mask < 0 or mask > 32:
                # System libraries ignore invalid prefix lengths
                continue

            mask = 32 - mask

            # Compare only the network part of both addresses.
            if (hostIP >> mask) == (base >> mask):
                return True

        elif fnmatch(host, value):
            # Non-numeric entry: shell-style wildcard match on the host.
            return True

    return False
2642
2643
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        # Query the system proxy settings and apply the shared bypass logic.
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()



    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or from the MacOSX framework SystemConfiguration.

        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        # Environment variables take precedence over system configuration.
        return getproxies_environment() or getproxies_macosx_sysconf()


elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                      'ProxyServer')[0])
                if '=' not in proxyServer and ';' not in proxyServer:
                    # Use one setting for all protocols.
                    proxyServer = 'http={0};https={0};ftp={0}'.format(proxyServer)
                for p in proxyServer.split(';'):
                    protocol, address = p.split('=', 1)
                    # See if address has a type:// prefix
                    if not re.match('(?:[^/:]+)://', address):
                        # Add type:// prefix to address without specifying type
                        if protocol in ('http', 'https', 'ftp'):
                            # The default proxy type of Windows is HTTP
                            address = 'http://' + address
                        elif protocol == 'socks':
                            address = 'socks://' + address
                    proxies[protocol] = address
                # Use SOCKS proxy for HTTP(S) protocols
                if proxies.get('socks'):
                    # The default SOCKS proxy type of Windows is SOCKS4
                    address = re.sub(r'^socks://', 'socks4://', proxies['socks'])
                    proxies['http'] = proxies.get('http') or address
                    proxies['https'] = proxies.get('https') or address
            internetSettings.Close()
        except (OSError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        # Consult the Internet Settings ProxyOverride registry value to
        # decide whether `host` should bypass the proxy.
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                    'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except OSError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = _splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except OSError:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except OSError:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                # '<local>' means: bypass for plain (dot-less) host names.
                if '.' not in rawHost:
                    return 1
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.

        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment