1 """HTTP server classes.
2
Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (including POST).
6
7 It does, however, optionally implement HTTP/1.1 persistent connections,
8 as of version 0.3.
9
10 Notes on CGIHTTPRequestHandler
11 ------------------------------
12
13 This class implements GET and POST requests to cgi-bin scripts.
14
15 If the os.fork() function is not present (e.g. on Windows),
16 subprocess.Popen() is used as a fallback, with slightly altered semantics.
17
18 In all cases, the implementation is intentionally naive -- all
19 requests are executed synchronously.
20
21 SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22 -- it may execute arbitrary Python code or external programs.
23
24 Note that status code 200 is sent prior to execution of a CGI script, so
25 scripts cannot send other status codes such as 302 (redirect).
26
27 XXX To do:
28
29 - log requests even later (to capture byte count)
30 - log user-agent header and other interesting goodies
31 - send error log to separate file
32 """
33
34
35 # See also:
36 #
37 # HTTP Working Group T. Berners-Lee
38 # INTERNET-DRAFT R. T. Fielding
39 # <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
40 # Expires September 8, 1995 March 8, 1995
41 #
42 # URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43 #
44 # and
45 #
46 # Network Working Group R. Fielding
47 # Request for Comments: 2616 et al
48 # Obsoletes: 2068 June 1999
49 # Category: Standards Track
50 #
51 # URL: http://www.faqs.org/rfcs/rfc2616.html
52
53 # Log files
54 # ---------
55 #
56 # Here's a quote from the NCSA httpd docs about log file format.
57 #
58 # | The logfile format is as follows. Each line consists of:
59 # |
60 # | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61 # |
62 # | host: Either the DNS name or the IP number of the remote client
63 # | rfc931: Any information returned by identd for this person,
64 # | - otherwise.
65 # | authuser: If user sent a userid for authentication, the user name,
66 # | - otherwise.
67 # | DD: Day
68 # | Mon: Month (calendar name)
69 # | YYYY: Year
70 # | hh: hour (24-hour format, the machine's timezone)
71 # | mm: minutes
72 # | ss: seconds
73 # | request: The first line of the HTTP request as sent by the client.
74 # | ddd: the status code returned by the server, - if not available.
75 # | bbbb: the total number of bytes sent,
76 # | *not including the HTTP/1.0 header*, - if not available
77 # |
78 # | You can determine the name of the file accessed through request.
79 #
80 # (Actually, the latter is only true if you know the server configuration
81 # at the time the request was made!)
82
# Version of this HTTP server implementation; the handler classes embed it
# in their ``server_version`` strings.
__version__ = "0.6"

# Names exported by a star-import of this module.
__all__ = [
    "HTTPServer",
    "ThreadingHTTPServer",
    "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler",
    "CGIHTTPRequestHandler",
]
89
90 import copy
91 import datetime
92 import email.utils
93 import html
94 import http.client
95 import io
96 import itertools
97 import mimetypes
98 import os
99 import posixpath
100 import select
101 import shutil
102 import socket # For gethostbyaddr()
103 import socketserver
104 import sys
105 import time
106 import urllib.parse
107
108 from http import HTTPStatus
109
110
# Default HTML template for error response bodies.  It is %-formatted with a
# mapping that supplies the keys ``code``, ``message`` and ``explain``.
DEFAULT_ERROR_MESSAGE = (
    '<!DOCTYPE HTML>\n'
    '<html lang="en">\n'
    '    <head>\n'
    '        <meta charset="utf-8">\n'
    '        <title>Error response</title>\n'
    '    </head>\n'
    '    <body>\n'
    '        <h1>Error response</h1>\n'
    '        <p>Error code: %(code)d</p>\n'
    '        <p>Message: %(message)s.</p>\n'
    '        <p>Error code explanation: %(code)s - %(explain)s.</p>\n'
    '    </body>\n'
    '</html>\n'
)

# Content-Type header value sent together with the template above.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
129
class HTTPServer(socketserver.TCPServer):
    """TCP server that remembers its own host name and port once bound."""

    # Class flag inherited from the socketserver machinery; turning it on
    # seems to make sense in a testing environment.
    allow_reuse_address = True

    def server_bind(self):
        """Bind the socket, then record ``server_name`` and ``server_port``."""
        super().server_bind()
        bound_host, bound_port = self.server_address[:2]
        self.server_name = socket.getfqdn(bound_host)
        self.server_port = bound_port
140
141
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer combined with ``socketserver.ThreadingMixIn``."""

    # Attribute consumed by the threading mix-in.
    daemon_threads = True
144
145
class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings the author may have
    about HTTP.

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification says that lines are separated by CRLF, but for
    compatibility with the widest range of clients it recommends that
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (multiple spaces between components and
    trailing whitespace are allowed).

    For output, lines ought to be separated by CRLF pairs as well, but
    most clients cope with bare LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    method specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of email.message.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, any relevant
        error response has already been sent back.

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = True
        requestline = str(self.raw_requestline, 'iso-8859-1')
        requestline = requestline.rstrip('\r\n')
        self.requestline = requestline
        words = requestline.split()
        if not words:
            return False

        if len(words) >= 3:  # Enough to determine protocol version
            version = words[-1]
            try:
                if not version.startswith('HTTP/'):
                    raise ValueError
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                if any(not component.isdigit() for component in version_number):
                    raise ValueError("non digit in http version")
                if any(len(component) > 10 for component in version_number):
                    raise ValueError("unreasonable length http version")
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad request version (%r)" % version)
                return False
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = False
            if version_number >= (2, 0):
                self.send_error(
                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
                    "Invalid HTTP version (%s)" % base_version_number)
                return False
            self.request_version = version

        if not 2 <= len(words) <= 3:
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad request syntax (%r)" % requestline)
            return False
        command, path = words[:2]
        if len(words) == 2:
            # Two words only: an HTTP/0.9 style request, GET is the only
            # command accepted and the connection is not kept open.
            self.close_connection = True
            if command != 'GET':
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad HTTP/0.9 request type (%r)" % command)
                return False
        self.command, self.path = command, path

        # gh-87389: The purpose of replacing '//' with '/' is to protect
        # against open redirect attacks possibly triggered if the path starts
        # with '//' because http clients treat //path as an absolute URI
        # without scheme (similar to http://path) rather than a path.
        if self.path.startswith('//'):
            self.path = '/' + self.path.lstrip('/')  # Reduce to a single /

        # Examine the headers and look for a Connection directive.
        try:
            self.headers = http.client.parse_headers(self.rfile,
                                                     _class=self.MessageClass)
        except http.client.LineTooLong as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Line too long",
                str(err))
            return False
        except http.client.HTTPException as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Too many headers",
                str(err)
            )
            return False

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = True
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = False
        # Examine the headers and look for an Expect directive
        expect = self.headers.get('Expect', "")
        if (expect.lower() == "100-continue" and
                self.protocol_version >= "HTTP/1.1" and
                self.request_version >= "HTTP/1.1"):
            if not self.handle_expect_100():
                return False
        return True

    def handle_expect_100(self):
        """Decide what to do with an "Expect: 100-continue" header.

        If the client is expecting a 100 Continue response, we must
        respond with either a 100 Continue or a final response before
        waiting for the request body.  The default is to always respond
        with a 100 Continue.  You can behave differently (for example,
        reject unauthorized requests) by overriding this method.

        This method should either return True (possibly after sending
        a 100 Continue response) or send an error response and return
        False.

        """
        self.send_response_only(HTTPStatus.CONTINUE)
        self.end_headers()
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        try:
            # Read one byte more than the limit so that an over-long
            # request line can be detected.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
                return
            if not self.raw_requestline:
                self.close_connection = True
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(
                    HTTPStatus.NOT_IMPLEMENTED,
                    "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            # Actually send the response if not already done.
            self.wfile.flush()
        except TimeoutError as e:
            # A read or a write timed out.  Discard this connection.
            self.log_error("Request timed out: %r", e)
            self.close_connection = True
            return

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = True

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None, explain=None):
        """Send and log an error reply.

        Arguments are
        * code:    an HTTP error code
                   3 digits
        * message: a simple optional 1 line reason phrase.
                   *( HTAB / SP / VCHAR / %x80-FF )
                   defaults to short entry matching the response code
        * explain: a detailed message defaults to the long entry
                   matching the response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        """

        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        if explain is None:
            explain = longmsg
        self.log_error("code %d, message %s", code, message)
        self.send_response(code, message)
        self.send_header('Connection', 'close')

        # Message body is omitted for cases described in:
        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
        #  - RFC7231: 6.3.6. 205(Reset Content)
        body = None
        if (code >= 200 and
                code not in (HTTPStatus.NO_CONTENT,
                             HTTPStatus.RESET_CONTENT,
                             HTTPStatus.NOT_MODIFIED)):
            # HTML encode to prevent Cross Site Scripting attacks
            # (see bug #1100201)
            content = (self.error_message_format % {
                'code': code,
                'message': html.escape(message, quote=False),
                'explain': html.escape(explain, quote=False)
            })
            body = content.encode('UTF-8', 'replace')
            self.send_header("Content-Type", self.error_content_type)
            self.send_header('Content-Length', str(len(body)))
        self.end_headers()

        if self.command != 'HEAD' and body:
            self.wfile.write(body)

    def send_response(self, code, message=None):
        """Add the response header to the headers buffer and log the
        response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        self.send_response_only(code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_response_only(self, code, message=None):
        """Buffer the status line only (nothing for HTTP/0.9 requests)."""
        if self.request_version != 'HTTP/0.9':
            if message is None:
                if code in self.responses:
                    message = self.responses[code][0]
                else:
                    message = ''
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(("%s %d %s\r\n" %
                    (self.protocol_version, code, message)).encode(
                        'latin-1', 'strict'))

    def send_header(self, keyword, value):
        """Add a MIME header to the headers buffer.

        A ``Connection`` header additionally updates
        ``self.close_connection`` ('close' -> True, 'keep-alive' -> False).

        """
        if self.request_version != 'HTTP/0.9':
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(
                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = True
            elif value.lower() == 'keep-alive':
                self.close_connection = False

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            # Create the buffer on demand, exactly like send_header() and
            # send_response_only() do, so that calling end_headers() before
            # anything was buffered does not fail with AttributeError.
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(b"\r\n")
            self.flush_headers()

    def flush_headers(self):
        """Write any buffered header bytes to ``wfile`` and reset the buffer."""
        if hasattr(self, '_headers_buffer'):
            self.wfile.write(b"".join(self._headers_buffer))
            self._headers_buffer = []

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """
        if isinstance(code, HTTPStatus):
            code = code.value
        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    # https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes
    _control_char_table = str.maketrans(
            {c: fr'\x{c:02x}' for c in itertools.chain(range(0x20), range(0x7f, 0xa0))})
    # A literal backslash is doubled so escaped output stays unambiguous.
    _control_char_table[ord('\\')] = r'\\'

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client ip and current date/time are prefixed to
        every message.

        Unicode control characters are replaced with escaped hex
        before writing the output to stderr.

        """

        message = format % args
        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          message.translate(self._control_char_table)))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header."""
        if timestamp is None:
            timestamp = time.time()
        return email.utils.formatdate(timestamp, usegmt=True)

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.localtime(time.time())
        return "%02d/%3s/%04d %02d:%02d:%02d" % (
            now.tm_mday, self.monthname[now.tm_mon], now.tm_year,
            now.tm_hour, now.tm_min, now.tm_sec)

    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Index 0 is a placeholder so that month numbers 1..12 index directly.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address."""

        return self.client_address[0]

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }
644
645
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):

    """Simple HTTP request handler with GET and HEAD commands.

    This serves files from the configured directory (the current
    directory by default) and any of its subdirectories.  The MIME type
    for files is determined by calling the .guess_type() method.

    The GET and HEAD requests are identical except that the HEAD
    request omits the actual contents of the file.

    """

    server_version = "SimpleHTTP/" + __version__
    # Extension -> content type overrides, consulted by guess_type() before
    # the mimetypes module is asked.
    extensions_map = _encodings_map_default = {
        '.gz': 'application/gzip',
        '.Z': 'application/octet-stream',
        '.bz2': 'application/x-bzip2',
        '.xz': 'application/x-xz',
    }

    def __init__(self, *args, directory=None, **kwargs):
        """Remember the directory to serve, then run the base initializer.

        *directory* defaults to the current working directory; any
        path-like object is accepted and stored as ``self.directory``.

        """
        if directory is None:
            directory = os.getcwd()
        self.directory = os.fspath(directory)
        super().__init__(*args, **kwargs)

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        """Common code for GET and HEAD commands.

        This sends the response code and MIME headers.

        Return value is either a file object (which has to be copied
        to the outputfile by the caller unless the command was HEAD,
        and must be closed by the caller under all circumstances), or
        None, in which case the caller has nothing further to do.

        """
        path = self.translate_path(self.path)
        f = None
        if os.path.isdir(path):
            parts = urllib.parse.urlsplit(self.path)
            if not parts.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
                new_parts = (parts[0], parts[1], parts[2] + '/',
                             parts[3], parts[4])
                new_url = urllib.parse.urlunsplit(new_parts)
                self.send_header("Location", new_url)
                self.send_header("Content-Length", "0")
                self.end_headers()
                return None
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.isfile(index):
                    path = index
                    break
            else:
                # No index file found: fall back to a generated listing.
                return self.list_directory(path)
        ctype = self.guess_type(path)
        # check for trailing "/" which should return 404. See Issue17324
        # The test for this was added in test_httpserver.py
        # However, some OS platforms accept a trailingSlash as a filename
        # See discussion on python-dev and Issue34711 regarding
        # parsing and rejection of filenames with a trailing slash
        if path.endswith("/"):
            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
            return None
        try:
            f = open(path, 'rb')
        except OSError:
            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
            return None

        try:
            fs = os.fstat(f.fileno())
            # Use browser cache if possible
            if ("If-Modified-Since" in self.headers
                    and "If-None-Match" not in self.headers):
                # compare If-Modified-Since and time of last file modification
                try:
                    ims = email.utils.parsedate_to_datetime(
                        self.headers["If-Modified-Since"])
                except (TypeError, IndexError, OverflowError, ValueError):
                    # ignore ill-formed values
                    pass
                else:
                    if ims.tzinfo is None:
                        # obsolete format with no timezone, cf.
                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
                        ims = ims.replace(tzinfo=datetime.timezone.utc)
                    if ims.tzinfo is datetime.timezone.utc:
                        # compare to UTC datetime of last modification
                        last_modif = datetime.datetime.fromtimestamp(
                            fs.st_mtime, datetime.timezone.utc)
                        # remove microseconds, like in If-Modified-Since
                        last_modif = last_modif.replace(microsecond=0)

                        if last_modif <= ims:
                            self.send_response(HTTPStatus.NOT_MODIFIED)
                            self.end_headers()
                            f.close()
                            return None

            self.send_response(HTTPStatus.OK)
            self.send_header("Content-type", ctype)
            self.send_header("Content-Length", str(fs.st_size))
            self.send_header("Last-Modified",
                             self.date_time_string(fs.st_mtime))
            self.end_headers()
            return f
        except BaseException:
            # Never leak the open file, whatever went wrong; the error
            # itself is propagated unchanged.
            f.close()
            raise

    def list_directory(self, path):
        """Helper to produce a directory listing (absent index.html).

        Return value is either a file object, or None (indicating an
        error).  In either case, the headers are sent, making the
        interface the same as for send_head().

        """
        try:
            entries = os.listdir(path)
        except OSError:
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No permission to list directory")
            return None
        entries.sort(key=lambda a: a.lower())
        r = []
        try:
            displaypath = urllib.parse.unquote(self.path,
                                               errors='surrogatepass')
        except UnicodeDecodeError:
            displaypath = urllib.parse.unquote(self.path)
        displaypath = html.escape(displaypath, quote=False)
        enc = sys.getfilesystemencoding()
        title = f'Directory listing for {displaypath}'
        r.append('<!DOCTYPE HTML>')
        r.append('<html lang="en">')
        r.append('<head>')
        r.append(f'<meta charset="{enc}">')
        r.append(f'<title>{title}</title>\n</head>')
        r.append(f'<body>\n<h1>{title}</h1>')
        r.append('<hr>\n<ul>')
        for name in entries:
            fullname = os.path.join(path, name)
            displayname = linkname = name
            # Append / for directories or @ for symbolic links
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
                # Note: a link to a directory displays with @ and links with /
            r.append('<li><a href="%s">%s</a></li>'
                    % (urllib.parse.quote(linkname,
                                          errors='surrogatepass'),
                       html.escape(displayname, quote=False)))
        r.append('</ul>\n<hr>\n</body>\n</html>\n')
        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
        f = io.BytesIO()
        f.write(encoded)
        f.seek(0)
        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return f

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        """
        # abandon query parameters
        path = path.split('?', 1)[0]
        path = path.split('#', 1)[0]
        # Don't forget explicit trailing slash when normalizing. Issue17324
        trailing_slash = path.rstrip().endswith('/')
        try:
            path = urllib.parse.unquote(path, errors='surrogatepass')
        except UnicodeDecodeError:
            path = urllib.parse.unquote(path)
        path = posixpath.normpath(path)
        words = path.split('/')
        words = filter(None, words)
        path = self.directory
        for word in words:
            if os.path.dirname(word) or word in (os.curdir, os.pardir):
                # Ignore components that are not a simple file/directory name
                continue
            path = os.path.join(path, word)
        if trailing_slash:
            path += '/'
        return path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        The SOURCE argument is a file object open for reading
        (or anything with a read() method) and the OUTPUTFILE
        argument is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Guess the type of a file.

        Argument is a PATH (a filename).

        Return value is a string of the form type/subtype,
        usable for a MIME Content-type header.

        The default implementation looks the file's extension
        up in the table self.extensions_map (first as written, then
        lower-cased), then asks the mimetypes module, using
        application/octet-stream as a default; however it would be
        permissible (if slow) to look inside the data to make a
        better guess.

        """
        base, ext = posixpath.splitext(path)
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        ext = ext.lower()
        if ext in self.extensions_map:
            return self.extensions_map[ext]
        guess, _ = mimetypes.guess_type(path)
        if guess:
            return guess
        return 'application/octet-stream'
903
904
905 # Utilities for CGIHTTPRequestHandler
906
907 def _url_collapse_path(path):
908 """
909 Given a URL path, remove extra '/'s and '.' path elements and collapse
910 any '..' references and returns a collapsed path.
911
912 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
913 The utility of this function is limited to is_cgi method and helps
914 preventing some security attacks.
915
916 Returns: The reconstituted URL, which will always start with a '/'.
917
918 Raises: IndexError if too many '..' occur within the path.
919
920 """
921 # Query component should not be involved.
922 path, _, query = path.partition('?')
923 path = urllib.parse.unquote(path)
924
925 # Similar to os.path.split(os.path.normpath(path)) but specific to URL
926 # path semantics rather than local operating system semantics.
927 path_parts = path.split('/')
928 head_parts = []
929 for part in path_parts[:-1]:
930 if part == '..':
931 head_parts.pop() # IndexError if more '..' than prior parts
932 elif part and part != '.':
933 head_parts.append( part )
934 if path_parts:
935 tail_part = path_parts.pop()
936 if tail_part:
937 if tail_part == '..':
938 head_parts.pop()
939 tail_part = ''
940 elif tail_part == '.':
941 tail_part = ''
942 else:
943 tail_part = ''
944
945 if query:
946 tail_part = '?'.join((tail_part, query))
947
948 splitpath = ('/' + '/'.join(head_parts), tail_part)
949 collapsed_path = "/".join(splitpath)
950
951 return collapsed_path
952
953
954
# Module-level cache for the uid computed by nobody_uid().
nobody = None


def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns the cached value when one is set, -1 when the ``pwd`` module
    cannot be imported, and otherwise the uid of the 'nobody' account or,
    if no such account exists, one more than the largest uid listed by
    ``pwd.getpwall()``.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            highest_uid = max(entry[2] for entry in pwd.getpwall())
            nobody = highest_uid + 1
    return nobody
971
972
def executable(path):
    """Report whether os.access() grants execute permission on *path*."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
976
977
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    A request is handled as CGI when is_cgi() accepts its path; every
    other GET/HEAD request is served by SimpleHTTPRequestHandler.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    # URL path prefixes that is_cgi() treats as CGI script directories.
    cgi_directories = ['/cgi-bin', '/htbin']

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts."""
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Try each successively longer '/'-terminated prefix until one of
        # them names a CGI directory.
        dir_sep = collapsed_path.find('/', 1)
        while (dir_sep > 0
               and collapsed_path[:dir_sep] not in self.cgi_directories):
            dir_sep = collapsed_path.find('/', dir_sep + 1)
        if dir_sep > 0:
            self.cgi_info = (collapsed_path[:dir_sep],
                             collapsed_path[dir_sep + 1:])
            return True
        return False

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        extension = os.path.splitext(path)[1]
        return extension.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        Uses the (dir, rest) tuple that is_cgi() stored in self.cgi_info.
        The script is located, checked, and then run either in a forked
        child (when os.fork is available) or through subprocess.Popen.
        Status 200 is sent before the script starts; if the script cannot
        be run, an error response is sent instead.

        """
        cgi_dir, rest = self.cgi_info
        path = cgi_dir + '/' + rest

        # Extend the script directory through every leading path component
        # that is an existing directory on disk.
        i = path.find('/', len(cgi_dir) + 1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i + 1:]
            if not os.path.isdir(self.translate_path(nextdir)):
                break
            cgi_dir, rest = nextdir, nextrest
            i = path.find('/', len(cgi_dir) + 1)

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        script, slash, extra = rest.partition('/')
        path_info = slash + extra

        scriptname = cgi_dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        # Without fork, Python scripts are run through the interpreter and
        # therefore do not need the executable bit themselves.
        if self.have_fork or not self.is_python(scriptname):
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        env = self._cgi_environ(scriptname, path_info, query)

        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        # A query without '=' is handed to the script as a command-line
        # argument; '+' stands for a space there.
        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            self._run_cgi_fork(script, scriptfile, decoded_query, env)
        else:
            # Non-Unix -- use subprocess
            self._run_cgi_subprocess(scriptfile, decoded_query, env)

    def _cgi_environ(self, scriptname, path_info, query):
        """Build the environment mapping handed to the CGI script."""
        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(path_info)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]

        auth_header = self.headers.get("authorization")
        if auth_header:
            auth_fields = auth_header.split()
            if len(auth_fields) == 2:
                import base64
                import binascii
                scheme, credentials = auth_fields
                env['AUTH_TYPE'] = scheme
                if scheme.lower() == "basic":
                    try:
                        user_pass = base64.decodebytes(
                            credentials.encode('ascii')).decode('ascii')
                    except (binascii.Error, UnicodeError):
                        pass
                    else:
                        # Split at the first colon only: the password
                        # part may itself contain ':' characters.
                        user, colon, _ = user_pass.partition(':')
                        if colon:
                            env['REMOTE_USER'] = user
        # XXX REMOTE_IDENT

        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        cookies = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(cookies)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")
        return env

    def _run_cgi_fork(self, script, scriptfile, decoded_query, env):
        """Run the script in a forked child wired to the request streams."""
        args = [script]
        if '=' not in decoded_query:
            args.append(decoded_query)
        uid = nobody_uid()
        self.wfile.flush()  # Always flush before forking
        pid = os.fork()
        if pid != 0:
            # Parent
            pid, sts = os.waitpid(pid, 0)
            # throw away additional data [see bug #427345]
            while select.select([self.rfile], [], [], 0)[0]:
                if not self.rfile.read(1):
                    break
            exitcode = os.waitstatus_to_exitcode(sts)
            if exitcode:
                self.log_error(f"CGI script exit code {exitcode}")
            return
        # Child
        try:
            # Best effort: setuid() is allowed to fail.
            try:
                os.setuid(uid)
            except OSError:
                pass
            os.dup2(self.rfile.fileno(), 0)
            os.dup2(self.wfile.fileno(), 1)
            os.execve(scriptfile, args, env)
        except:
            # Deliberately catches everything: the child must never
            # return into the server's request loop.
            self.server.handle_error(self.request, self.client_address)
            os._exit(127)

    def _run_cgi_subprocess(self, scriptfile, decoded_query, env):
        """Run the script with subprocess.Popen and relay its output."""
        import subprocess
        cmdline = [scriptfile]
        if self.is_python(scriptfile):
            interp = sys.executable
            if interp.lower().endswith("w.exe"):
                # On Windows, use python.exe, not pythonw.exe
                interp = interp[:-5] + interp[-4:]
            cmdline = [interp, '-u'] + cmdline
        # Pass the same decoded argument that the fork code path passes.
        if '=' not in decoded_query:
            cmdline.append(decoded_query)
        self.log_message("command: %s", subprocess.list2cmdline(cmdline))
        try:
            nbytes = int(self.headers.get('content-length'))
        except (TypeError, ValueError):
            nbytes = 0
        # The context manager closes the pipes and reaps the child even
        # if reading the request body raises.
        with subprocess.Popen(cmdline,
                              stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE,
                              env=env) as proc:
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = proc.communicate(data)
        self.wfile.write(stdout)
        if stderr:
            self.log_error('%s', stderr)
        status = proc.returncode
        if status:
            self.log_error("CGI script exit status %#x", status)
        else:
            self.log_message("CGI script exited OK")
1238
1239
def _get_best_family(*address):
    """Return (family, sockaddr) of the first getaddrinfo() match.

    *address* is forwarded to socket.getaddrinfo() as its positional
    arguments (the callers in this module pass host and port); the lookup
    is restricted to stream sockets and uses the AI_PASSIVE flag.
    """
    candidates = socket.getaddrinfo(
        *address,
        type=socket.SOCK_STREAM,
        flags=socket.AI_PASSIVE,
    )
    family, _socktype, _proto, _canonname, sockaddr = next(iter(candidates))
    return family, sockaddr
1248
1249
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the port argument), bound to
    the address chosen by _get_best_family() for *bind*.  Note that the
    address family is stored on ServerClass and the protocol version on
    HandlerClass, i.e. both classes are modified.  The server runs until
    interrupted from the keyboard, after which the process exits with
    status 0.

    """
    family, server_address = _get_best_family(bind, port)
    ServerClass.address_family = family
    HandlerClass.protocol_version = protocol
    with ServerClass(server_address, HandlerClass) as httpd:
        bound_host, bound_port = httpd.socket.getsockname()[:2]
        # IPv6 literals need brackets inside a URL.
        if ':' in bound_host:
            url_host = f'[{bound_host}]'
        else:
            url_host = bound_host
        print(
            f"Serving HTTP on {bound_host} port {bound_port} "
            f"(http://{url_host}:{bound_port}/) ..."
        )
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
1272
if __name__ == '__main__':
    import argparse
    import contextlib

    # Command-line interface: [--cgi] [-b ADDRESS] [-d DIRECTORY]
    # [-p VERSION] [port]
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '--cgi', action='store_true',
        help='run as CGI server')
    arg_parser.add_argument(
        '-b', '--bind', metavar='ADDRESS',
        help='bind to this address (default: all interfaces)')
    arg_parser.add_argument(
        '-d', '--directory', default=os.getcwd(),
        help='serve this directory (default: current directory)')
    arg_parser.add_argument(
        '-p', '--protocol', metavar='VERSION', default='HTTP/1.0',
        help='conform to this HTTP version (default: %(default)s)')
    arg_parser.add_argument(
        'port', default=8000, type=int, nargs='?',
        help='bind to this port (default: %(default)s)')
    options = arg_parser.parse_args()

    handler_class = (CGIHTTPRequestHandler if options.cgi
                     else SimpleHTTPRequestHandler)

    # ensure dual-stack is not disabled; ref #38907
    class DualStackServer(ThreadingHTTPServer):

        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

        def finish_request(self, request, client_address):
            # Pass the directory chosen on the command line to the handler.
            self.RequestHandlerClass(request, client_address, self,
                                     directory=options.directory)

    test(
        HandlerClass=handler_class,
        ServerClass=DualStackServer,
        port=options.port,
        bind=options.bind,
        protocol=options.protocol,
    )