python (3.12.0)
1 """HTTP server classes.
2
3 Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
4 SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
5 and CGIHTTPRequestHandler for CGI scripts (which also adds POST).
6
7 It does, however, optionally implement HTTP/1.1 persistent connections,
8 as of version 0.3.
9
10 Notes on CGIHTTPRequestHandler
11 ------------------------------
12
13 This class implements GET and POST requests to cgi-bin scripts.
14
15 If the os.fork() function is not present (e.g. on Windows),
16 subprocess.Popen() is used as a fallback, with slightly altered semantics.
17
18 In all cases, the implementation is intentionally naive -- all
19 requests are executed synchronously.
20
21 SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22 -- it may execute arbitrary Python code or external programs.
23
24 Note that status code 200 is sent prior to execution of a CGI script, so
25 scripts cannot send other status codes such as 302 (redirect).
26
27 XXX To do:
28
29 - log requests even later (to capture byte count)
30 - log user-agent header and other interesting goodies
31 - send error log to separate file
32 """
33
34
35 # See also:
36 #
37 # HTTP Working Group T. Berners-Lee
38 # INTERNET-DRAFT R. T. Fielding
39 # <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
40 # Expires September 8, 1995 March 8, 1995
41 #
42 # URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43 #
44 # and
45 #
46 # Network Working Group R. Fielding
47 # Request for Comments: 2616 et al
48 # Obsoletes: 2068 June 1999
49 # Category: Standards Track
50 #
51 # URL: http://www.faqs.org/rfcs/rfc2616.html
52
53 # Log files
54 # ---------
55 #
56 # Here's a quote from the NCSA httpd docs about log file format.
57 #
58 # | The logfile format is as follows. Each line consists of:
59 # |
60 # | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61 # |
62 # | host: Either the DNS name or the IP number of the remote client
63 # | rfc931: Any information returned by identd for this person,
64 # | - otherwise.
65 # | authuser: If user sent a userid for authentication, the user name,
66 # | - otherwise.
67 # | DD: Day
68 # | Mon: Month (calendar name)
69 # | YYYY: Year
70 # | hh: hour (24-hour format, the machine's timezone)
71 # | mm: minutes
72 # | ss: seconds
73 # | request: The first line of the HTTP request as sent by the client.
74 # | ddd: the status code returned by the server, - if not available.
75 # | bbbb: the total number of bytes sent,
76 # | *not including the HTTP/1.0 header*, - if not available
77 # |
78 # | You can determine the name of the file accessed through request.
79 #
80 # (Actually, the latter is only true if you know the server configuration
81 # at the time the request was made!)
82
83 __version__ = "0.6"
84
85 __all__ = [
86 "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
87 "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
88 ]
89
90 import copy
91 import datetime
92 import email.utils
93 import html
94 import http.client
95 import io
96 import itertools
97 import mimetypes
98 import os
99 import posixpath
100 import select
101 import shutil
102 import socket # For gethostbyaddr()
103 import socketserver
104 import sys
105 import time
106 import urllib.parse
107
108 from http import HTTPStatus
109
110
111 # Default error message template
112 DEFAULT_ERROR_MESSAGE = """\
113 <!DOCTYPE HTML>
114 <html lang="en">
115 <head>
116 <meta charset="utf-8">
117 <title>Error response</title>
118 </head>
119 <body>
120 <h1>Error response</h1>
121 <p>Error code: %(code)d</p>
122 <p>Message: %(message)s.</p>
123 <p>Error code explanation: %(code)s - %(explain)s.</p>
124 </body>
125 </html>
126 """
127
128 DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
129
class HTTPServer(socketserver.TCPServer):
    """TCP server that remembers its own host name and port after binding."""

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Bind via TCPServer, then record server_name and server_port."""
        socketserver.TCPServer.server_bind(self)
        # Only the first two items of the bound address are used.
        bound_host, bound_port = self.server_address[:2]
        self.server_port = bound_port
        self.server_name = socket.getfqdn(bound_host)
140
141
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer combined with socketserver.ThreadingMixIn."""

    # Attribute consumed by ThreadingMixIn; see the socketserver docs.
    daemon_threads = True
144
145
class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and request_version are the broken-down request
    line;

    - headers is an instance of the class named by MessageClass
    (http.client.HTTPMessage by default) containing the header
    information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, any relevant
        error response has already been sent back.

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = True
        requestline = str(self.raw_requestline, 'iso-8859-1')
        requestline = requestline.rstrip('\r\n')
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 0:
            # Blank request line: nothing to answer, no error is sent.
            return False

        if len(words) >= 3:  # Enough to determine protocol version
            version = words[-1]
            try:
                if not version.startswith('HTTP/'):
                    raise ValueError
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                if any(not component.isdigit() for component in version_number):
                    raise ValueError("non digit in http version")
                if any(len(component) > 10 for component in version_number):
                    raise ValueError("unreasonable length http version")
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad request version (%r)" % version)
                return False
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = False
            if version_number >= (2, 0):
                self.send_error(
                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
                    "Invalid HTTP version (%s)" % base_version_number)
                return False
            self.request_version = version

        if not 2 <= len(words) <= 3:
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad request syntax (%r)" % requestline)
            return False
        command, path = words[:2]
        if len(words) == 2:
            # Two words means an HTTP/0.9 style request: GET only, and the
            # connection is closed afterwards.
            self.close_connection = True
            if command != 'GET':
                self.send_error(
                    HTTPStatus.BAD_REQUEST,
                    "Bad HTTP/0.9 request type (%r)" % command)
                return False
        self.command, self.path = command, path

        # gh-87389: The purpose of replacing '//' with '/' is to protect
        # against open redirect attacks possibly triggered if the path starts
        # with '//' because http clients treat //path as an absolute URI
        # without scheme (similar to http://path) rather than a path.
        if self.path.startswith('//'):
            self.path = '/' + self.path.lstrip('/')  # Reduce to a single /

        # Examine the headers and look for a Connection directive.
        try:
            self.headers = http.client.parse_headers(self.rfile,
                                                     _class=self.MessageClass)
        except http.client.LineTooLong as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Line too long",
                str(err))
            return False
        except http.client.HTTPException as err:
            self.send_error(
                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
                "Too many headers",
                str(err)
            )
            return False

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = True
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = False
        # Examine the headers and look for an Expect directive
        expect = self.headers.get('Expect', "")
        if (expect.lower() == "100-continue" and
                self.protocol_version >= "HTTP/1.1" and
                self.request_version >= "HTTP/1.1"):
            if not self.handle_expect_100():
                return False
        return True

    def handle_expect_100(self):
        """Decide what to do with an "Expect: 100-continue" header.

        If the client is expecting a 100 Continue response, we must
        respond with either a 100 Continue or a final response before
        waiting for the request body. The default is to always respond
        with a 100 Continue. You can behave differently (for example,
        reject unauthorized requests) by overriding this method.

        This method should either return True (possibly after sending
        a 100 Continue response) or send an error response and return
        False.

        """
        self.send_response_only(HTTPStatus.CONTINUE)
        self.end_headers()
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        try:
            # Read one byte more than the limit so an over-long request
            # line can be told apart from one that is exactly at the limit.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
                return
            if not self.raw_requestline:
                self.close_connection = True
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(
                    HTTPStatus.NOT_IMPLEMENTED,
                    "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            self.wfile.flush()  # actually send the response if not already done.
        except TimeoutError as e:
            # a read or a write timed out.  Discard this connection
            self.log_error("Request timed out: %r", e)
            self.close_connection = True
            return

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = True

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None, explain=None):
        """Send and log an error reply.

        Arguments are
        * code:    an HTTP error code
                   3 digits
        * message: a simple optional 1 line reason phrase.
                   *( HTAB / SP / VCHAR / %x80-FF )
                   defaults to short entry matching the response code
        * explain: a detailed message defaults to the long entry
                   matching the response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        """

        try:
            shortmsg, longmsg = self.responses[code]
        except KeyError:
            shortmsg, longmsg = '???', '???'
        if message is None:
            message = shortmsg
        if explain is None:
            explain = longmsg
        self.log_error("code %d, message %s", code, message)
        self.send_response(code, message)
        self.send_header('Connection', 'close')

        # Message body is omitted for cases described in:
        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
        #  - RFC7231: 6.3.6. 205(Reset Content)
        body = None
        if (code >= 200 and
                code not in (HTTPStatus.NO_CONTENT,
                             HTTPStatus.RESET_CONTENT,
                             HTTPStatus.NOT_MODIFIED)):
            # HTML encode to prevent Cross Site Scripting attacks
            # (see bug #1100201)
            content = (self.error_message_format % {
                'code': code,
                'message': html.escape(message, quote=False),
                'explain': html.escape(explain, quote=False)
            })
            body = content.encode('UTF-8', 'replace')
            self.send_header("Content-Type", self.error_content_type)
            self.send_header('Content-Length', str(len(body)))
        self.end_headers()

        if self.command != 'HEAD' and body:
            self.wfile.write(body)

    def send_response(self, code, message=None):
        """Add the response header to the headers buffer and log the
        response code.

        Also send two standard headers with the server software
        version and the current date.

        """
        self.log_request(code)
        self.send_response_only(code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_response_only(self, code, message=None):
        """Send the response header only."""
        if self.request_version != 'HTTP/0.9':
            if message is None:
                if code in self.responses:
                    message = self.responses[code][0]
                else:
                    message = ''
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(("%s %d %s\r\n" %
                    (self.protocol_version, code, message)).encode(
                        'latin-1', 'strict'))

    def send_header(self, keyword, value):
        """Send a MIME header to the headers buffer."""
        if self.request_version != 'HTTP/0.9':
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(
                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))

        # A Connection header sent by the handler also updates whether the
        # connection is kept open after this response.
        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = True
            elif value.lower() == 'keep-alive':
                self.close_connection = False

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            # Create the buffer on demand, exactly as send_response_only()
            # and send_header() do, so that calling end_headers() with
            # nothing buffered emits the blank line instead of raising
            # AttributeError.
            if not hasattr(self, '_headers_buffer'):
                self._headers_buffer = []
            self._headers_buffer.append(b"\r\n")
            self.flush_headers()

    def flush_headers(self):
        """Write any buffered header bytes to wfile and reset the buffer."""
        if hasattr(self, '_headers_buffer'):
            self.wfile.write(b"".join(self._headers_buffer))
            self._headers_buffer = []

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """
        if isinstance(code, HTTPStatus):
            code = code.value
        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(format, *args)

    # https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes
    _control_char_table = str.maketrans(
            {c: fr'\x{c:02x}' for c in itertools.chain(range(0x20), range(0x7f,0xa0))})
    # Backslashes are doubled so escaped output stays unambiguous.
    _control_char_table[ord('\\')] = r'\\'

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client ip and current date/time are prefixed to
        every message.

        Unicode control characters are replaced with escaped hex
        before writing the output to stderr.

        """

        message = format % args
        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          message.translate(self._control_char_table)))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header."""
        if timestamp is None:
            timestamp = time.time()
        return email.utils.formatdate(timestamp, usegmt=True)

    def log_date_time_string(self):
        """Return the current time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Index 0 is a placeholder so that month numbers 1-12 index directly.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address."""

        return self.client_address[0]

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }
644
645
class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):

    """Request handler that answers GET and HEAD from the file system.

    Files are served from self.directory (the current working directory
    unless a directory is passed to the constructor) and its
    subdirectories.  The Content-type of a file comes from guess_type().

    GET and HEAD share send_head(); HEAD simply skips the body.

    """

    server_version = "SimpleHTTP/" + __version__
    index_pages = ("index.html", "index.htm")
    extensions_map = _encodings_map_default = {
        '.gz': 'application/gzip',
        '.Z': 'application/octet-stream',
        '.bz2': 'application/x-bzip2',
        '.xz': 'application/x-xz',
    }

    def __init__(self, *args, directory=None, **kwargs):
        # The directory must be known before the base initializer runs.
        root = os.getcwd() if directory is None else directory
        self.directory = os.fspath(root)
        super().__init__(*args, **kwargs)

    def do_GET(self):
        """Serve a GET request."""
        source = self.send_head()
        if not source:
            return
        try:
            self.copyfile(source, self.wfile)
        finally:
            source.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        source = self.send_head()
        if source:
            source.close()

    def send_head(self):
        """Send the status line and headers shared by GET and HEAD.

        Returns an open file object whose contents the caller must copy
        to the client (unless the command is HEAD) and must always close,
        or None when the response is already complete.

        """
        path = self.translate_path(self.path)
        if os.path.isdir(path):
            parts = urllib.parse.urlsplit(self.path)
            if not parts.path.endswith('/'):
                # redirect browser - doing basically what apache does
                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
                redirect_url = urllib.parse.urlunsplit(
                    (parts[0], parts[1], parts[2] + '/', parts[3], parts[4]))
                self.send_header("Location", redirect_url)
                self.send_header("Content-Length", "0")
                self.end_headers()
                return None
            # Serve the first index page that exists, else list the directory.
            candidates = (os.path.join(path, page) for page in self.index_pages)
            index_file = next(
                (item for item in candidates if os.path.isfile(item)), None)
            if index_file is None:
                return self.list_directory(path)
            path = index_file
        ctype = self.guess_type(path)
        # check for trailing "/" which should return 404. See Issue17324
        # The test for this was added in test_httpserver.py
        # However, some OS platforms accept a trailingSlash as a filename
        # See discussion on python-dev and Issue34711 regarding
        # parsing and rejection of filenames with a trailing slash
        if path.endswith("/"):
            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
            return None
        try:
            f = open(path, 'rb')
        except OSError:
            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
            return None

        try:
            fs = os.fstat(f.fileno())
            # Use browser cache if possible
            conditional = ("If-Modified-Since" in self.headers
                           and "If-None-Match" not in self.headers)
            if conditional:
                # compare If-Modified-Since and time of last file modification
                try:
                    ims = email.utils.parsedate_to_datetime(
                        self.headers["If-Modified-Since"])
                except (TypeError, IndexError, OverflowError, ValueError):
                    # ignore ill-formed values
                    pass
                else:
                    if ims.tzinfo is None:
                        # obsolete format with no timezone, cf.
                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
                        ims = ims.replace(tzinfo=datetime.timezone.utc)
                    if ims.tzinfo is datetime.timezone.utc:
                        # compare to UTC datetime of last modification,
                        # without microseconds, like in If-Modified-Since
                        last_modif = datetime.datetime.fromtimestamp(
                            fs.st_mtime, datetime.timezone.utc
                        ).replace(microsecond=0)

                        if last_modif <= ims:
                            self.send_response(HTTPStatus.NOT_MODIFIED)
                            self.end_headers()
                            f.close()
                            return None

            self.send_response(HTTPStatus.OK)
            self.send_header("Content-type", ctype)
            self.send_header("Content-Length", str(fs.st_size))
            self.send_header("Last-Modified",
                             self.date_time_string(fs.st_mtime))
            self.end_headers()
            return f
        except BaseException:
            # Never leak the open file, whatever went wrong; then re-raise.
            f.close()
            raise

    def list_directory(self, path):
        """Build an HTML listing for a directory without an index page.

        Returns a file-like object holding the page, or None after an
        error response.  Headers have been sent either way, so the
        contract matches send_head().

        """
        try:
            entries = os.listdir(path)
        except OSError:
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No permission to list directory")
            return None
        entries.sort(key=lambda entry: entry.lower())
        try:
            displaypath = urllib.parse.unquote(self.path,
                                               errors='surrogatepass')
        except UnicodeDecodeError:
            displaypath = urllib.parse.unquote(self.path)
        displaypath = html.escape(displaypath, quote=False)
        enc = sys.getfilesystemencoding()
        title = f'Directory listing for {displaypath}'
        chunks = [
            '<!DOCTYPE HTML>',
            '<html lang="en">',
            '<head>',
            f'<meta charset="{enc}">',
            f'<title>{title}</title>\n</head>',
            f'<body>\n<h1>{title}</h1>',
            '<hr>\n<ul>',
        ]
        for name in entries:
            fullname = os.path.join(path, name)
            is_dir = os.path.isdir(fullname)
            is_link = os.path.islink(fullname)
            # Append / for directories or @ for symbolic links
            # Note: a link to a directory displays with @ and links with /
            linkname = name + "/" if is_dir else name
            if is_link:
                displayname = name + "@"
            elif is_dir:
                displayname = name + "/"
            else:
                displayname = name
            chunks.append('<li><a href="%s">%s</a></li>'
                          % (urllib.parse.quote(linkname,
                                                errors='surrogatepass'),
                             html.escape(displayname, quote=False)))
        chunks.append('</ul>\n<hr>\n</body>\n</html>\n')
        encoded = '\n'.join(chunks).encode(enc, 'surrogateescape')
        page = io.BytesIO()
        page.write(encoded)
        page.seek(0)
        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", "text/html; charset=%s" % enc)
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        return page

    def translate_path(self, path):
        """Map a /-separated request PATH onto a path under self.directory.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        """
        # abandon query parameters and fragment
        path = path.partition('?')[0].partition('#')[0]
        # Don't forget explicit trailing slash when normalizing. Issue17324
        trailing_slash = path.rstrip().endswith('/')
        try:
            path = urllib.parse.unquote(path, errors='surrogatepass')
        except UnicodeDecodeError:
            path = urllib.parse.unquote(path)
        components = filter(None, posixpath.normpath(path).split('/'))
        local_path = self.directory
        for component in components:
            # Only simple file/directory names are joined on; anything else
            # is silently dropped.
            if (not os.path.dirname(component)
                    and component not in (os.curdir, os.pardir)):
                local_path = os.path.join(local_path, component)
        if trailing_slash:
            local_path += '/'
        return local_path

    def copyfile(self, source, outputfile):
        """Copy all data between two file objects.

        SOURCE is a file object open for reading (or anything with a
        read() method); OUTPUTFILE is a file object open for writing (or
        anything with a write() method).

        The only reason for overriding this would be to change
        the block size or perhaps to replace newlines by CRLF
        -- note however that the default server uses this
        to copy binary data as well.

        """
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        """Guess the MIME type of the file named by PATH.

        Returns a type/subtype string usable as a Content-type header.

        self.extensions_map is consulted first, with the extension as
        given and then lower-cased; after that mimetypes.guess_type() is
        tried, and application/octet-stream is the final fallback.

        """
        ext = posixpath.splitext(path)[1]
        for candidate in (ext, ext.lower()):
            if candidate in self.extensions_map:
                return self.extensions_map[candidate]
        guessed = mimetypes.guess_type(path)[0]
        return guessed or 'application/octet-stream'
904
905
906 # Utilities for CGIHTTPRequestHandler
907
908 def _url_collapse_path(path):
909 """
910 Given a URL path, remove extra '/'s and '.' path elements and collapse
911 any '..' references and returns a collapsed path.
912
913 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
914 The utility of this function is limited to is_cgi method and helps
915 preventing some security attacks.
916
917 Returns: The reconstituted URL, which will always start with a '/'.
918
919 Raises: IndexError if too many '..' occur within the path.
920
921 """
922 # Query component should not be involved.
923 path, _, query = path.partition('?')
924 path = urllib.parse.unquote(path)
925
926 # Similar to os.path.split(os.path.normpath(path)) but specific to URL
927 # path semantics rather than local operating system semantics.
928 path_parts = path.split('/')
929 head_parts = []
930 for part in path_parts[:-1]:
931 if part == '..':
932 head_parts.pop() # IndexError if more '..' than prior parts
933 elif part and part != '.':
934 head_parts.append( part )
935 if path_parts:
936 tail_part = path_parts.pop()
937 if tail_part:
938 if tail_part == '..':
939 head_parts.pop()
940 tail_part = ''
941 elif tail_part == '.':
942 tail_part = ''
943 else:
944 tail_part = ''
945
946 if query:
947 tail_part = '?'.join((tail_part, query))
948
949 splitpath = ('/' + '/'.join(head_parts), tail_part)
950 collapsed_path = "/".join(splitpath)
951
952 return collapsed_path
953
954
955
# Cached result of nobody_uid(); None until the first successful lookup.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns the cached value when one is set, -1 when the pwd module
    cannot be imported, and otherwise the uid of the 'nobody' account or,
    if there is no such account, one more than the largest uid listed by
    pwd.getpwall().
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            # Not cached: the module-level value stays untouched.
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
972
973
def executable(path):
    """Return whether os.access() reports execute permission on *path*."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
977
978
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    # Collapsed URL prefixes (no trailing slash) that is_cgi() treats as
    # CGI directories.
    cgi_directories = ['/cgi-bin', '/htbin']

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that support CGI scripts"""
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Try every prefix that ends just before a '/' until one of them
        # is a configured CGI directory.
        dir_sep = collapsed_path.find('/', 1)
        while (dir_sep > 0
               and collapsed_path[:dir_sep] not in self.cgi_directories):
            dir_sep = collapsed_path.find('/', dir_sep+1)
        if dir_sep > 0:
            head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
            self.cgi_info = head, tail
            return True
        return False

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        _, ext = os.path.splitext(path)
        return ext.lower() in (".py", ".pyw")

    def _cgi_environ(self, scriptname, path_info, query):
        """Build the environment mapping passed to a CGI script.

        scriptname -- URL path of the script (becomes SCRIPT_NAME).
        path_info  -- still-quoted URL remainder after the script name; it
                      is unquoted for PATH_INFO and PATH_TRANSLATED.
        query      -- raw query string (becomes QUERY_STRING).

        Returns a deep copy of os.environ overlaid with the variables
        derived from this request.  The request variables are collected in
        a fresh dict first, so the empty defaults at the end really do
        replace values the server process itself inherited (a stale
        CONTENT_LENGTH or HTTP_COOKIE must not reach the script).
        """
        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        headers = self.headers
        uqrest = urllib.parse.unquote(path_info)
        meta = {
            'SERVER_SOFTWARE': self.version_string(),
            'SERVER_NAME': self.server.server_name,
            'GATEWAY_INTERFACE': 'CGI/1.1',
            'SERVER_PROTOCOL': self.protocol_version,
            'SERVER_PORT': str(self.server.server_port),
            'REQUEST_METHOD': self.command,
            'PATH_INFO': uqrest,
            'PATH_TRANSLATED': self.translate_path(uqrest),
            'SCRIPT_NAME': scriptname,
            'QUERY_STRING': query,
            'REMOTE_ADDR': self.client_address[0],
        }

        authorization = headers.get("authorization")
        if authorization:
            auth_parts = authorization.split()
            if len(auth_parts) == 2:
                import base64
                import binascii
                scheme, credentials = auth_parts
                meta['AUTH_TYPE'] = scheme
                if scheme.lower() == "basic":
                    try:
                        decoded = base64.decodebytes(
                            credentials.encode('ascii')).decode('ascii')
                    except (binascii.Error, UnicodeError):
                        pass
                    else:
                        # Only the first ':' separates user-id from
                        # password; the password may itself contain ':'.
                        user, colon, _ = decoded.partition(':')
                        if colon:
                            meta['REMOTE_USER'] = user
        # XXX REMOTE_IDENT
        if headers.get('content-type') is None:
            meta['CONTENT_TYPE'] = headers.get_content_type()
        else:
            meta['CONTENT_TYPE'] = headers['content-type']
        length = headers.get('content-length')
        if length:
            meta['CONTENT_LENGTH'] = length
        referer = headers.get('referer')
        if referer:
            meta['HTTP_REFERER'] = referer
        accept = headers.get_all('accept', ())
        meta['HTTP_ACCEPT'] = ','.join(accept)
        ua = headers.get('user-agent')
        if ua:
            meta['HTTP_USER_AGENT'] = ua
        co = filter(None, headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            meta['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Provide empty values for anything the request did not supply so
        # that previously set (inherited) values are overridden.
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            meta.setdefault(k, "")

        env = copy.deepcopy(os.environ)
        env.update(meta)
        return env

    def run_cgi(self):
        """Execute a CGI script.

        Uses self.cgi_info (set by is_cgi()) to locate the script, sends an
        error response if it is missing, not a plain file or not
        executable, and otherwise sends a 200 status line and runs the
        script with the request socket as its stdin/stdout (fork) or
        through a pipe (subprocess fallback).  Always returns None.
        """
        cgi_dir, rest = self.cgi_info
        path = cgi_dir + '/' + rest
        # Walk down through any real sub-directories below the CGI
        # directory so that scripts in nested folders are found.
        i = path.find('/', len(cgi_dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]
            if not os.path.isdir(self.translate_path(nextdir)):
                break
            cgi_dir, rest = nextdir, nextrest
            i = path.find('/', len(cgi_dir)+1)

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        script, slash, extra = rest.partition('/')
        path_info = slash + extra

        scriptname = cgi_dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are run through the interpreter and
        # therefore need no execute permission of their own.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        env = self._cgi_environ(scriptname, path_info, query)

        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                exitcode = os.waitstatus_to_exitcode(sts)
                if exitcode:
                    self.log_error(f"CGI script exit code {exitcode}")
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except OSError:
                    pass
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except BaseException:
                # Deliberately catch everything: the forked child must
                # never fall back into the server's request loop.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            # NOTE(review): the raw query is passed here, whereas the fork
            # branch passes decoded_query -- confirm this is intended.
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            try:
                nbytes = int(self.headers.get('content-length'))
            except (TypeError, ValueError):
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env=env,
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1239
1240
def _get_best_family(*address):
    """Return (family, sockaddr) from the first getaddrinfo() result.

    *address* is forwarded positionally to socket.getaddrinfo(), which is
    asked for passive (AI_PASSIVE) stream-socket addresses.
    """
    candidates = socket.getaddrinfo(
        *address,
        type=socket.SOCK_STREAM,
        flags=socket.AI_PASSIVE,
    )
    # Each entry is (family, type, proto, canonname, sockaddr).
    best = next(iter(candidates))
    return best[0], best[4]
1249
1250
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the port argument), bound to
    *bind*, and serves until interrupted from the keyboard, at which point
    the process exits with status 0.

    Note that ServerClass.address_family and HandlerClass.protocol_version
    are set on the classes themselves.

    """
    family, addr = _get_best_family(bind, port)
    ServerClass.address_family = family
    HandlerClass.protocol_version = protocol

    with ServerClass(addr, HandlerClass) as httpd:
        # Report the address actually bound.
        host, port = httpd.socket.getsockname()[:2]
        # A host containing ':' (IPv6 literal) is bracketed in the URL.
        if ':' in host:
            url_host = f'[{host}]'
        else:
            url_host = host
        banner = (
            f"Serving HTTP on {host} port {port} "
            f"(http://{url_host}:{port}/) ..."
        )
        print(banner)
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
1272 sys.exit(0)
1273
if __name__ == '__main__':
    import argparse
    import contextlib

    # Command-line interface: optional flags plus an optional port.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--cgi',
        action='store_true',
        help='run as CGI server',
    )
    parser.add_argument(
        '-b', '--bind',
        metavar='ADDRESS',
        help='bind to this address (default: all interfaces)',
    )
    parser.add_argument(
        '-d', '--directory',
        default=os.getcwd(),
        help='serve this directory (default: current directory)',
    )
    parser.add_argument(
        '-p', '--protocol',
        metavar='VERSION',
        default='HTTP/1.0',
        help='conform to this HTTP version (default: %(default)s)',
    )
    parser.add_argument(
        'port',
        default=8000,
        type=int,
        nargs='?',
        help='bind to this port (default: %(default)s)',
    )
    args = parser.parse_args()

    handler_class = (
        CGIHTTPRequestHandler if args.cgi else SimpleHTTPRequestHandler
    )

    # ensure dual-stack is not disabled; ref #38907
    class DualStackServer(ThreadingHTTPServer):

        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

        def finish_request(self, request, client_address):
            # Hand the directory chosen on the command line to each
            # handler instance.
            self.RequestHandlerClass(
                request, client_address, self, directory=args.directory)

    test(
        HandlerClass=handler_class,
        ServerClass=DualStackServer,
        port=args.port,
        bind=args.bind,
        protocol=args.protocol,
    )
1320 )