1  #! /usr/local/bin/python
       2  
       3  # NOTE: the above "/usr/local/bin/python" is NOT a mistake.  It is
       4  # intentionally NOT "/usr/bin/env python".  On many systems
       5  # (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI
       6  # scripts, and /usr/local/bin is the default directory where Python is
       7  # installed, so /usr/bin/env would be unable to find python.  Granted,
       8  # binary installations by Linux vendors often install Python in
       9  # /usr/bin.  So let those vendors patch cgi.py to match their choice
      10  # of installation.
      11  
      12  """Support module for CGI (Common Gateway Interface) scripts.
      13  
      14  This module defines a number of utilities for use by CGI scripts
      15  written in Python.
      16  
      17  The global variable maxlen can be set to an integer indicating the maximum size
      18  of a POST request. POST requests larger than this size will result in a
      19  ValueError being raised during parsing. The default value of this variable is 0,
      20  meaning the request size is unlimited.
      21  """
      22  
      23  # History
      24  # -------
      25  #
      26  # Michael McLay started this module.  Steve Majewski changed the
      27  # interface to SvFormContentDict and FormContentDict.  The multipart
      28  # parsing was inspired by code submitted by Andreas Paepcke.  Guido van
      29  # Rossum rewrote, reformatted and documented the module and is currently
      30  # responsible for its maintenance.
      31  #
      32  
# Version string of this module.
__version__ = "2.6"
      34  
      35  
      36  # Imports
      37  # =======
      38  
      39  from io import StringIO, BytesIO, TextIOWrapper
      40  from collections.abc import Mapping
      41  import sys
      42  import os
      43  import urllib.parse
      44  from email.parser import FeedParser
      45  from email.message import Message
      46  import html
      47  import locale
      48  import tempfile
      49  import warnings
      50  
# Names exported by "from cgi import *".
__all__ = ["MiniFieldStorage", "FieldStorage", "parse", "parse_multipart",
           "parse_header", "test", "print_exception", "print_environ",
           "print_form", "print_directory", "print_arguments",
           "print_environ_usage"]


# Flag this module as deprecated, with removal targeted at Python 3.13.
warnings._deprecated(__name__, remove=(3,13))
      58  
      59  # Logging support
      60  # ===============
      61  
# Module-level logging state, read and rebound by initlog() and closelog().
logfile = ""            # Filename to log to, if not empty
logfp = None            # File object to log to, if not None
      64  
      65  def initlog(*allargs):
      66      """Write a log message, if there is a log file.
      67  
      68      Even though this function is called initlog(), you should always
      69      use log(); log is a variable that is set either to initlog
      70      (initially), to dolog (once the log file has been opened), or to
      71      nolog (when logging is disabled).
      72  
      73      The first argument is a format string; the remaining arguments (if
      74      any) are arguments to the % operator, so e.g.
      75          log("%s: %s", "a", "b")
      76      will write "a: b" to the log file, followed by a newline.
      77  
      78      If the global logfp is not None, it should be a file object to
      79      which log data is written.
      80  
      81      If the global logfp is None, the global logfile may be a string
      82      giving a filename to open, in append mode.  This file should be
      83      world writable!!!  If the file can't be opened, logging is
      84      silently disabled (since there is no safe place where we could
      85      send an error message).
      86  
      87      """
      88      global log, logfile, logfp
      89      warnings.warn("cgi.log() is deprecated as of 3.10. Use logging instead",
      90                    DeprecationWarning, stacklevel=2)
      91      if logfile and not logfp:
      92          try:
      93              logfp = open(logfile, "a", encoding="locale")
      94          except OSError:
      95              pass
      96      if not logfp:
      97          log = nolog
      98      else:
      99          log = dolog
     100      log(*allargs)
     101  
     102  def dolog(fmt, *args):
     103      """Write a log message to the log file.  See initlog() for docs."""
     104      logfp.write(fmt%args + "\n")
     105  
     106  def nolog(*allargs):
     107      """Dummy function, assigned to log when logging is disabled."""
     108      pass
     109  
     110  def closelog():
     111      """Close the log file."""
     112      global log, logfile, logfp
     113      logfile = ''
     114      if logfp:
     115          logfp.close()
     116          logfp = None
     117      log = initlog
     118  
# Starts out as initlog, which rebinds it to dolog or nolog on first use;
# closelog() resets it to initlog.
log = initlog           # The current logging function
     120  
     121  
     122  # Parsing functions
     123  # =================
     124  
# Maximum input we will accept when REQUEST_METHOD is POST
# 0 ==> unlimited input
# A declared content length above this limit raises ValueError.
maxlen = 0
     128  
     129  def parse(fp=None, environ=os.environ, keep_blank_values=0,
     130            strict_parsing=0, separator='&'):
     131      """Parse a query in the environment or from a file (default stdin)
     132  
     133          Arguments, all optional:
     134  
     135          fp              : file pointer; default: sys.stdin.buffer
     136  
     137          environ         : environment dictionary; default: os.environ
     138  
     139          keep_blank_values: flag indicating whether blank values in
     140              percent-encoded forms should be treated as blank strings.
     141              A true value indicates that blanks should be retained as
     142              blank strings.  The default false value indicates that
     143              blank values are to be ignored and treated as if they were
     144              not included.
     145  
     146          strict_parsing: flag indicating what to do with parsing errors.
     147              If false (the default), errors are silently ignored.
     148              If true, errors raise a ValueError exception.
     149  
     150          separator: str. The symbol to use for separating the query arguments.
     151              Defaults to &.
     152      """
     153      if fp is None:
     154          fp = sys.stdin
     155  
     156      # field keys and values (except for files) are returned as strings
     157      # an encoding is required to decode the bytes read from self.fp
     158      if hasattr(fp,'encoding'):
     159          encoding = fp.encoding
     160      else:
     161          encoding = 'latin-1'
     162  
     163      # fp.read() must return bytes
     164      if isinstance(fp, TextIOWrapper):
     165          fp = fp.buffer
     166  
     167      if not 'REQUEST_METHOD' in environ:
     168          environ['REQUEST_METHOD'] = 'GET'       # For testing stand-alone
     169      if environ['REQUEST_METHOD'] == 'POST':
     170          ctype, pdict = parse_header(environ['CONTENT_TYPE'])
     171          if ctype == 'multipart/form-data':
     172              return parse_multipart(fp, pdict, separator=separator)
     173          elif ctype == 'application/x-www-form-urlencoded':
     174              clength = int(environ['CONTENT_LENGTH'])
     175              if maxlen and clength > maxlen:
     176                  raise ValueError('Maximum content length exceeded')
     177              qs = fp.read(clength).decode(encoding)
     178          else:
     179              qs = ''                     # Unknown content-type
     180          if 'QUERY_STRING' in environ:
     181              if qs: qs = qs + '&'
     182              qs = qs + environ['QUERY_STRING']
     183          elif sys.argv[1:]:
     184              if qs: qs = qs + '&'
     185              qs = qs + sys.argv[1]
     186          environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
     187      elif 'QUERY_STRING' in environ:
     188          qs = environ['QUERY_STRING']
     189      else:
     190          if sys.argv[1:]:
     191              qs = sys.argv[1]
     192          else:
     193              qs = ""
     194          environ['QUERY_STRING'] = qs    # XXX Shouldn't, really
     195      return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
     196                                   encoding=encoding, separator=separator)
     197  
     198  
     199  def parse_multipart(fp, pdict, encoding="utf-8", errors="replace", separator='&'):
     200      """Parse multipart input.
     201  
     202      Arguments:
     203      fp   : input file
     204      pdict: dictionary containing other parameters of content-type header
     205      encoding, errors: request encoding and error handler, passed to
     206          FieldStorage
     207  
     208      Returns a dictionary just like parse_qs(): keys are the field names, each
     209      value is a list of values for that field. For non-file fields, the value
     210      is a list of strings.
     211      """
     212      # RFC 2046, Section 5.1 : The "multipart" boundary delimiters are always
     213      # represented as 7bit US-ASCII.
     214      boundary = pdict['boundary'].decode('ascii')
     215      ctype = "multipart/form-data; boundary={}".format(boundary)
     216      headers = Message()
     217      headers.set_type(ctype)
     218      try:
     219          headers['Content-Length'] = pdict['CONTENT-LENGTH']
     220      except KeyError:
     221          pass
     222      fs = FieldStorage(fp, headers=headers, encoding=encoding, errors=errors,
     223          environ={'REQUEST_METHOD': 'POST'}, separator=separator)
     224      return {k: fs.getlist(k) for k in fs}
     225  
     226  def _parseparam(s):
     227      while s[:1] == ';':
     228          s = s[1:]
     229          end = s.find(';')
     230          while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
     231              end = s.find(';', end + 1)
     232          if end < 0:
     233              end = len(s)
     234          f = s[:end]
     235          yield f.strip()
     236          s = s[end:]
     237  
     238  def parse_header(line):
     239      """Parse a Content-type like header.
     240  
     241      Return the main content-type and a dictionary of options.
     242  
     243      """
     244      parts = _parseparam(';' + line)
     245      key = parts.__next__()
     246      pdict = {}
     247      for p in parts:
     248          i = p.find('=')
     249          if i >= 0:
     250              name = p[:i].strip().lower()
     251              value = p[i+1:].strip()
     252              if len(value) >= 2 and value[0] == value[-1] == '"':
     253                  value = value[1:-1]
     254                  value = value.replace('\\\\', '\\').replace('\\"', '"')
     255              pdict[name] = value
     256      return key, pdict
     257  
     258  
     259  # Classes for field storage
     260  # =========================
     261  
     262  class ESC[4;38;5;81mMiniFieldStorage:
     263  
     264      """Like FieldStorage, for use when no file uploads are possible."""
     265  
     266      # Dummy attributes
     267      filename = None
     268      list = None
     269      type = None
     270      file = None
     271      type_options = {}
     272      disposition = None
     273      disposition_options = {}
     274      headers = {}
     275  
     276      def __init__(self, name, value):
     277          """Constructor from field name and value."""
     278          self.name = name
     279          self.value = value
     280          # self.file = StringIO(value)
     281  
     282      def __repr__(self):
     283          """Return printable representation."""
     284          return "MiniFieldStorage(%r, %r)" % (self.name, self.value)
     285  
     286  
     287  class ESC[4;38;5;81mFieldStorage:
     288  
     289      """Store a sequence of fields, reading multipart/form-data.
     290  
     291      This class provides naming, typing, files stored on disk, and
     292      more.  At the top level, it is accessible like a dictionary, whose
     293      keys are the field names.  (Note: None can occur as a field name.)
     294      The items are either a Python list (if there's multiple values) or
     295      another FieldStorage or MiniFieldStorage object.  If it's a single
     296      object, it has the following attributes:
     297  
     298      name: the field name, if specified; otherwise None
     299  
     300      filename: the filename, if specified; otherwise None; this is the
     301          client side filename, *not* the file name on which it is
     302          stored (that's a temporary file you don't deal with)
     303  
     304      value: the value as a *string*; for file uploads, this
     305          transparently reads the file every time you request the value
     306          and returns *bytes*
     307  
    file: the file(-like) object from which you can read the data *as
        bytes*; None if the data is stored as a simple string
     310  
     311      type: the content-type, or None if not specified
     312  
     313      type_options: dictionary of options specified on the content-type
     314          line
     315  
     316      disposition: content-disposition, or None if not specified
     317  
     318      disposition_options: dictionary of corresponding options
     319  
     320      headers: a dictionary(-like) object (sometimes email.message.Message or a
     321          subclass thereof) containing *all* headers
     322  
     323      The class is subclassable, mostly for the purpose of overriding
     324      the make_file() method, which is called internally to come up with
     325      a file open for reading and writing.  This makes it possible to
     326      override the default choice of storing all files in a temporary
     327      directory and unlinking them as soon as they have been opened.
     328  
     329      """
    def __init__(self, fp=None, headers=None, outerboundary=b'',
                 environ=os.environ, keep_blank_values=0, strict_parsing=0,
                 limit=None, encoding='utf-8', errors='replace',
                 max_num_fields=None, separator='&'):
        """Constructor.  Read multipart/* until last part.

        Arguments, all optional:

        fp              : file pointer; default: sys.stdin.buffer
            (not used when the request method is GET or HEAD)
            Can be :
            1. a TextIOWrapper object
            2. an object whose read() and readline() methods return bytes

        headers         : header dictionary-like object; default:
            taken from environ as per CGI spec

        outerboundary   : terminating multipart boundary
            (for internal use only)

        environ         : environment dictionary; default: os.environ

        keep_blank_values: flag indicating whether blank values in
            percent-encoded forms should be treated as blank strings.
            A true value indicates that blanks should be retained as
            blank strings.  The default false value indicates that
            blank values are to be ignored and treated as if they were
            not included.

        strict_parsing: flag indicating what to do with parsing errors.
            If false (the default), errors are silently ignored.
            If true, errors raise a ValueError exception.

        limit : used internally to read parts of multipart/form-data forms,
            to exit from the reading loop when reached. It is the difference
            between the form content-length and the number of bytes already
            read

        encoding, errors : the encoding and error handler used to decode the
            binary stream to strings. Must be the same as the charset defined
            for the page sending the form (content-type : meta http-equiv or
            header)

        max_num_fields: int. If set, then __init__ throws a ValueError
            if there are more than n fields read by parse_qsl().

        separator: str. The symbol used to separate the query arguments;
            it is passed on to urllib.parse.parse_qsl().  Defaults to &.

        Raises TypeError if headers is neither a mapping nor an
        email.message.Message, if fp lacks read() or readline(), or if
        outerboundary is not bytes.  Raises ValueError if the module-level
        maxlen is non-zero and the declared content-length exceeds it.

        """
        method = 'GET'
        self.keep_blank_values = keep_blank_values
        self.strict_parsing = strict_parsing
        self.max_num_fields = max_num_fields
        self.separator = separator
        if 'REQUEST_METHOD' in environ:
            method = environ['REQUEST_METHOD'].upper()
        self.qs_on_post = None
        if method == 'GET' or method == 'HEAD':
            # GET/HEAD carry their data in the query string (or argv[1]);
            # wrap it in a BytesIO so it is read like a request body.
            if 'QUERY_STRING' in environ:
                qs = environ['QUERY_STRING']
            elif sys.argv[1:]:
                qs = sys.argv[1]
            else:
                qs = ""
            qs = qs.encode(locale.getpreferredencoding(), 'surrogateescape')
            fp = BytesIO(qs)
            if headers is None:
                headers = {'content-type':
                           "application/x-www-form-urlencoded"}
        if headers is None:
            # No headers supplied: build them from the CGI environment.
            headers = {}
            if method == 'POST':
                # Set default content-type for POST to what's traditional
                headers['content-type'] = "application/x-www-form-urlencoded"
            if 'CONTENT_TYPE' in environ:
                headers['content-type'] = environ['CONTENT_TYPE']
            if 'QUERY_STRING' in environ:
                # Remembered so the body readers can merge it with the
                # fields found in the request body.
                self.qs_on_post = environ['QUERY_STRING']
            if 'CONTENT_LENGTH' in environ:
                headers['content-length'] = environ['CONTENT_LENGTH']
        else:
            if not (isinstance(headers, (Mapping, Message))):
                raise TypeError("headers must be mapping or an instance of "
                                "email.message.Message")
        self.headers = headers
        if fp is None:
            self.fp = sys.stdin.buffer
        # self.fp.read() must return bytes
        elif isinstance(fp, TextIOWrapper):
            self.fp = fp.buffer
        else:
            if not (hasattr(fp, 'read') and hasattr(fp, 'readline')):
                raise TypeError("fp must be file pointer")
            self.fp = fp

        self.encoding = encoding
        self.errors = errors

        if not isinstance(outerboundary, bytes):
            raise TypeError('outerboundary must be bytes, not %s'
                            % type(outerboundary).__name__)
        self.outerboundary = outerboundary

        self.bytes_read = 0
        self.limit = limit

        # Process content-disposition header
        cdisp, pdict = "", {}
        if 'content-disposition' in self.headers:
            cdisp, pdict = parse_header(self.headers['content-disposition'])
        self.disposition = cdisp
        self.disposition_options = pdict
        self.name = None
        if 'name' in pdict:
            self.name = pdict['name']
        self.filename = None
        if 'filename' in pdict:
            self.filename = pdict['filename']
        # A part that names a file is kept as bytes rather than decoded.
        self._binary_file = self.filename is not None

        # Process content-type header
        #
        # Honor any existing content-type header.  But if there is no
        # content-type header, use some sensible defaults.  Assume
        # outerboundary is "" at the outer level, but something non-false
        # inside a multi-part.  The default for an inner part is text/plain,
        # but for an outer part it should be urlencoded.  This should catch
        # bogus clients which erroneously forget to include a content-type
        # header.
        #
        # See below for what we do if there does exist a content-type header,
        # but it happens to be something we don't understand.
        if 'content-type' in self.headers:
            ctype, pdict = parse_header(self.headers['content-type'])
        elif self.outerboundary or method != 'POST':
            ctype, pdict = "text/plain", {}
        else:
            ctype, pdict = 'application/x-www-form-urlencoded', {}
        self.type = ctype
        self.type_options = pdict
        if 'boundary' in pdict:
            self.innerboundary = pdict['boundary'].encode(self.encoding,
                                                          self.errors)
        else:
            self.innerboundary = b""

        # -1 means "length unknown"; an unparsable content-length is
        # treated the same way.
        clen = -1
        if 'content-length' in self.headers:
            try:
                clen = int(self.headers['content-length'])
            except ValueError:
                pass
            if maxlen and clen > maxlen:
                raise ValueError('Maximum content length exceeded')
        self.length = clen
        if self.limit is None and clen >= 0:
            self.limit = clen

        self.list = self.file = None
        self.done = 0
        # Dispatch on the content type; anything that is neither
        # url-encoded nor multipart is read as a single atomic part.
        if ctype == 'application/x-www-form-urlencoded':
            self.read_urlencoded()
        elif ctype[:10] == 'multipart/':
            self.read_multi(environ, keep_blank_values, strict_parsing)
        else:
            self.read_single()
     494  
     495      def __del__(self):
     496          try:
     497              self.file.close()
     498          except AttributeError:
     499              pass
     500  
    def __enter__(self):
        """Enter a ``with`` block; the storage object itself is returned."""
        return self
     503  
     504      def __exit__(self, *args):
     505          self.file.close()
     506  
     507      def __repr__(self):
     508          """Return a printable representation."""
     509          return "FieldStorage(%r, %r, %r)" % (
     510                  self.name, self.filename, self.value)
     511  
     512      def __iter__(self):
     513          return iter(self.keys())
     514  
     515      def __getattr__(self, name):
     516          if name != 'value':
     517              raise AttributeError(name)
     518          if self.file:
     519              self.file.seek(0)
     520              value = self.file.read()
     521              self.file.seek(0)
     522          elif self.list is not None:
     523              value = self.list
     524          else:
     525              value = None
     526          return value
     527  
     528      def __getitem__(self, key):
     529          """Dictionary style indexing."""
     530          if self.list is None:
     531              raise TypeError("not indexable")
     532          found = []
     533          for item in self.list:
     534              if item.name == key: found.append(item)
     535          if not found:
     536              raise KeyError(key)
     537          if len(found) == 1:
     538              return found[0]
     539          else:
     540              return found
     541  
     542      def getvalue(self, key, default=None):
     543          """Dictionary style get() method, including 'value' lookup."""
     544          if key in self:
     545              value = self[key]
     546              if isinstance(value, list):
     547                  return [x.value for x in value]
     548              else:
     549                  return value.value
     550          else:
     551              return default
     552  
     553      def getfirst(self, key, default=None):
     554          """ Return the first value received."""
     555          if key in self:
     556              value = self[key]
     557              if isinstance(value, list):
     558                  return value[0].value
     559              else:
     560                  return value.value
     561          else:
     562              return default
     563  
     564      def getlist(self, key):
     565          """ Return list of received values."""
     566          if key in self:
     567              value = self[key]
     568              if isinstance(value, list):
     569                  return [x.value for x in value]
     570              else:
     571                  return [value.value]
     572          else:
     573              return []
     574  
     575      def keys(self):
     576          """Dictionary style keys() method."""
     577          if self.list is None:
     578              raise TypeError("not indexable")
     579          return list(set(item.name for item in self.list))
     580  
     581      def __contains__(self, key):
     582          """Dictionary style __contains__ method."""
     583          if self.list is None:
     584              raise TypeError("not indexable")
     585          return any(item.name == key for item in self.list)
     586  
     587      def __len__(self):
     588          """Dictionary style len(x) support."""
     589          return len(self.keys())
     590  
     591      def __bool__(self):
     592          if self.list is None:
     593              raise TypeError("Cannot be converted to bool.")
     594          return bool(self.list)
     595  
     596      def read_urlencoded(self):
     597          """Internal: read data in query string format."""
     598          qs = self.fp.read(self.length)
     599          if not isinstance(qs, bytes):
     600              raise ValueError("%s should return bytes, got %s" \
     601                               % (self.fp, type(qs).__name__))
     602          qs = qs.decode(self.encoding, self.errors)
     603          if self.qs_on_post:
     604              qs += '&' + self.qs_on_post
     605          query = urllib.parse.parse_qsl(
     606              qs, self.keep_blank_values, self.strict_parsing,
     607              encoding=self.encoding, errors=self.errors,
     608              max_num_fields=self.max_num_fields, separator=self.separator)
     609          self.list = [MiniFieldStorage(key, value) for key, value in query]
     610          self.skip_lines()
     611  
    # Class used by read_multi() to build each nested part; when left as
    # None, the instance's own class is used.
    FieldStorageClass = None
     613  
    def read_multi(self, environ, keep_blank_values, strict_parsing):
        """Internal: read a part that is itself multipart.

        Fills self.list with one part object per sub-part, preceded by
        the fields of the query string remembered in qs_on_post (if any).
        environ, keep_blank_values and strict_parsing are handed on
        unchanged to the constructor of every sub-part.

        Raises ValueError if the inner boundary is rejected by
        valid_boundary(), if self.fp.readline() does not return bytes,
        or if max_num_fields is exceeded.
        """
        ib = self.innerboundary
        if not valid_boundary(ib):
            raise ValueError('Invalid boundary in multipart form: %r' % (ib,))
        self.list = []
        if self.qs_on_post:
            query = urllib.parse.parse_qsl(
                self.qs_on_post, self.keep_blank_values, self.strict_parsing,
                encoding=self.encoding, errors=self.errors,
                max_num_fields=self.max_num_fields, separator=self.separator)
            self.list.extend(MiniFieldStorage(key, value) for key, value in query)

        # Sub-parts are built with FieldStorageClass when it is set,
        # otherwise with this instance's own class.
        klass = self.FieldStorageClass or self.__class__
        first_line = self.fp.readline() # bytes
        if not isinstance(first_line, bytes):
            raise ValueError("%s should return bytes, got %s" \
                             % (self.fp, type(first_line).__name__))
        self.bytes_read += len(first_line)

        # Ensure that we consume the file until we've hit our inner boundary
        while (first_line.strip() != (b"--" + self.innerboundary) and
                first_line):
            first_line = self.fp.readline()
            self.bytes_read += len(first_line)

        # Propagate max_num_fields into the sub class appropriately
        max_num_fields = self.max_num_fields
        if max_num_fields is not None:
            max_num_fields -= len(self.list)

        while True:
            # Collect the header lines of the next part, up to and
            # including the blank line that ends them.
            parser = FeedParser()
            hdr_text = b""
            while True:
                data = self.fp.readline()
                hdr_text += data
                if not data.strip():
                    break
            if not hdr_text:
                # Nothing left to read: no further parts.
                break
            # parser takes strings, not bytes
            self.bytes_read += len(hdr_text)
            parser.feed(hdr_text.decode(self.encoding, self.errors))
            headers = parser.close()

            # Some clients add Content-Length for part headers, ignore them
            if 'content-length' in headers:
                del headers['content-length']

            # Whatever remains of our own limit becomes the part's limit.
            limit = None if self.limit is None \
                else self.limit - self.bytes_read
            part = klass(self.fp, headers, ib, environ, keep_blank_values,
                         strict_parsing, limit,
                         self.encoding, self.errors, max_num_fields, self.separator)

            if max_num_fields is not None:
                # The part counts as one field, plus any fields nested in it.
                max_num_fields -= 1
                if part.list:
                    max_num_fields -= len(part.list)
                if max_num_fields < 0:
                    raise ValueError('Max number of fields exceeded')

            self.bytes_read += part.bytes_read
            self.list.append(part)
            # Stop once a part reports it is done, or once the declared
            # length (when positive) has been consumed.
            if part.done or self.bytes_read >= self.length > 0:
                break
        self.skip_lines()
     682  
     683      def read_single(self):
     684          """Internal: read an atomic part."""
     685          if self.length >= 0:
     686              self.read_binary()
     687              self.skip_lines()
     688          else:
     689              self.read_lines()
     690          self.file.seek(0)
     691  
    # Largest chunk read_binary() requests from fp in a single read().
    bufsize = 8*1024            # I/O buffering size for copy to file
     693  
     694      def read_binary(self):
     695          """Internal: read binary data."""
     696          self.file = self.make_file()
     697          todo = self.length
     698          if todo >= 0:
     699              while todo > 0:
     700                  data = self.fp.read(min(todo, self.bufsize)) # bytes
     701                  if not isinstance(data, bytes):
     702                      raise ValueError("%s should return bytes, got %s"
     703                                       % (self.fp, type(data).__name__))
     704                  self.bytes_read += len(data)
     705                  if not data:
     706                      self.done = -1
     707                      break
     708                  self.file.write(data)
     709                  todo = todo - len(data)
     710  
     711      def read_lines(self):
     712          """Internal: read lines until EOF or outerboundary."""
     713          if self._binary_file:
     714              self.file = self.__file = BytesIO() # store data as bytes for files
     715          else:
     716              self.file = self.__file = StringIO() # as strings for other fields
     717          if self.outerboundary:
     718              self.read_lines_to_outerboundary()
     719          else:
     720              self.read_lines_to_eof()
     721  
     722      def __write(self, line):
     723          """line is always bytes, not string"""
     724          if self.__file is not None:
     725              if self.__file.tell() + len(line) > 1000:
     726                  self.file = self.make_file()
     727                  data = self.__file.getvalue()
     728                  self.file.write(data)
     729                  self.__file = None
     730          if self._binary_file:
     731              # keep bytes
     732              self.file.write(line)
     733          else:
     734              # decode to string
     735              self.file.write(line.decode(self.encoding, self.errors))
     736  
     737      def read_lines_to_eof(self):
     738          """Internal: read lines until EOF."""
     739          while 1:
     740              line = self.fp.readline(1<<16) # bytes
     741              self.bytes_read += len(line)
     742              if not line:
     743                  self.done = -1
     744                  break
     745              self.__write(line)
     746  
    def read_lines_to_outerboundary(self):
        """Internal: read lines until outerboundary.
        Data is read as bytes: boundaries and line ends must be converted
        to bytes for comparisons.

        Reading stops at the first of: the per-call byte limit
        (self.limit) being reached, end of input (self.done = -1), the
        separator line "--<outerboundary>" (self.done unchanged) or the
        closing line "--<outerboundary>--" (self.done = 1).  The line
        terminator that precedes a boundary line is not stored.
        """
        next_boundary = b"--" + self.outerboundary
        last_boundary = next_boundary + b"--"
        # Terminator stripped from the previous line.  It is only written
        # out together with the *next* line, so the terminator directly
        # in front of a boundary never reaches the stored data.
        delim = b""
        # True when the previous line ended with "\n"; a boundary is only
        # recognised at the start of a fresh line.
        last_line_lfend = True
        # Number of bytes consumed by this call, checked against self.limit.
        _read = 0
        while 1:

            if self.limit is not None and 0 <= self.limit <= _read:
                break
            line = self.fp.readline(1<<16) # bytes
            self.bytes_read += len(line)
            _read += len(line)
            if not line:
                # End of input before any boundary was seen.
                self.done = -1
                break
            if delim == b"\r":
                # The previous chunk ended in a lone "\r": glue it back on
                # so that a "\r\n" pair split across two reads is handled
                # as one terminator below.
                line = delim + line
                delim = b""
            if line.startswith(b"--") and last_line_lfend:
                strippedline = line.rstrip()
                if strippedline == next_boundary:
                    break
                if strippedline == last_boundary:
                    self.done = 1
                    break
            # Remember the pending terminator, then strip this line's own
            # terminator and keep it pending for the next iteration.
            odelim = delim
            if line.endswith(b"\r\n"):
                delim = b"\r\n"
                line = line[:-2]
                last_line_lfend = True
            elif line.endswith(b"\n"):
                delim = b"\n"
                line = line[:-1]
                last_line_lfend = True
            elif line.endswith(b"\r"):
                # We may interrupt \r\n sequences if they span the 2**16
                # byte boundary
                delim = b"\r"
                line = line[:-1]
                last_line_lfend = False
            else:
                # Chunk without terminator (readline size cap or EOF).
                delim = b""
                last_line_lfend = False
            self.__write(odelim + line)
     796  
     797      def skip_lines(self):
     798          """Internal: skip lines until outer boundary if defined."""
     799          if not self.outerboundary or self.done:
     800              return
     801          next_boundary = b"--" + self.outerboundary
     802          last_boundary = next_boundary + b"--"
     803          last_line_lfend = True
     804          while True:
     805              line = self.fp.readline(1<<16)
     806              self.bytes_read += len(line)
     807              if not line:
     808                  self.done = -1
     809                  break
     810              if line.endswith(b"--") and last_line_lfend:
     811                  strippedline = line.strip()
     812                  if strippedline == next_boundary:
     813                      break
     814                  if strippedline == last_boundary:
     815                      self.done = 1
     816                      break
     817              last_line_lfend = line.endswith(b'\n')
     818  
     819      def make_file(self):
     820          """Overridable: return a readable & writable file.
     821  
     822          The file will be used as follows:
     823          - data is written to it
     824          - seek(0)
     825          - data is read from it
     826  
     827          The file is opened in binary mode for files, in text mode
     828          for other fields
     829  
     830          This version opens a temporary file for reading and writing,
     831          and immediately deletes (unlinks) it.  The trick (on Unix!) is
     832          that the file can still be used, but it can't be opened by
     833          another process, and it will automatically be deleted when it
     834          is closed or when the current process terminates.
     835  
     836          If you want a more permanent file, you derive a class which
     837          overrides this method.  If you want a visible temporary file
     838          that is nevertheless automatically deleted when the script
     839          terminates, try defining a __del__ method in a derived class
     840          which unlinks the temporary files you have created.
     841  
     842          """
     843          if self._binary_file:
     844              return tempfile.TemporaryFile("wb+")
     845          else:
     846              return tempfile.TemporaryFile("w+",
     847                  encoding=self.encoding, newline = '\n')
     848  
     849  
     850  # Test/debug code
     851  # ===============
     852  
def test(environ=os.environ):
    """Robust test CGI script, usable as main program.

    Write minimal HTTP headers and dump all information provided to
    the script in HTML form.

    The dump is produced twice: once normally, followed by a
    deliberately provoked exception to exercise print_exception(), and
    once more after setting the module-level maxlen to 50.

    Side effects: sys.stderr is rebound to sys.stdout and the global
    maxlen is left at 50; neither is restored.

    NOTE(review): FieldStorage() is created without arguments, so the
    *environ* parameter is only passed on to print_environ().
    """
    print("Content-type: text/html")
    print()
    # From here on anything written to stderr ends up in the response body.
    sys.stderr = sys.stdout
    # Bare except: whatever goes wrong is reported in the page itself.
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
        print_environ_usage()
        # The string below is not valid Python, so exec() raises; f and g
        # only exist to give the resulting traceback a couple of frames.
        def f():
            exec("testing print_exception() -- <I>italics?</I>")
        def g(f=f):
            f()
        print("<H3>What follows is a test, not an actual exception:</H3>")
        g()
    except:
        print_exception()

    print("<H1>Second try with a small maxlen...</H1>")

    # Second pass with a small module-level request size limit.
    global maxlen
    maxlen = 50
    try:
        form = FieldStorage()   # Replace with other classes to test those
        print_directory()
        print_arguments()
        print_form(form)
        print_environ(environ)
    except:
        print_exception()
     891  
     892  def print_exception(type=None, value=None, tb=None, limit=None):
     893      if type is None:
     894          type, value, tb = sys.exc_info()
     895      import traceback
     896      print()
     897      print("<H3>Traceback (most recent call last):</H3>")
     898      list = traceback.format_tb(tb, limit) + \
     899             traceback.format_exception_only(type, value)
     900      print("<PRE>%s<B>%s</B></PRE>" % (
     901          html.escape("".join(list[:-1])),
     902          html.escape(list[-1]),
     903          ))
     904      del tb
     905  
     906  def print_environ(environ=os.environ):
     907      """Dump the shell environment as HTML."""
     908      keys = sorted(environ.keys())
     909      print()
     910      print("<H3>Shell Environment:</H3>")
     911      print("<DL>")
     912      for key in keys:
     913          print("<DT>", html.escape(key), "<DD>", html.escape(environ[key]))
     914      print("</DL>")
     915      print()
     916  
     917  def print_form(form):
     918      """Dump the contents of a form as HTML."""
     919      keys = sorted(form.keys())
     920      print()
     921      print("<H3>Form Contents:</H3>")
     922      if not keys:
     923          print("<P>No form fields.")
     924      print("<DL>")
     925      for key in keys:
     926          print("<DT>" + html.escape(key) + ":", end=' ')
     927          value = form[key]
     928          print("<i>" + html.escape(repr(type(value))) + "</i>")
     929          print("<DD>" + html.escape(repr(value)))
     930      print("</DL>")
     931      print()
     932  
     933  def print_directory():
     934      """Dump the current directory as HTML."""
     935      print()
     936      print("<H3>Current Working Directory:</H3>")
     937      try:
     938          pwd = os.getcwd()
     939      except OSError as msg:
     940          print("OSError:", html.escape(str(msg)))
     941      else:
     942          print(html.escape(pwd))
     943      print()
     944  
     945  def print_arguments():
     946      print()
     947      print("<H3>Command Line Arguments:</H3>")
     948      print()
     949      print(sys.argv)
     950      print()
     951  
     952  def print_environ_usage():
     953      """Dump a list of environment variables used by CGI as HTML."""
     954      print("""
     955  <H3>These environment variables could have been set:</H3>
     956  <UL>
     957  <LI>AUTH_TYPE
     958  <LI>CONTENT_LENGTH
     959  <LI>CONTENT_TYPE
     960  <LI>DATE_GMT
     961  <LI>DATE_LOCAL
     962  <LI>DOCUMENT_NAME
     963  <LI>DOCUMENT_ROOT
     964  <LI>DOCUMENT_URI
     965  <LI>GATEWAY_INTERFACE
     966  <LI>LAST_MODIFIED
     967  <LI>PATH
     968  <LI>PATH_INFO
     969  <LI>PATH_TRANSLATED
     970  <LI>QUERY_STRING
     971  <LI>REMOTE_ADDR
     972  <LI>REMOTE_HOST
     973  <LI>REMOTE_IDENT
     974  <LI>REMOTE_USER
     975  <LI>REQUEST_METHOD
     976  <LI>SCRIPT_NAME
     977  <LI>SERVER_NAME
     978  <LI>SERVER_PORT
     979  <LI>SERVER_PROTOCOL
     980  <LI>SERVER_ROOT
     981  <LI>SERVER_SOFTWARE
     982  </UL>
     983  In addition, HTTP headers sent by the server may be passed in the
     984  environment as well.  Here are some common variable names:
     985  <UL>
     986  <LI>HTTP_ACCEPT
     987  <LI>HTTP_CONNECTION
     988  <LI>HTTP_HOST
     989  <LI>HTTP_PRAGMA
     990  <LI>HTTP_REFERER
     991  <LI>HTTP_USER_AGENT
     992  </UL>
     993  """)
     994  
     995  
     996  # Utilities
     997  # =========
     998  
     999  def valid_boundary(s):
    1000      import re
    1001      if isinstance(s, bytes):
    1002          _vb_pattern = b"^[ -~]{0,200}[!-~]$"
    1003      else:
    1004          _vb_pattern = "^[ -~]{0,200}[!-~]$"
    1005      return re.match(_vb_pattern, s)
    1006  
    1007  # Invoke mainline
    1008  # ===============
    1009  
# Run the self-test CGI script, test(), when this file is executed
# directly as a script (not when it is imported as a module).
if __name__ == '__main__':
    test()