python (3.11.7)

Browse
Build Log
Usage
       1  # Copyright (C) 2001-2010 Python Software Foundation
       2  # Author: Barry Warsaw
       3  # Contact: email-sig@python.org
       4  
       5  """Classes to generate plain text from a message object tree."""
       6  
       7  __all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator']
       8  
       9  import re
      10  import sys
      11  import time
      12  import random
      13  
      14  from copy import deepcopy
      15  from io import StringIO, BytesIO
      16  from email.utils import _has_surrogates
      17  
# Joins a MIME maintype and subtype into the suffix of a _handle_*() name.
UNDERSCORE = '_'
NL = '\n'  # XXX: no longer used by the code below.

# Matches one line ending of any flavour: CRLF, bare CR, or bare LF.
NLCRE = re.compile(r'\r\n|\r|\n')
# Matches "From " at the start of any line; used for From_ mangling.
fcre = re.compile(r'^From ', re.MULTILINE)
      23  
      24  
      25  class ESC[4;38;5;81mGenerator:
      26      """Generates output from a Message object tree.
      27  
      28      This basic generator writes the message to the given file object as plain
      29      text.
      30      """
      31      #
      32      # Public interface
      33      #
      34  
      35      def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *,
      36                   policy=None):
      37          """Create the generator for message flattening.
      38  
      39          outfp is the output file-like object for writing the message to.  It
      40          must have a write() method.
      41  
      42          Optional mangle_from_ is a flag that, when True (the default if policy
      43          is not set), escapes From_ lines in the body of the message by putting
      44          a `>' in front of them.
      45  
      46          Optional maxheaderlen specifies the longest length for a non-continued
      47          header.  When a header line is longer (in characters, with tabs
      48          expanded to 8 spaces) than maxheaderlen, the header will split as
      49          defined in the Header class.  Set maxheaderlen to zero to disable
      50          header wrapping.  The default is 78, as recommended (but not required)
      51          by RFC 2822.
      52  
      53          The policy keyword specifies a policy object that controls a number of
      54          aspects of the generator's operation.  If no policy is specified,
      55          the policy associated with the Message object passed to the
      56          flatten method is used.
      57  
      58          """
      59  
      60          if mangle_from_ is None:
      61              mangle_from_ = True if policy is None else policy.mangle_from_
      62          self._fp = outfp
      63          self._mangle_from_ = mangle_from_
      64          self.maxheaderlen = maxheaderlen
      65          self.policy = policy
      66  
      67      def write(self, s):
      68          # Just delegate to the file object
      69          self._fp.write(s)
      70  
      71      def flatten(self, msg, unixfrom=False, linesep=None):
      72          r"""Print the message object tree rooted at msg to the output file
      73          specified when the Generator instance was created.
      74  
      75          unixfrom is a flag that forces the printing of a Unix From_ delimiter
      76          before the first object in the message tree.  If the original message
      77          has no From_ delimiter, a `standard' one is crafted.  By default, this
      78          is False to inhibit the printing of any From_ delimiter.
      79  
      80          Note that for subobjects, no From_ line is printed.
      81  
      82          linesep specifies the characters used to indicate a new line in
      83          the output.  The default value is determined by the policy specified
      84          when the Generator instance was created or, if none was specified,
      85          from the policy associated with the msg.
      86  
      87          """
      88          # We use the _XXX constants for operating on data that comes directly
      89          # from the msg, and _encoded_XXX constants for operating on data that
      90          # has already been converted (to bytes in the BytesGenerator) and
      91          # inserted into a temporary buffer.
      92          policy = msg.policy if self.policy is None else self.policy
      93          if linesep is not None:
      94              policy = policy.clone(linesep=linesep)
      95          if self.maxheaderlen is not None:
      96              policy = policy.clone(max_line_length=self.maxheaderlen)
      97          self._NL = policy.linesep
      98          self._encoded_NL = self._encode(self._NL)
      99          self._EMPTY = ''
     100          self._encoded_EMPTY = self._encode(self._EMPTY)
     101          # Because we use clone (below) when we recursively process message
     102          # subparts, and because clone uses the computed policy (not None),
     103          # submessages will automatically get set to the computed policy when
     104          # they are processed by this code.
     105          old_gen_policy = self.policy
     106          old_msg_policy = msg.policy
     107          try:
     108              self.policy = policy
     109              msg.policy = policy
     110              if unixfrom:
     111                  ufrom = msg.get_unixfrom()
     112                  if not ufrom:
     113                      ufrom = 'From nobody ' + time.ctime(time.time())
     114                  self.write(ufrom + self._NL)
     115              self._write(msg)
     116          finally:
     117              self.policy = old_gen_policy
     118              msg.policy = old_msg_policy
     119  
     120      def clone(self, fp):
     121          """Clone this generator with the exact same options."""
     122          return self.__class__(fp,
     123                                self._mangle_from_,
     124                                None, # Use policy setting, which we've adjusted
     125                                policy=self.policy)
     126  
     127      #
     128      # Protected interface - undocumented ;/
     129      #
     130  
     131      # Note that we use 'self.write' when what we are writing is coming from
     132      # the source, and self._fp.write when what we are writing is coming from a
     133      # buffer (because the Bytes subclass has already had a chance to transform
     134      # the data in its write method in that case).  This is an entirely
     135      # pragmatic split determined by experiment; we could be more general by
     136      # always using write and having the Bytes subclass write method detect when
     137      # it has already transformed the input; but, since this whole thing is a
     138      # hack anyway this seems good enough.
     139  
     140      def _new_buffer(self):
     141          # BytesGenerator overrides this to return BytesIO.
     142          return StringIO()
     143  
     144      def _encode(self, s):
     145          # BytesGenerator overrides this to encode strings to bytes.
     146          return s
     147  
     148      def _write_lines(self, lines):
     149          # We have to transform the line endings.
     150          if not lines:
     151              return
     152          lines = NLCRE.split(lines)
     153          for line in lines[:-1]:
     154              self.write(line)
     155              self.write(self._NL)
     156          if lines[-1]:
     157              self.write(lines[-1])
     158          # XXX logic tells me this else should be needed, but the tests fail
     159          # with it and pass without it.  (NLCRE.split ends with a blank element
     160          # if and only if there was a trailing newline.)
     161          #else:
     162          #    self.write(self._NL)
     163  
     164      def _write(self, msg):
     165          # We can't write the headers yet because of the following scenario:
     166          # say a multipart message includes the boundary string somewhere in
     167          # its body.  We'd have to calculate the new boundary /before/ we write
     168          # the headers so that we can write the correct Content-Type:
     169          # parameter.
     170          #
     171          # The way we do this, so as to make the _handle_*() methods simpler,
     172          # is to cache any subpart writes into a buffer.  The we write the
     173          # headers and the buffer contents.  That way, subpart handlers can
     174          # Do The Right Thing, and can still modify the Content-Type: header if
     175          # necessary.
     176          oldfp = self._fp
     177          try:
     178              self._munge_cte = None
     179              self._fp = sfp = self._new_buffer()
     180              self._dispatch(msg)
     181          finally:
     182              self._fp = oldfp
     183              munge_cte = self._munge_cte
     184              del self._munge_cte
     185          # If we munged the cte, copy the message again and re-fix the CTE.
     186          if munge_cte:
     187              msg = deepcopy(msg)
     188              # Preserve the header order if the CTE header already exists.
     189              if msg.get('content-transfer-encoding') is None:
     190                  msg['Content-Transfer-Encoding'] = munge_cte[0]
     191              else:
     192                  msg.replace_header('content-transfer-encoding', munge_cte[0])
     193              msg.replace_header('content-type', munge_cte[1])
     194          # Write the headers.  First we see if the message object wants to
     195          # handle that itself.  If not, we'll do it generically.
     196          meth = getattr(msg, '_write_headers', None)
     197          if meth is None:
     198              self._write_headers(msg)
     199          else:
     200              meth(self)
     201          self._fp.write(sfp.getvalue())
     202  
     203      def _dispatch(self, msg):
     204          # Get the Content-Type: for the message, then try to dispatch to
     205          # self._handle_<maintype>_<subtype>().  If there's no handler for the
     206          # full MIME type, then dispatch to self._handle_<maintype>().  If
     207          # that's missing too, then dispatch to self._writeBody().
     208          main = msg.get_content_maintype()
     209          sub = msg.get_content_subtype()
     210          specific = UNDERSCORE.join((main, sub)).replace('-', '_')
     211          meth = getattr(self, '_handle_' + specific, None)
     212          if meth is None:
     213              generic = main.replace('-', '_')
     214              meth = getattr(self, '_handle_' + generic, None)
     215              if meth is None:
     216                  meth = self._writeBody
     217          meth(msg)
     218  
     219      #
     220      # Default handlers
     221      #
     222  
     223      def _write_headers(self, msg):
     224          for h, v in msg.raw_items():
     225              self.write(self.policy.fold(h, v))
     226          # A blank line always separates headers from body
     227          self.write(self._NL)
     228  
     229      #
     230      # Handlers for writing types and subtypes
     231      #
     232  
     233      def _handle_text(self, msg):
     234          payload = msg.get_payload()
     235          if payload is None:
     236              return
     237          if not isinstance(payload, str):
     238              raise TypeError('string payload expected: %s' % type(payload))
     239          if _has_surrogates(msg._payload):
     240              charset = msg.get_param('charset')
     241              if charset is not None:
     242                  # XXX: This copy stuff is an ugly hack to avoid modifying the
     243                  # existing message.
     244                  msg = deepcopy(msg)
     245                  del msg['content-transfer-encoding']
     246                  msg.set_payload(payload, charset)
     247                  payload = msg.get_payload()
     248                  self._munge_cte = (msg['content-transfer-encoding'],
     249                                     msg['content-type'])
     250          if self._mangle_from_:
     251              payload = fcre.sub('>From ', payload)
     252          self._write_lines(payload)
     253  
     254      # Default body handler
     255      _writeBody = _handle_text
     256  
     257      def _handle_multipart(self, msg):
     258          # The trick here is to write out each part separately, merge them all
     259          # together, and then make sure that the boundary we've chosen isn't
     260          # present in the payload.
     261          msgtexts = []
     262          subparts = msg.get_payload()
     263          if subparts is None:
     264              subparts = []
     265          elif isinstance(subparts, str):
     266              # e.g. a non-strict parse of a message with no starting boundary.
     267              self.write(subparts)
     268              return
     269          elif not isinstance(subparts, list):
     270              # Scalar payload
     271              subparts = [subparts]
     272          for part in subparts:
     273              s = self._new_buffer()
     274              g = self.clone(s)
     275              g.flatten(part, unixfrom=False, linesep=self._NL)
     276              msgtexts.append(s.getvalue())
     277          # BAW: What about boundaries that are wrapped in double-quotes?
     278          boundary = msg.get_boundary()
     279          if not boundary:
     280              # Create a boundary that doesn't appear in any of the
     281              # message texts.
     282              alltext = self._encoded_NL.join(msgtexts)
     283              boundary = self._make_boundary(alltext)
     284              msg.set_boundary(boundary)
     285          # If there's a preamble, write it out, with a trailing CRLF
     286          if msg.preamble is not None:
     287              if self._mangle_from_:
     288                  preamble = fcre.sub('>From ', msg.preamble)
     289              else:
     290                  preamble = msg.preamble
     291              self._write_lines(preamble)
     292              self.write(self._NL)
     293          # dash-boundary transport-padding CRLF
     294          self.write('--' + boundary + self._NL)
     295          # body-part
     296          if msgtexts:
     297              self._fp.write(msgtexts.pop(0))
     298          # *encapsulation
     299          # --> delimiter transport-padding
     300          # --> CRLF body-part
     301          for body_part in msgtexts:
     302              # delimiter transport-padding CRLF
     303              self.write(self._NL + '--' + boundary + self._NL)
     304              # body-part
     305              self._fp.write(body_part)
     306          # close-delimiter transport-padding
     307          self.write(self._NL + '--' + boundary + '--' + self._NL)
     308          if msg.epilogue is not None:
     309              if self._mangle_from_:
     310                  epilogue = fcre.sub('>From ', msg.epilogue)
     311              else:
     312                  epilogue = msg.epilogue
     313              self._write_lines(epilogue)
     314  
     315      def _handle_multipart_signed(self, msg):
     316          # The contents of signed parts has to stay unmodified in order to keep
     317          # the signature intact per RFC1847 2.1, so we disable header wrapping.
     318          # RDM: This isn't enough to completely preserve the part, but it helps.
     319          p = self.policy
     320          self.policy = p.clone(max_line_length=0)
     321          try:
     322              self._handle_multipart(msg)
     323          finally:
     324              self.policy = p
     325  
     326      def _handle_message_delivery_status(self, msg):
     327          # We can't just write the headers directly to self's file object
     328          # because this will leave an extra newline between the last header
     329          # block and the boundary.  Sigh.
     330          blocks = []
     331          for part in msg.get_payload():
     332              s = self._new_buffer()
     333              g = self.clone(s)
     334              g.flatten(part, unixfrom=False, linesep=self._NL)
     335              text = s.getvalue()
     336              lines = text.split(self._encoded_NL)
     337              # Strip off the unnecessary trailing empty line
     338              if lines and lines[-1] == self._encoded_EMPTY:
     339                  blocks.append(self._encoded_NL.join(lines[:-1]))
     340              else:
     341                  blocks.append(text)
     342          # Now join all the blocks with an empty line.  This has the lovely
     343          # effect of separating each block with an empty line, but not adding
     344          # an extra one after the last one.
     345          self._fp.write(self._encoded_NL.join(blocks))
     346  
     347      def _handle_message(self, msg):
     348          s = self._new_buffer()
     349          g = self.clone(s)
     350          # The payload of a message/rfc822 part should be a multipart sequence
     351          # of length 1.  The zeroth element of the list should be the Message
     352          # object for the subpart.  Extract that object, stringify it, and
     353          # write it out.
     354          # Except, it turns out, when it's a string instead, which happens when
     355          # and only when HeaderParser is used on a message of mime type
     356          # message/rfc822.  Such messages are generated by, for example,
     357          # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
     358          # in that case we just emit the string body.
     359          payload = msg._payload
     360          if isinstance(payload, list):
     361              g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
     362              payload = s.getvalue()
     363          else:
     364              payload = self._encode(payload)
     365          self._fp.write(payload)
     366  
     367      # This used to be a module level function; we use a classmethod for this
     368      # and _compile_re so we can continue to provide the module level function
     369      # for backward compatibility by doing
     370      #   _make_boundary = Generator._make_boundary
     371      # at the end of the module.  It *is* internal, so we could drop that...
     372      @classmethod
     373      def _make_boundary(cls, text=None):
     374          # Craft a random boundary.  If text is given, ensure that the chosen
     375          # boundary doesn't appear in the text.
     376          token = random.randrange(sys.maxsize)
     377          boundary = ('=' * 15) + (_fmt % token) + '=='
     378          if text is None:
     379              return boundary
     380          b = boundary
     381          counter = 0
     382          while True:
     383              cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
     384              if not cre.search(text):
     385                  break
     386              b = boundary + '.' + str(counter)
     387              counter += 1
     388          return b
     389  
     390      @classmethod
     391      def _compile_re(cls, s, flags):
     392          return re.compile(s, flags)
     393  
     394  
     395  class ESC[4;38;5;81mBytesGenerator(ESC[4;38;5;149mGenerator):
     396      """Generates a bytes version of a Message object tree.
     397  
     398      Functionally identical to the base Generator except that the output is
     399      bytes and not string.  When surrogates were used in the input to encode
     400      bytes, these are decoded back to bytes for output.  If the policy has
     401      cte_type set to 7bit, then the message is transformed such that the
     402      non-ASCII bytes are properly content transfer encoded, using the charset
     403      unknown-8bit.
     404  
     405      The outfp object must accept bytes in its write method.
     406      """
     407  
     408      def write(self, s):
     409          self._fp.write(s.encode('ascii', 'surrogateescape'))
     410  
     411      def _new_buffer(self):
     412          return BytesIO()
     413  
     414      def _encode(self, s):
     415          return s.encode('ascii')
     416  
     417      def _write_headers(self, msg):
     418          # This is almost the same as the string version, except for handling
     419          # strings with 8bit bytes.
     420          for h, v in msg.raw_items():
     421              self._fp.write(self.policy.fold_binary(h, v))
     422          # A blank line always separates headers from body
     423          self.write(self._NL)
     424  
     425      def _handle_text(self, msg):
     426          # If the string has surrogates the original source was bytes, so
     427          # just write it back out.
     428          if msg._payload is None:
     429              return
     430          if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit':
     431              if self._mangle_from_:
     432                  msg._payload = fcre.sub(">From ", msg._payload)
     433              self._write_lines(msg._payload)
     434          else:
     435              super(BytesGenerator,self)._handle_text(msg)
     436  
     437      # Default body handler
     438      _writeBody = _handle_text
     439  
     440      @classmethod
     441      def _compile_re(cls, s, flags):
     442          return re.compile(s.encode('ascii'), flags)
     443  
     444  
# Default text DecodedGenerator prints in place of a non-text part; expanded
# with %(keyword)s substitutions describing that part.
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
     446  
     447  class ESC[4;38;5;81mDecodedGenerator(ESC[4;38;5;149mGenerator):
     448      """Generates a text representation of a message.
     449  
     450      Like the Generator base class, except that non-text parts are substituted
     451      with a format string representing the part.
     452      """
     453      def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, fmt=None, *,
     454                   policy=None):
     455          """Like Generator.__init__() except that an additional optional
     456          argument is allowed.
     457  
     458          Walks through all subparts of a message.  If the subpart is of main
     459          type `text', then it prints the decoded payload of the subpart.
     460  
     461          Otherwise, fmt is a format string that is used instead of the message
     462          payload.  fmt is expanded with the following keywords (in
     463          %(keyword)s format):
     464  
     465          type       : Full MIME type of the non-text part
     466          maintype   : Main MIME type of the non-text part
     467          subtype    : Sub-MIME type of the non-text part
     468          filename   : Filename of the non-text part
     469          description: Description associated with the non-text part
     470          encoding   : Content transfer encoding of the non-text part
     471  
     472          The default value for fmt is None, meaning
     473  
     474          [Non-text (%(type)s) part of message omitted, filename %(filename)s]
     475          """
     476          Generator.__init__(self, outfp, mangle_from_, maxheaderlen,
     477                             policy=policy)
     478          if fmt is None:
     479              self._fmt = _FMT
     480          else:
     481              self._fmt = fmt
     482  
     483      def _dispatch(self, msg):
     484          for part in msg.walk():
     485              maintype = part.get_content_maintype()
     486              if maintype == 'text':
     487                  print(part.get_payload(decode=False), file=self)
     488              elif maintype == 'multipart':
     489                  # Just skip this
     490                  pass
     491              else:
     492                  print(self._fmt % {
     493                      'type'       : part.get_content_type(),
     494                      'maintype'   : part.get_content_maintype(),
     495                      'subtype'    : part.get_content_subtype(),
     496                      'filename'   : part.get_filename('[no filename]'),
     497                      'description': part.get('Content-Description',
     498                                              '[no description]'),
     499                      'encoding'   : part.get('Content-Transfer-Encoding',
     500                                              '[no encoding]'),
     501                      }, file=self)
     502  
     503  
# Helper used by Generator._make_boundary: _fmt zero-pads the random token to
# the number of decimal digits in sys.maxsize-1, the largest token
# random.randrange(sys.maxsize) can return, so every token has equal width.
_width = len(repr(sys.maxsize-1))
_fmt = '%%0%dd' % _width

# Backward compatibility: _make_boundary used to be a module level function,
# so it is kept available under its old name.
_make_boundary = Generator._make_boundary