1  # Copyright (C) 2001-2007 Python Software Foundation
       2  # Author: Barry Warsaw
       3  # Contact: email-sig@python.org
       4  
       5  """Basic message object for the email package object model."""
       6  
       7  __all__ = ['Message', 'EmailMessage']
       8  
       9  import binascii
      10  import re
      11  import quopri
      12  from io import BytesIO, StringIO
      13  
      14  # Intrapackage imports
      15  from email import utils
      16  from email import errors
      17  from email._policybase import Policy, compat32
      18  from email import charset as _charset
      19  from email._encoded_words import decode_b
      20  Charset = _charset.Charset
      21  
# Separator placed between a header's main value and each of its parameters
# when the pieces are joined back into a single header value.
SEMISPACE = '; '

# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
      27  
      28  
      29  def _splitparam(param):
      30      # Split header parameters.  BAW: this may be too simple.  It isn't
      31      # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
      32      # found in the wild.  We may eventually need a full fledged parser.
      33      # RDM: we might have a Header here; for now just stringify it.
      34      a, sep, b = str(param).partition(';')
      35      if not sep:
      36          return a.strip(), None
      37      return a.strip(), b.strip()
      38  
      39  def _formatparam(param, value=None, quote=True):
      40      """Convenience function to format and return a key=value pair.
      41  
      42      This will quote the value if needed or if quote is true.  If value is a
      43      three tuple (charset, language, value), it will be encoded according
      44      to RFC2231 rules.  If it contains non-ascii characters it will likewise
      45      be encoded according to RFC2231 rules, using the utf-8 charset and
      46      a null language.
      47      """
      48      if value is not None and len(value) > 0:
      49          # A tuple is used for RFC 2231 encoded parameter values where items
      50          # are (charset, language, value).  charset is a string, not a Charset
      51          # instance.  RFC 2231 encoded values are never quoted, per RFC.
      52          if isinstance(value, tuple):
      53              # Encode as per RFC 2231
      54              param += '*'
      55              value = utils.encode_rfc2231(value[2], value[0], value[1])
      56              return '%s=%s' % (param, value)
      57          else:
      58              try:
      59                  value.encode('ascii')
      60              except UnicodeEncodeError:
      61                  param += '*'
      62                  value = utils.encode_rfc2231(value, 'utf-8', '')
      63                  return '%s=%s' % (param, value)
      64          # BAW: Please check this.  I think that if quote is set it should
      65          # force quoting even if not necessary.
      66          if quote or tspecials.search(value):
      67              return '%s="%s"' % (param, utils.quote(value))
      68          else:
      69              return '%s=%s' % (param, value)
      70      else:
      71          return param
      72  
      73  def _parseparam(s):
      74      # RDM This might be a Header, so for now stringify it.
      75      s = ';' + str(s)
      76      plist = []
      77      while s[:1] == ';':
      78          s = s[1:]
      79          end = s.find(';')
      80          while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
      81              end = s.find(';', end + 1)
      82          if end < 0:
      83              end = len(s)
      84          f = s[:end]
      85          if '=' in f:
      86              i = f.index('=')
      87              f = f[:i].strip().lower() + '=' + f[i+1:].strip()
      88          plist.append(f.strip())
      89          s = s[end:]
      90      return plist
      91  
      92  
      93  def _unquotevalue(value):
      94      # This is different than utils.collapse_rfc2231_value() because it doesn't
      95      # try to convert the value to a unicode.  Message.get_param() and
      96      # Message.get_params() are both currently defined to return the tuple in
      97      # the face of RFC 2231 parameters.
      98      if isinstance(value, tuple):
      99          return value[0], value[1], utils.unquote(value[2])
     100      else:
     101          return utils.unquote(value)
     102  
     103  
     104  def _decode_uu(encoded):
     105      """Decode uuencoded data."""
     106      decoded_lines = []
     107      encoded_lines_iter = iter(encoded.splitlines())
     108      for line in encoded_lines_iter:
     109          if line.startswith(b"begin "):
     110              mode, _, path = line.removeprefix(b"begin ").partition(b" ")
     111              try:
     112                  int(mode, base=8)
     113              except ValueError:
     114                  continue
     115              else:
     116                  break
     117      else:
     118          raise ValueError("`begin` line not found")
     119      for line in encoded_lines_iter:
     120          if not line:
     121              raise ValueError("Truncated input")
     122          elif line.strip(b' \t\r\n\f') == b'end':
     123              break
     124          try:
     125              decoded_line = binascii.a2b_uu(line)
     126          except binascii.Error:
     127              # Workaround for broken uuencoders by /Fredrik Lundh
     128              nbytes = (((line[0]-32) & 63) * 4 + 5) // 3
     129              decoded_line = binascii.a2b_uu(line[:nbytes])
     130          decoded_lines.append(decoded_line)
     131  
     132      return b''.join(decoded_lines)
     133  
     134  
     135  class ESC[4;38;5;81mMessage:
     136      """Basic message object.
     137  
     138      A message object is defined as something that has a bunch of RFC 2822
     139      headers and a payload.  It may optionally have an envelope header
     140      (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
     141      multipart or a message/rfc822), then the payload is a list of Message
     142      objects, otherwise it is a string.
     143  
     144      Message objects implement part of the `mapping' interface, which assumes
     145      there is exactly one occurrence of the header per message.  Some headers
     146      do in fact appear multiple times (e.g. Received) and for those headers,
     147      you must use the explicit API to set or get all the headers.  Not all of
     148      the mapping methods are implemented.
     149      """
     150      def __init__(self, policy=compat32):
     151          self.policy = policy
     152          self._headers = []
     153          self._unixfrom = None
     154          self._payload = None
     155          self._charset = None
     156          # Defaults for multipart messages
     157          self.preamble = self.epilogue = None
     158          self.defects = []
     159          # Default content type
     160          self._default_type = 'text/plain'
     161  
     162      def __str__(self):
     163          """Return the entire formatted message as a string.
     164          """
     165          return self.as_string()
     166  
     167      def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
     168          """Return the entire formatted message as a string.
     169  
     170          Optional 'unixfrom', when true, means include the Unix From_ envelope
     171          header.  For backward compatibility reasons, if maxheaderlen is
     172          not specified it defaults to 0, so you must override it explicitly
     173          if you want a different maxheaderlen.  'policy' is passed to the
     174          Generator instance used to serialize the message; if it is not
     175          specified the policy associated with the message instance is used.
     176  
     177          If the message object contains binary data that is not encoded
     178          according to RFC standards, the non-compliant data will be replaced by
     179          unicode "unknown character" code points.
     180          """
     181          from email.generator import Generator
     182          policy = self.policy if policy is None else policy
     183          fp = StringIO()
     184          g = Generator(fp,
     185                        mangle_from_=False,
     186                        maxheaderlen=maxheaderlen,
     187                        policy=policy)
     188          g.flatten(self, unixfrom=unixfrom)
     189          return fp.getvalue()
     190  
     191      def __bytes__(self):
     192          """Return the entire formatted message as a bytes object.
     193          """
     194          return self.as_bytes()
     195  
     196      def as_bytes(self, unixfrom=False, policy=None):
     197          """Return the entire formatted message as a bytes object.
     198  
     199          Optional 'unixfrom', when true, means include the Unix From_ envelope
     200          header.  'policy' is passed to the BytesGenerator instance used to
     201          serialize the message; if not specified the policy associated with
     202          the message instance is used.
     203          """
     204          from email.generator import BytesGenerator
     205          policy = self.policy if policy is None else policy
     206          fp = BytesIO()
     207          g = BytesGenerator(fp, mangle_from_=False, policy=policy)
     208          g.flatten(self, unixfrom=unixfrom)
     209          return fp.getvalue()
     210  
     211      def is_multipart(self):
     212          """Return True if the message consists of multiple parts."""
     213          return isinstance(self._payload, list)
     214  
     215      #
     216      # Unix From_ line
     217      #
     218      def set_unixfrom(self, unixfrom):
     219          self._unixfrom = unixfrom
     220  
     221      def get_unixfrom(self):
     222          return self._unixfrom
     223  
     224      #
     225      # Payload manipulation.
     226      #
     227      def attach(self, payload):
     228          """Add the given payload to the current payload.
     229  
     230          The current payload will always be a list of objects after this method
     231          is called.  If you want to set the payload to a scalar object, use
     232          set_payload() instead.
     233          """
     234          if self._payload is None:
     235              self._payload = [payload]
     236          else:
     237              try:
     238                  self._payload.append(payload)
     239              except AttributeError:
     240                  raise TypeError("Attach is not valid on a message with a"
     241                                  " non-multipart payload")
     242  
     243      def get_payload(self, i=None, decode=False):
     244          """Return a reference to the payload.
     245  
     246          The payload will either be a list object or a string.  If you mutate
     247          the list object, you modify the message's payload in place.  Optional
     248          i returns that index into the payload.
     249  
     250          Optional decode is a flag indicating whether the payload should be
     251          decoded or not, according to the Content-Transfer-Encoding header
     252          (default is False).
     253  
     254          When True and the message is not a multipart, the payload will be
     255          decoded if this header's value is `quoted-printable' or `base64'.  If
     256          some other encoding is used, or the header is missing, or if the
     257          payload has bogus data (i.e. bogus base64 or uuencoded data), the
     258          payload is returned as-is.
     259  
     260          If the message is a multipart and the decode flag is True, then None
     261          is returned.
     262          """
     263          # Here is the logic table for this code, based on the email5.0.0 code:
     264          #   i     decode  is_multipart  result
     265          # ------  ------  ------------  ------------------------------
     266          #  None   True    True          None
     267          #   i     True    True          None
     268          #  None   False   True          _payload (a list)
     269          #   i     False   True          _payload element i (a Message)
     270          #   i     False   False         error (not a list)
     271          #   i     True    False         error (not a list)
     272          #  None   False   False         _payload
     273          #  None   True    False         _payload decoded (bytes)
     274          # Note that Barry planned to factor out the 'decode' case, but that
     275          # isn't so easy now that we handle the 8 bit data, which needs to be
     276          # converted in both the decode and non-decode path.
     277          if self.is_multipart():
     278              if decode:
     279                  return None
     280              if i is None:
     281                  return self._payload
     282              else:
     283                  return self._payload[i]
     284          # For backward compatibility, Use isinstance and this error message
     285          # instead of the more logical is_multipart test.
     286          if i is not None and not isinstance(self._payload, list):
     287              raise TypeError('Expected list, got %s' % type(self._payload))
     288          payload = self._payload
     289          # cte might be a Header, so for now stringify it.
     290          cte = str(self.get('content-transfer-encoding', '')).lower()
     291          # payload may be bytes here.
     292          if isinstance(payload, str):
     293              if utils._has_surrogates(payload):
     294                  bpayload = payload.encode('ascii', 'surrogateescape')
     295                  if not decode:
     296                      try:
     297                          payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
     298                      except LookupError:
     299                          payload = bpayload.decode('ascii', 'replace')
     300              elif decode:
     301                  try:
     302                      bpayload = payload.encode('ascii')
     303                  except UnicodeError:
     304                      # This won't happen for RFC compliant messages (messages
     305                      # containing only ASCII code points in the unicode input).
     306                      # If it does happen, turn the string into bytes in a way
     307                      # guaranteed not to fail.
     308                      bpayload = payload.encode('raw-unicode-escape')
     309          if not decode:
     310              return payload
     311          if cte == 'quoted-printable':
     312              return quopri.decodestring(bpayload)
     313          elif cte == 'base64':
     314              # XXX: this is a bit of a hack; decode_b should probably be factored
     315              # out somewhere, but I haven't figured out where yet.
     316              value, defects = decode_b(b''.join(bpayload.splitlines()))
     317              for defect in defects:
     318                  self.policy.handle_defect(self, defect)
     319              return value
     320          elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
     321              try:
     322                  return _decode_uu(bpayload)
     323              except ValueError:
     324                  # Some decoding problem.
     325                  return bpayload
     326          if isinstance(payload, str):
     327              return bpayload
     328          return payload
     329  
     330      def set_payload(self, payload, charset=None):
     331          """Set the payload to the given value.
     332  
     333          Optional charset sets the message's default character set.  See
     334          set_charset() for details.
     335          """
     336          if hasattr(payload, 'encode'):
     337              if charset is None:
     338                  self._payload = payload
     339                  return
     340              if not isinstance(charset, Charset):
     341                  charset = Charset(charset)
     342              payload = payload.encode(charset.output_charset)
     343          if hasattr(payload, 'decode'):
     344              self._payload = payload.decode('ascii', 'surrogateescape')
     345          else:
     346              self._payload = payload
     347          if charset is not None:
     348              self.set_charset(charset)
     349  
     350      def set_charset(self, charset):
     351          """Set the charset of the payload to a given character set.
     352  
     353          charset can be a Charset instance, a string naming a character set, or
     354          None.  If it is a string it will be converted to a Charset instance.
     355          If charset is None, the charset parameter will be removed from the
     356          Content-Type field.  Anything else will generate a TypeError.
     357  
     358          The message will be assumed to be of type text/* encoded with
     359          charset.input_charset.  It will be converted to charset.output_charset
     360          and encoded properly, if needed, when generating the plain text
     361          representation of the message.  MIME headers (MIME-Version,
     362          Content-Type, Content-Transfer-Encoding) will be added as needed.
     363          """
     364          if charset is None:
     365              self.del_param('charset')
     366              self._charset = None
     367              return
     368          if not isinstance(charset, Charset):
     369              charset = Charset(charset)
     370          self._charset = charset
     371          if 'MIME-Version' not in self:
     372              self.add_header('MIME-Version', '1.0')
     373          if 'Content-Type' not in self:
     374              self.add_header('Content-Type', 'text/plain',
     375                              charset=charset.get_output_charset())
     376          else:
     377              self.set_param('charset', charset.get_output_charset())
     378          if charset != charset.get_output_charset():
     379              self._payload = charset.body_encode(self._payload)
     380          if 'Content-Transfer-Encoding' not in self:
     381              cte = charset.get_body_encoding()
     382              try:
     383                  cte(self)
     384              except TypeError:
     385                  # This 'if' is for backward compatibility, it allows unicode
     386                  # through even though that won't work correctly if the
     387                  # message is serialized.
     388                  payload = self._payload
     389                  if payload:
     390                      try:
     391                          payload = payload.encode('ascii', 'surrogateescape')
     392                      except UnicodeError:
     393                          payload = payload.encode(charset.output_charset)
     394                  self._payload = charset.body_encode(payload)
     395                  self.add_header('Content-Transfer-Encoding', cte)
     396  
     397      def get_charset(self):
     398          """Return the Charset instance associated with the message's payload.
     399          """
     400          return self._charset
     401  
     402      #
     403      # MAPPING INTERFACE (partial)
     404      #
     405      def __len__(self):
     406          """Return the total number of headers, including duplicates."""
     407          return len(self._headers)
     408  
     409      def __getitem__(self, name):
     410          """Get a header value.
     411  
     412          Return None if the header is missing instead of raising an exception.
     413  
     414          Note that if the header appeared multiple times, exactly which
     415          occurrence gets returned is undefined.  Use get_all() to get all
     416          the values matching a header field name.
     417          """
     418          return self.get(name)
     419  
     420      def __setitem__(self, name, val):
     421          """Set the value of a header.
     422  
     423          Note: this does not overwrite an existing header with the same field
     424          name.  Use __delitem__() first to delete any existing headers.
     425          """
     426          max_count = self.policy.header_max_count(name)
     427          if max_count:
     428              lname = name.lower()
     429              found = 0
     430              for k, v in self._headers:
     431                  if k.lower() == lname:
     432                      found += 1
     433                      if found >= max_count:
     434                          raise ValueError("There may be at most {} {} headers "
     435                                           "in a message".format(max_count, name))
     436          self._headers.append(self.policy.header_store_parse(name, val))
     437  
     438      def __delitem__(self, name):
     439          """Delete all occurrences of a header, if present.
     440  
     441          Does not raise an exception if the header is missing.
     442          """
     443          name = name.lower()
     444          newheaders = []
     445          for k, v in self._headers:
     446              if k.lower() != name:
     447                  newheaders.append((k, v))
     448          self._headers = newheaders
     449  
     450      def __contains__(self, name):
     451          return name.lower() in [k.lower() for k, v in self._headers]
     452  
     453      def __iter__(self):
     454          for field, value in self._headers:
     455              yield field
     456  
     457      def keys(self):
     458          """Return a list of all the message's header field names.
     459  
     460          These will be sorted in the order they appeared in the original
     461          message, or were added to the message, and may contain duplicates.
     462          Any fields deleted and re-inserted are always appended to the header
     463          list.
     464          """
     465          return [k for k, v in self._headers]
     466  
     467      def values(self):
     468          """Return a list of all the message's header values.
     469  
     470          These will be sorted in the order they appeared in the original
     471          message, or were added to the message, and may contain duplicates.
     472          Any fields deleted and re-inserted are always appended to the header
     473          list.
     474          """
     475          return [self.policy.header_fetch_parse(k, v)
     476                  for k, v in self._headers]
     477  
     478      def items(self):
     479          """Get all the message's header fields and values.
     480  
     481          These will be sorted in the order they appeared in the original
     482          message, or were added to the message, and may contain duplicates.
     483          Any fields deleted and re-inserted are always appended to the header
     484          list.
     485          """
     486          return [(k, self.policy.header_fetch_parse(k, v))
     487                  for k, v in self._headers]
     488  
     489      def get(self, name, failobj=None):
     490          """Get a header value.
     491  
     492          Like __getitem__() but return failobj instead of None when the field
     493          is missing.
     494          """
     495          name = name.lower()
     496          for k, v in self._headers:
     497              if k.lower() == name:
     498                  return self.policy.header_fetch_parse(k, v)
     499          return failobj
     500  
     501      #
     502      # "Internal" methods (public API, but only intended for use by a parser
     503      # or generator, not normal application code.
     504      #
     505  
     506      def set_raw(self, name, value):
     507          """Store name and value in the model without modification.
     508  
     509          This is an "internal" API, intended only for use by a parser.
     510          """
     511          self._headers.append((name, value))
     512  
     513      def raw_items(self):
     514          """Return the (name, value) header pairs without modification.
     515  
     516          This is an "internal" API, intended only for use by a generator.
     517          """
     518          return iter(self._headers.copy())
     519  
     520      #
     521      # Additional useful stuff
     522      #
     523  
     524      def get_all(self, name, failobj=None):
     525          """Return a list of all the values for the named field.
     526  
     527          These will be sorted in the order they appeared in the original
     528          message, and may contain duplicates.  Any fields deleted and
     529          re-inserted are always appended to the header list.
     530  
     531          If no such fields exist, failobj is returned (defaults to None).
     532          """
     533          values = []
     534          name = name.lower()
     535          for k, v in self._headers:
     536              if k.lower() == name:
     537                  values.append(self.policy.header_fetch_parse(k, v))
     538          if not values:
     539              return failobj
     540          return values
     541  
     542      def add_header(self, _name, _value, **_params):
     543          """Extended header setting.
     544  
     545          name is the header field to add.  keyword arguments can be used to set
     546          additional parameters for the header field, with underscores converted
     547          to dashes.  Normally the parameter will be added as key="value" unless
     548          value is None, in which case only the key will be added.  If a
     549          parameter value contains non-ASCII characters it can be specified as a
     550          three-tuple of (charset, language, value), in which case it will be
     551          encoded according to RFC2231 rules.  Otherwise it will be encoded using
     552          the utf-8 charset and a language of ''.
     553  
     554          Examples:
     555  
     556          msg.add_header('content-disposition', 'attachment', filename='bud.gif')
     557          msg.add_header('content-disposition', 'attachment',
     558                         filename=('utf-8', '', Fußballer.ppt'))
     559          msg.add_header('content-disposition', 'attachment',
     560                         filename='Fußballer.ppt'))
     561          """
     562          parts = []
     563          for k, v in _params.items():
     564              if v is None:
     565                  parts.append(k.replace('_', '-'))
     566              else:
     567                  parts.append(_formatparam(k.replace('_', '-'), v))
     568          if _value is not None:
     569              parts.insert(0, _value)
     570          self[_name] = SEMISPACE.join(parts)
     571  
     572      def replace_header(self, _name, _value):
     573          """Replace a header.
     574  
     575          Replace the first matching header found in the message, retaining
     576          header order and case.  If no matching header was found, a KeyError is
     577          raised.
     578          """
     579          _name = _name.lower()
     580          for i, (k, v) in zip(range(len(self._headers)), self._headers):
     581              if k.lower() == _name:
     582                  self._headers[i] = self.policy.header_store_parse(k, _value)
     583                  break
     584          else:
     585              raise KeyError(_name)
     586  
     587      #
     588      # Use these three methods instead of the three above.
     589      #
     590  
     591      def get_content_type(self):
     592          """Return the message's content type.
     593  
     594          The returned string is coerced to lower case of the form
     595          `maintype/subtype'.  If there was no Content-Type header in the
     596          message, the default type as given by get_default_type() will be
     597          returned.  Since according to RFC 2045, messages always have a default
     598          type this will always return a value.
     599  
     600          RFC 2045 defines a message's default type to be text/plain unless it
     601          appears inside a multipart/digest container, in which case it would be
     602          message/rfc822.
     603          """
     604          missing = object()
     605          value = self.get('content-type', missing)
     606          if value is missing:
     607              # This should have no parameters
     608              return self.get_default_type()
     609          ctype = _splitparam(value)[0].lower()
     610          # RFC 2045, section 5.2 says if its invalid, use text/plain
     611          if ctype.count('/') != 1:
     612              return 'text/plain'
     613          return ctype
     614  
     615      def get_content_maintype(self):
     616          """Return the message's main content type.
     617  
     618          This is the `maintype' part of the string returned by
     619          get_content_type().
     620          """
     621          ctype = self.get_content_type()
     622          return ctype.split('/')[0]
     623  
     624      def get_content_subtype(self):
     625          """Returns the message's sub-content type.
     626  
     627          This is the `subtype' part of the string returned by
     628          get_content_type().
     629          """
     630          ctype = self.get_content_type()
     631          return ctype.split('/')[1]
     632  
     633      def get_default_type(self):
     634          """Return the `default' content type.
     635  
     636          Most messages have a default content type of text/plain, except for
     637          messages that are subparts of multipart/digest containers.  Such
     638          subparts have a default content type of message/rfc822.
     639          """
     640          return self._default_type
     641  
     642      def set_default_type(self, ctype):
     643          """Set the `default' content type.
     644  
     645          ctype should be either "text/plain" or "message/rfc822", although this
     646          is not enforced.  The default content type is not stored in the
     647          Content-Type header.
     648          """
     649          self._default_type = ctype
     650  
     651      def _get_params_preserve(self, failobj, header):
     652          # Like get_params() but preserves the quoting of values.  BAW:
     653          # should this be part of the public interface?
     654          missing = object()
     655          value = self.get(header, missing)
     656          if value is missing:
     657              return failobj
     658          params = []
     659          for p in _parseparam(value):
     660              try:
     661                  name, val = p.split('=', 1)
     662                  name = name.strip()
     663                  val = val.strip()
     664              except ValueError:
     665                  # Must have been a bare attribute
     666                  name = p.strip()
     667                  val = ''
     668              params.append((name, val))
     669          params = utils.decode_params(params)
     670          return params
     671  
     672      def get_params(self, failobj=None, header='content-type', unquote=True):
     673          """Return the message's Content-Type parameters, as a list.
     674  
     675          The elements of the returned list are 2-tuples of key/value pairs, as
     676          split on the `=' sign.  The left hand side of the `=' is the key,
     677          while the right hand side is the value.  If there is no `=' sign in
     678          the parameter the value is the empty string.  The value is as
     679          described in the get_param() method.
     680  
     681          Optional failobj is the object to return if there is no Content-Type
     682          header.  Optional header is the header to search instead of
     683          Content-Type.  If unquote is True, the value is unquoted.
     684          """
     685          missing = object()
     686          params = self._get_params_preserve(missing, header)
     687          if params is missing:
     688              return failobj
     689          if unquote:
     690              return [(k, _unquotevalue(v)) for k, v in params]
     691          else:
     692              return params
     693  
     694      def get_param(self, param, failobj=None, header='content-type',
     695                    unquote=True):
     696          """Return the parameter value if found in the Content-Type header.
     697  
     698          Optional failobj is the object to return if there is no Content-Type
     699          header, or the Content-Type header has no such parameter.  Optional
     700          header is the header to search instead of Content-Type.
     701  
     702          Parameter keys are always compared case insensitively.  The return
     703          value can either be a string, or a 3-tuple if the parameter was RFC
     704          2231 encoded.  When it's a 3-tuple, the elements of the value are of
     705          the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
     706          LANGUAGE can be None, in which case you should consider VALUE to be
     707          encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
     708          The parameter value (either the returned string, or the VALUE item in
     709          the 3-tuple) is always unquoted, unless unquote is set to False.
     710  
     711          If your application doesn't care whether the parameter was RFC 2231
     712          encoded, it can turn the return value into a string as follows:
     713  
     714              rawparam = msg.get_param('foo')
     715              param = email.utils.collapse_rfc2231_value(rawparam)
     716  
     717          """
     718          if header not in self:
     719              return failobj
     720          for k, v in self._get_params_preserve(failobj, header):
     721              if k.lower() == param.lower():
     722                  if unquote:
     723                      return _unquotevalue(v)
     724                  else:
     725                      return v
     726          return failobj
     727  
     728      def set_param(self, param, value, header='Content-Type', requote=True,
     729                    charset=None, language='', replace=False):
     730          """Set a parameter in the Content-Type header.
     731  
     732          If the parameter already exists in the header, its value will be
     733          replaced with the new value.
     734  
     735          If header is Content-Type and has not yet been defined for this
     736          message, it will be set to "text/plain" and the new parameter and
     737          value will be appended as per RFC 2045.
     738  
     739          An alternate header can be specified in the header argument, and all
     740          parameters will be quoted as necessary unless requote is False.
     741  
     742          If charset is specified, the parameter will be encoded according to RFC
     743          2231.  Optional language specifies the RFC 2231 language, defaulting
     744          to the empty string.  Both charset and language should be strings.
     745          """
     746          if not isinstance(value, tuple) and charset:
     747              value = (charset, language, value)
     748  
     749          if header not in self and header.lower() == 'content-type':
     750              ctype = 'text/plain'
     751          else:
     752              ctype = self.get(header)
     753          if not self.get_param(param, header=header):
     754              if not ctype:
     755                  ctype = _formatparam(param, value, requote)
     756              else:
     757                  ctype = SEMISPACE.join(
     758                      [ctype, _formatparam(param, value, requote)])
     759          else:
     760              ctype = ''
     761              for old_param, old_value in self.get_params(header=header,
     762                                                          unquote=requote):
     763                  append_param = ''
     764                  if old_param.lower() == param.lower():
     765                      append_param = _formatparam(param, value, requote)
     766                  else:
     767                      append_param = _formatparam(old_param, old_value, requote)
     768                  if not ctype:
     769                      ctype = append_param
     770                  else:
     771                      ctype = SEMISPACE.join([ctype, append_param])
     772          if ctype != self.get(header):
     773              if replace:
     774                  self.replace_header(header, ctype)
     775              else:
     776                  del self[header]
     777                  self[header] = ctype
     778  
     779      def del_param(self, param, header='content-type', requote=True):
     780          """Remove the given parameter completely from the Content-Type header.
     781  
     782          The header will be re-written in place without the parameter or its
     783          value. All values will be quoted as necessary unless requote is
     784          False.  Optional header specifies an alternative to the Content-Type
     785          header.
     786          """
     787          if header not in self:
     788              return
     789          new_ctype = ''
     790          for p, v in self.get_params(header=header, unquote=requote):
     791              if p.lower() != param.lower():
     792                  if not new_ctype:
     793                      new_ctype = _formatparam(p, v, requote)
     794                  else:
     795                      new_ctype = SEMISPACE.join([new_ctype,
     796                                                  _formatparam(p, v, requote)])
     797          if new_ctype != self.get(header):
     798              del self[header]
     799              self[header] = new_ctype
     800  
     801      def set_type(self, type, header='Content-Type', requote=True):
     802          """Set the main type and subtype for the Content-Type header.
     803  
     804          type must be a string in the form "maintype/subtype", otherwise a
     805          ValueError is raised.
     806  
     807          This method replaces the Content-Type header, keeping all the
     808          parameters in place.  If requote is False, this leaves the existing
     809          header's quoting as is.  Otherwise, the parameters will be quoted (the
     810          default).
     811  
     812          An alternative header can be specified in the header argument.  When
     813          the Content-Type header is set, we'll always also add a MIME-Version
     814          header.
     815          """
     816          # BAW: should we be strict?
     817          if not type.count('/') == 1:
     818              raise ValueError
     819          # Set the Content-Type, you get a MIME-Version
     820          if header.lower() == 'content-type':
     821              del self['mime-version']
     822              self['MIME-Version'] = '1.0'
     823          if header not in self:
     824              self[header] = type
     825              return
     826          params = self.get_params(header=header, unquote=requote)
     827          del self[header]
     828          self[header] = type
     829          # Skip the first param; it's the old type.
     830          for p, v in params[1:]:
     831              self.set_param(p, v, header, requote)
     832  
     833      def get_filename(self, failobj=None):
     834          """Return the filename associated with the payload if present.
     835  
     836          The filename is extracted from the Content-Disposition header's
     837          `filename' parameter, and it is unquoted.  If that header is missing
     838          the `filename' parameter, this method falls back to looking for the
     839          `name' parameter.
     840          """
     841          missing = object()
     842          filename = self.get_param('filename', missing, 'content-disposition')
     843          if filename is missing:
     844              filename = self.get_param('name', missing, 'content-type')
     845          if filename is missing:
     846              return failobj
     847          return utils.collapse_rfc2231_value(filename).strip()
     848  
     849      def get_boundary(self, failobj=None):
     850          """Return the boundary associated with the payload if present.
     851  
     852          The boundary is extracted from the Content-Type header's `boundary'
     853          parameter, and it is unquoted.
     854          """
     855          missing = object()
     856          boundary = self.get_param('boundary', missing)
     857          if boundary is missing:
     858              return failobj
     859          # RFC 2046 says that boundaries may begin but not end in w/s
     860          return utils.collapse_rfc2231_value(boundary).rstrip()
     861  
     862      def set_boundary(self, boundary):
     863          """Set the boundary parameter in Content-Type to 'boundary'.
     864  
     865          This is subtly different than deleting the Content-Type header and
     866          adding a new one with a new boundary parameter via add_header().  The
     867          main difference is that using the set_boundary() method preserves the
     868          order of the Content-Type header in the original message.
     869  
     870          HeaderParseError is raised if the message has no Content-Type header.
     871          """
     872          missing = object()
     873          params = self._get_params_preserve(missing, 'content-type')
     874          if params is missing:
     875              # There was no Content-Type header, and we don't know what type
     876              # to set it to, so raise an exception.
     877              raise errors.HeaderParseError('No Content-Type header found')
     878          newparams = []
     879          foundp = False
     880          for pk, pv in params:
     881              if pk.lower() == 'boundary':
     882                  newparams.append(('boundary', '"%s"' % boundary))
     883                  foundp = True
     884              else:
     885                  newparams.append((pk, pv))
     886          if not foundp:
     887              # The original Content-Type header had no boundary attribute.
     888              # Tack one on the end.  BAW: should we raise an exception
     889              # instead???
     890              newparams.append(('boundary', '"%s"' % boundary))
     891          # Replace the existing Content-Type header with the new value
     892          newheaders = []
     893          for h, v in self._headers:
     894              if h.lower() == 'content-type':
     895                  parts = []
     896                  for k, v in newparams:
     897                      if v == '':
     898                          parts.append(k)
     899                      else:
     900                          parts.append('%s=%s' % (k, v))
     901                  val = SEMISPACE.join(parts)
     902                  newheaders.append(self.policy.header_store_parse(h, val))
     903  
     904              else:
     905                  newheaders.append((h, v))
     906          self._headers = newheaders
     907  
     908      def get_content_charset(self, failobj=None):
     909          """Return the charset parameter of the Content-Type header.
     910  
     911          The returned string is always coerced to lower case.  If there is no
     912          Content-Type header, or if that header has no charset parameter,
     913          failobj is returned.
     914          """
     915          missing = object()
     916          charset = self.get_param('charset', missing)
     917          if charset is missing:
     918              return failobj
     919          if isinstance(charset, tuple):
     920              # RFC 2231 encoded, so decode it, and it better end up as ascii.
     921              pcharset = charset[0] or 'us-ascii'
     922              try:
     923                  # LookupError will be raised if the charset isn't known to
     924                  # Python.  UnicodeError will be raised if the encoded text
     925                  # contains a character not in the charset.
     926                  as_bytes = charset[2].encode('raw-unicode-escape')
     927                  charset = str(as_bytes, pcharset)
     928              except (LookupError, UnicodeError):
     929                  charset = charset[2]
     930          # charset characters must be in us-ascii range
     931          try:
     932              charset.encode('us-ascii')
     933          except UnicodeError:
     934              return failobj
     935          # RFC 2046, $4.1.2 says charsets are not case sensitive
     936          return charset.lower()
     937  
     938      def get_charsets(self, failobj=None):
     939          """Return a list containing the charset(s) used in this message.
     940  
     941          The returned list of items describes the Content-Type headers'
     942          charset parameter for this message and all the subparts in its
     943          payload.
     944  
     945          Each item will either be a string (the value of the charset parameter
     946          in the Content-Type header of that part) or the value of the
     947          'failobj' parameter (defaults to None), if the part does not have a
     948          main MIME type of "text", or the charset is not defined.
     949  
     950          The list will contain one string for each part of the message, plus
     951          one for the container message (i.e. self), so that a non-multipart
     952          message will still return a list of length 1.
     953          """
     954          return [part.get_content_charset(failobj) for part in self.walk()]
     955  
     956      def get_content_disposition(self):
     957          """Return the message's content-disposition if it exists, or None.
     958  
     959          The return values can be either 'inline', 'attachment' or None
     960          according to the rfc2183.
     961          """
     962          value = self.get('content-disposition')
     963          if value is None:
     964              return None
     965          c_d = _splitparam(value)[0].lower()
     966          return c_d
     967  
     968      # I.e. def walk(self): ...
     969      from email.iterators import walk
     970  
     971  
     972  class ESC[4;38;5;81mMIMEPart(ESC[4;38;5;149mMessage):
     973  
     974      def __init__(self, policy=None):
     975          if policy is None:
     976              from email.policy import default
     977              policy = default
     978          super().__init__(policy)
     979  
     980  
     981      def as_string(self, unixfrom=False, maxheaderlen=None, policy=None):
     982          """Return the entire formatted message as a string.
     983  
     984          Optional 'unixfrom', when true, means include the Unix From_ envelope
     985          header.  maxheaderlen is retained for backward compatibility with the
     986          base Message class, but defaults to None, meaning that the policy value
     987          for max_line_length controls the header maximum length.  'policy' is
     988          passed to the Generator instance used to serialize the message; if it
     989          is not specified the policy associated with the message instance is
     990          used.
     991          """
     992          policy = self.policy if policy is None else policy
     993          if maxheaderlen is None:
     994              maxheaderlen = policy.max_line_length
     995          return super().as_string(unixfrom, maxheaderlen, policy)
     996  
     997      def __str__(self):
     998          return self.as_string(policy=self.policy.clone(utf8=True))
     999  
    1000      def is_attachment(self):
    1001          c_d = self.get('content-disposition')
    1002          return False if c_d is None else c_d.content_disposition == 'attachment'
    1003  
    1004      def _find_body(self, part, preferencelist):
    1005          if part.is_attachment():
    1006              return
    1007          maintype, subtype = part.get_content_type().split('/')
    1008          if maintype == 'text':
    1009              if subtype in preferencelist:
    1010                  yield (preferencelist.index(subtype), part)
    1011              return
    1012          if maintype != 'multipart' or not self.is_multipart():
    1013              return
    1014          if subtype != 'related':
    1015              for subpart in part.iter_parts():
    1016                  yield from self._find_body(subpart, preferencelist)
    1017              return
    1018          if 'related' in preferencelist:
    1019              yield (preferencelist.index('related'), part)
    1020          candidate = None
    1021          start = part.get_param('start')
    1022          if start:
    1023              for subpart in part.iter_parts():
    1024                  if subpart['content-id'] == start:
    1025                      candidate = subpart
    1026                      break
    1027          if candidate is None:
    1028              subparts = part.get_payload()
    1029              candidate = subparts[0] if subparts else None
    1030          if candidate is not None:
    1031              yield from self._find_body(candidate, preferencelist)
    1032  
    1033      def get_body(self, preferencelist=('related', 'html', 'plain')):
    1034          """Return best candidate mime part for display as 'body' of message.
    1035  
    1036          Do a depth first search, starting with self, looking for the first part
    1037          matching each of the items in preferencelist, and return the part
    1038          corresponding to the first item that has a match, or None if no items
    1039          have a match.  If 'related' is not included in preferencelist, consider
    1040          the root part of any multipart/related encountered as a candidate
    1041          match.  Ignore parts with 'Content-Disposition: attachment'.
    1042          """
    1043          best_prio = len(preferencelist)
    1044          body = None
    1045          for prio, part in self._find_body(self, preferencelist):
    1046              if prio < best_prio:
    1047                  best_prio = prio
    1048                  body = part
    1049                  if prio == 0:
    1050                      break
    1051          return body
    1052  
    1053      _body_types = {('text', 'plain'),
    1054                     ('text', 'html'),
    1055                     ('multipart', 'related'),
    1056                     ('multipart', 'alternative')}
    1057      def iter_attachments(self):
    1058          """Return an iterator over the non-main parts of a multipart.
    1059  
    1060          Skip the first of each occurrence of text/plain, text/html,
    1061          multipart/related, or multipart/alternative in the multipart (unless
    1062          they have a 'Content-Disposition: attachment' header) and include all
    1063          remaining subparts in the returned iterator.  When applied to a
    1064          multipart/related, return all parts except the root part.  Return an
    1065          empty iterator when applied to a multipart/alternative or a
    1066          non-multipart.
    1067          """
    1068          maintype, subtype = self.get_content_type().split('/')
    1069          if maintype != 'multipart' or subtype == 'alternative':
    1070              return
    1071          payload = self.get_payload()
    1072          # Certain malformed messages can have content type set to `multipart/*`
    1073          # but still have single part body, in which case payload.copy() can
    1074          # fail with AttributeError.
    1075          try:
    1076              parts = payload.copy()
    1077          except AttributeError:
    1078              # payload is not a list, it is most probably a string.
    1079              return
    1080  
    1081          if maintype == 'multipart' and subtype == 'related':
    1082              # For related, we treat everything but the root as an attachment.
    1083              # The root may be indicated by 'start'; if there's no start or we
    1084              # can't find the named start, treat the first subpart as the root.
    1085              start = self.get_param('start')
    1086              if start:
    1087                  found = False
    1088                  attachments = []
    1089                  for part in parts:
    1090                      if part.get('content-id') == start:
    1091                          found = True
    1092                      else:
    1093                          attachments.append(part)
    1094                  if found:
    1095                      yield from attachments
    1096                      return
    1097              parts.pop(0)
    1098              yield from parts
    1099              return
    1100          # Otherwise we more or less invert the remaining logic in get_body.
    1101          # This only really works in edge cases (ex: non-text related or
    1102          # alternatives) if the sending agent sets content-disposition.
    1103          seen = []   # Only skip the first example of each candidate type.
    1104          for part in parts:
    1105              maintype, subtype = part.get_content_type().split('/')
    1106              if ((maintype, subtype) in self._body_types and
    1107                      not part.is_attachment() and subtype not in seen):
    1108                  seen.append(subtype)
    1109                  continue
    1110              yield part
    1111  
    1112      def iter_parts(self):
    1113          """Return an iterator over all immediate subparts of a multipart.
    1114  
    1115          Return an empty iterator for a non-multipart.
    1116          """
    1117          if self.is_multipart():
    1118              yield from self.get_payload()
    1119  
    1120      def get_content(self, *args, content_manager=None, **kw):
    1121          if content_manager is None:
    1122              content_manager = self.policy.content_manager
    1123          return content_manager.get_content(self, *args, **kw)
    1124  
    1125      def set_content(self, *args, content_manager=None, **kw):
    1126          if content_manager is None:
    1127              content_manager = self.policy.content_manager
    1128          content_manager.set_content(self, *args, **kw)
    1129  
    1130      def _make_multipart(self, subtype, disallowed_subtypes, boundary):
    1131          if self.get_content_maintype() == 'multipart':
    1132              existing_subtype = self.get_content_subtype()
    1133              disallowed_subtypes = disallowed_subtypes + (subtype,)
    1134              if existing_subtype in disallowed_subtypes:
    1135                  raise ValueError("Cannot convert {} to {}".format(
    1136                      existing_subtype, subtype))
    1137          keep_headers = []
    1138          part_headers = []
    1139          for name, value in self._headers:
    1140              if name.lower().startswith('content-'):
    1141                  part_headers.append((name, value))
    1142              else:
    1143                  keep_headers.append((name, value))
    1144          if part_headers:
    1145              # There is existing content, move it to the first subpart.
    1146              part = type(self)(policy=self.policy)
    1147              part._headers = part_headers
    1148              part._payload = self._payload
    1149              self._payload = [part]
    1150          else:
    1151              self._payload = []
    1152          self._headers = keep_headers
    1153          self['Content-Type'] = 'multipart/' + subtype
    1154          if boundary is not None:
    1155              self.set_param('boundary', boundary)
    1156  
    1157      def make_related(self, boundary=None):
    1158          self._make_multipart('related', ('alternative', 'mixed'), boundary)
    1159  
    1160      def make_alternative(self, boundary=None):
    1161          self._make_multipart('alternative', ('mixed',), boundary)
    1162  
    1163      def make_mixed(self, boundary=None):
    1164          self._make_multipart('mixed', (), boundary)
    1165  
    1166      def _add_multipart(self, _subtype, *args, _disp=None, **kw):
    1167          if (self.get_content_maintype() != 'multipart' or
    1168                  self.get_content_subtype() != _subtype):
    1169              getattr(self, 'make_' + _subtype)()
    1170          part = type(self)(policy=self.policy)
    1171          part.set_content(*args, **kw)
    1172          if _disp and 'content-disposition' not in part:
    1173              part['Content-Disposition'] = _disp
    1174          self.attach(part)
    1175  
    1176      def add_related(self, *args, **kw):
    1177          self._add_multipart('related', *args, _disp='inline', **kw)
    1178  
    1179      def add_alternative(self, *args, **kw):
    1180          self._add_multipart('alternative', *args, **kw)
    1181  
    1182      def add_attachment(self, *args, **kw):
    1183          self._add_multipart('mixed', *args, _disp='attachment', **kw)
    1184  
    1185      def clear(self):
    1186          self._headers = []
    1187          self._payload = None
    1188  
    1189      def clear_content(self):
    1190          self._headers = [(n, v) for n, v in self._headers
    1191                           if not n.lower().startswith('content-')]
    1192          self._payload = None
    1193  
    1194  
    1195  class ESC[4;38;5;81mEmailMessage(ESC[4;38;5;149mMIMEPart):
    1196  
    1197      def set_content(self, *args, **kw):
    1198          super().set_content(*args, **kw)
    1199          if 'MIME-Version' not in self:
    1200              self['MIME-Version'] = '1.0'