python (3.12.0)

Browse
Build Log
Usage
       1  # Copyright (C) 2001-2007 Python Software Foundation
       2  # Author: Barry Warsaw
       3  # Contact: email-sig@python.org
       4  
       5  """Basic message object for the email package object model."""
       6  
       7  __all__ = ['Message', 'EmailMessage']
       8  
       9  import binascii
      10  import re
      11  import quopri
      12  from io import BytesIO, StringIO
      13  
      14  # Intrapackage imports
      15  from email import utils
      16  from email import errors
      17  from email._policybase import compat32
      18  from email import charset as _charset
      19  from email._encoded_words import decode_b
      20  Charset = _charset.Charset
      21  
      22  SEMISPACE = '; '
      23  
      24  # Regular expression that matches `special' characters in parameters, the
      25  # existence of which force quoting of the parameter value.
      26  tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
      27  
      28  
      29  def _splitparam(param):
      30      # Split header parameters.  BAW: this may be too simple.  It isn't
      31      # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
      32      # found in the wild.  We may eventually need a full fledged parser.
      33      # RDM: we might have a Header here; for now just stringify it.
      34      a, sep, b = str(param).partition(';')
      35      if not sep:
      36          return a.strip(), None
      37      return a.strip(), b.strip()
      38  
      39  def _formatparam(param, value=None, quote=True):
      40      """Convenience function to format and return a key=value pair.
      41  
      42      This will quote the value if needed or if quote is true.  If value is a
      43      three tuple (charset, language, value), it will be encoded according
      44      to RFC2231 rules.  If it contains non-ascii characters it will likewise
      45      be encoded according to RFC2231 rules, using the utf-8 charset and
      46      a null language.
      47      """
      48      if value is not None and len(value) > 0:
      49          # A tuple is used for RFC 2231 encoded parameter values where items
      50          # are (charset, language, value).  charset is a string, not a Charset
      51          # instance.  RFC 2231 encoded values are never quoted, per RFC.
      52          if isinstance(value, tuple):
      53              # Encode as per RFC 2231
      54              param += '*'
      55              value = utils.encode_rfc2231(value[2], value[0], value[1])
      56              return '%s=%s' % (param, value)
      57          else:
      58              try:
      59                  value.encode('ascii')
      60              except UnicodeEncodeError:
      61                  param += '*'
      62                  value = utils.encode_rfc2231(value, 'utf-8', '')
      63                  return '%s=%s' % (param, value)
      64          # BAW: Please check this.  I think that if quote is set it should
      65          # force quoting even if not necessary.
      66          if quote or tspecials.search(value):
      67              return '%s="%s"' % (param, utils.quote(value))
      68          else:
      69              return '%s=%s' % (param, value)
      70      else:
      71          return param
      72  
      73  def _parseparam(s):
      74      # RDM This might be a Header, so for now stringify it.
      75      s = ';' + str(s)
      76      plist = []
      77      while s[:1] == ';':
      78          s = s[1:]
      79          end = s.find(';')
      80          while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
      81              end = s.find(';', end + 1)
      82          if end < 0:
      83              end = len(s)
      84          f = s[:end]
      85          if '=' in f:
      86              i = f.index('=')
      87              f = f[:i].strip().lower() + '=' + f[i+1:].strip()
      88          plist.append(f.strip())
      89          s = s[end:]
      90      return plist
      91  
      92  
      93  def _unquotevalue(value):
      94      # This is different than utils.collapse_rfc2231_value() because it doesn't
      95      # try to convert the value to a unicode.  Message.get_param() and
      96      # Message.get_params() are both currently defined to return the tuple in
      97      # the face of RFC 2231 parameters.
      98      if isinstance(value, tuple):
      99          return value[0], value[1], utils.unquote(value[2])
     100      else:
     101          return utils.unquote(value)
     102  
     103  
     104  def _decode_uu(encoded):
     105      """Decode uuencoded data."""
     106      decoded_lines = []
     107      encoded_lines_iter = iter(encoded.splitlines())
     108      for line in encoded_lines_iter:
     109          if line.startswith(b"begin "):
     110              mode, _, path = line.removeprefix(b"begin ").partition(b" ")
     111              try:
     112                  int(mode, base=8)
     113              except ValueError:
     114                  continue
     115              else:
     116                  break
     117      else:
     118          raise ValueError("`begin` line not found")
     119      for line in encoded_lines_iter:
     120          if not line:
     121              raise ValueError("Truncated input")
     122          elif line.strip(b' \t\r\n\f') == b'end':
     123              break
     124          try:
     125              decoded_line = binascii.a2b_uu(line)
     126          except binascii.Error:
     127              # Workaround for broken uuencoders by /Fredrik Lundh
     128              nbytes = (((line[0]-32) & 63) * 4 + 5) // 3
     129              decoded_line = binascii.a2b_uu(line[:nbytes])
     130          decoded_lines.append(decoded_line)
     131  
     132      return b''.join(decoded_lines)
     133  
     134  
     135  class ESC[4;38;5;81mMessage:
     136      """Basic message object.
     137  
     138      A message object is defined as something that has a bunch of RFC 2822
     139      headers and a payload.  It may optionally have an envelope header
     140      (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
     141      multipart or a message/rfc822), then the payload is a list of Message
     142      objects, otherwise it is a string.
     143  
     144      Message objects implement part of the `mapping' interface, which assumes
     145      there is exactly one occurrence of the header per message.  Some headers
     146      do in fact appear multiple times (e.g. Received) and for those headers,
     147      you must use the explicit API to set or get all the headers.  Not all of
     148      the mapping methods are implemented.
     149      """
     150      def __init__(self, policy=compat32):
     151          self.policy = policy
     152          self._headers = []
     153          self._unixfrom = None
     154          self._payload = None
     155          self._charset = None
     156          # Defaults for multipart messages
     157          self.preamble = self.epilogue = None
     158          self.defects = []
     159          # Default content type
     160          self._default_type = 'text/plain'
     161  
    def __str__(self):
        """Return the entire formatted message as a string.

        Equivalent to as_string() called with its default arguments.
        """
        return self.as_string()
     166  
     167      def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
     168          """Return the entire formatted message as a string.
     169  
     170          Optional 'unixfrom', when true, means include the Unix From_ envelope
     171          header.  For backward compatibility reasons, if maxheaderlen is
     172          not specified it defaults to 0, so you must override it explicitly
     173          if you want a different maxheaderlen.  'policy' is passed to the
     174          Generator instance used to serialize the message; if it is not
     175          specified the policy associated with the message instance is used.
     176  
     177          If the message object contains binary data that is not encoded
     178          according to RFC standards, the non-compliant data will be replaced by
     179          unicode "unknown character" code points.
     180          """
     181          from email.generator import Generator
     182          policy = self.policy if policy is None else policy
     183          fp = StringIO()
     184          g = Generator(fp,
     185                        mangle_from_=False,
     186                        maxheaderlen=maxheaderlen,
     187                        policy=policy)
     188          g.flatten(self, unixfrom=unixfrom)
     189          return fp.getvalue()
     190  
    def __bytes__(self):
        """Return the entire formatted message as a bytes object.

        Equivalent to as_bytes() called with its default arguments.
        """
        return self.as_bytes()
     195  
     196      def as_bytes(self, unixfrom=False, policy=None):
     197          """Return the entire formatted message as a bytes object.
     198  
     199          Optional 'unixfrom', when true, means include the Unix From_ envelope
     200          header.  'policy' is passed to the BytesGenerator instance used to
     201          serialize the message; if not specified the policy associated with
     202          the message instance is used.
     203          """
     204          from email.generator import BytesGenerator
     205          policy = self.policy if policy is None else policy
     206          fp = BytesIO()
     207          g = BytesGenerator(fp, mangle_from_=False, policy=policy)
     208          g.flatten(self, unixfrom=unixfrom)
     209          return fp.getvalue()
     210  
    def is_multipart(self):
        """Return True if the message consists of multiple parts.

        The test is simply whether the stored payload is a list.
        """
        return isinstance(self._payload, list)
     214  
     215      #
     216      # Unix From_ line
     217      #
    def set_unixfrom(self, unixfrom):
        """Store unixfrom as the message's Unix From_ envelope line.

        The value is kept exactly as given; nothing is validated here.
        """
        self._unixfrom = unixfrom
     220  
    def get_unixfrom(self):
        """Return the stored Unix From_ envelope line.

        This is None for a new message until set_unixfrom() is called.
        """
        return self._unixfrom
     223  
     224      #
     225      # Payload manipulation.
     226      #
     227      def attach(self, payload):
     228          """Add the given payload to the current payload.
     229  
     230          The current payload will always be a list of objects after this method
     231          is called.  If you want to set the payload to a scalar object, use
     232          set_payload() instead.
     233          """
     234          if self._payload is None:
     235              self._payload = [payload]
     236          else:
     237              try:
     238                  self._payload.append(payload)
     239              except AttributeError:
     240                  raise TypeError("Attach is not valid on a message with a"
     241                                  " non-multipart payload")
     242  
     243      def get_payload(self, i=None, decode=False):
     244          """Return a reference to the payload.
     245  
     246          The payload will either be a list object or a string.  If you mutate
     247          the list object, you modify the message's payload in place.  Optional
     248          i returns that index into the payload.
     249  
     250          Optional decode is a flag indicating whether the payload should be
     251          decoded or not, according to the Content-Transfer-Encoding header
     252          (default is False).
     253  
     254          When True and the message is not a multipart, the payload will be
     255          decoded if this header's value is `quoted-printable' or `base64'.  If
     256          some other encoding is used, or the header is missing, or if the
     257          payload has bogus data (i.e. bogus base64 or uuencoded data), the
     258          payload is returned as-is.
     259  
     260          If the message is a multipart and the decode flag is True, then None
     261          is returned.
     262          """
     263          # Here is the logic table for this code, based on the email5.0.0 code:
     264          #   i     decode  is_multipart  result
     265          # ------  ------  ------------  ------------------------------
     266          #  None   True    True          None
     267          #   i     True    True          None
     268          #  None   False   True          _payload (a list)
     269          #   i     False   True          _payload element i (a Message)
     270          #   i     False   False         error (not a list)
     271          #   i     True    False         error (not a list)
     272          #  None   False   False         _payload
     273          #  None   True    False         _payload decoded (bytes)
     274          # Note that Barry planned to factor out the 'decode' case, but that
     275          # isn't so easy now that we handle the 8 bit data, which needs to be
     276          # converted in both the decode and non-decode path.
     277          if self.is_multipart():
     278              if decode:
     279                  return None
     280              if i is None:
     281                  return self._payload
     282              else:
     283                  return self._payload[i]
     284          # For backward compatibility, Use isinstance and this error message
     285          # instead of the more logical is_multipart test.
     286          if i is not None and not isinstance(self._payload, list):
     287              raise TypeError('Expected list, got %s' % type(self._payload))
     288          payload = self._payload
     289          # cte might be a Header, so for now stringify it.
     290          cte = str(self.get('content-transfer-encoding', '')).lower()
     291          # payload may be bytes here.
     292          if isinstance(payload, str):
     293              if utils._has_surrogates(payload):
     294                  bpayload = payload.encode('ascii', 'surrogateescape')
     295                  if not decode:
     296                      try:
     297                          payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
     298                      except LookupError:
     299                          payload = bpayload.decode('ascii', 'replace')
     300              elif decode:
     301                  try:
     302                      bpayload = payload.encode('ascii')
     303                  except UnicodeError:
     304                      # This won't happen for RFC compliant messages (messages
     305                      # containing only ASCII code points in the unicode input).
     306                      # If it does happen, turn the string into bytes in a way
     307                      # guaranteed not to fail.
     308                      bpayload = payload.encode('raw-unicode-escape')
     309          if not decode:
     310              return payload
     311          if cte == 'quoted-printable':
     312              return quopri.decodestring(bpayload)
     313          elif cte == 'base64':
     314              # XXX: this is a bit of a hack; decode_b should probably be factored
     315              # out somewhere, but I haven't figured out where yet.
     316              value, defects = decode_b(b''.join(bpayload.splitlines()))
     317              for defect in defects:
     318                  self.policy.handle_defect(self, defect)
     319              return value
     320          elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
     321              try:
     322                  return _decode_uu(bpayload)
     323              except ValueError:
     324                  # Some decoding problem.
     325                  return bpayload
     326          if isinstance(payload, str):
     327              return bpayload
     328          return payload
     329  
     330      def set_payload(self, payload, charset=None):
     331          """Set the payload to the given value.
     332  
     333          Optional charset sets the message's default character set.  See
     334          set_charset() for details.
     335          """
     336          if hasattr(payload, 'encode'):
     337              if charset is None:
     338                  self._payload = payload
     339                  return
     340              if not isinstance(charset, Charset):
     341                  charset = Charset(charset)
     342              payload = payload.encode(charset.output_charset)
     343          if hasattr(payload, 'decode'):
     344              self._payload = payload.decode('ascii', 'surrogateescape')
     345          else:
     346              self._payload = payload
     347          if charset is not None:
     348              self.set_charset(charset)
     349  
    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  If it is a string it will be converted to a Charset instance.
        If charset is None, the charset parameter will be removed from the
        Content-Type field.  Anything else will generate a TypeError.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        if charset is None:
            # Forget the charset entirely: drop the header parameter and
            # the cached Charset, and leave the payload untouched.
            self.del_param('charset')
            self._charset = None
            return
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        # Add the MIME headers only where they are not already present.
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
        if 'Content-Type' not in self:
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            # Keep the existing Content-Type and just (re)set its charset.
            self.set_param('charset', charset.get_output_charset())
        # NOTE(review): this compares a Charset instance with the value
        # returned by get_output_charset(); the result depends on Charset's
        # comparison methods, which are defined outside this file.
        if charset != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        if 'Content-Transfer-Encoding' not in self:
            cte = charset.get_body_encoding()
            try:
                # A callable body encoding is applied to the message itself.
                cte(self)
            except TypeError:
                # Calling cte raised TypeError -- presumably because it is
                # not callable (e.g. a string naming the encoding); confirm
                # against email.charset.  In that case the payload is
                # encoded here and cte is recorded as the header value.
                # This 'if' is for backward compatibility, it allows unicode
                # through even though that won't work correctly if the
                # message is serialized.
                payload = self._payload
                if payload:
                    try:
                        payload = payload.encode('ascii', 'surrogateescape')
                    except UnicodeError:
                        payload = payload.encode(charset.output_charset)
                self._payload = charset.body_encode(payload)
                self.add_header('Content-Transfer-Encoding', cte)
     396  
    def get_charset(self):
        """Return the Charset instance associated with the message's payload.

        This is the value most recently stored by set_charset(), or None if
        no charset has been set.
        """
        return self._charset
     401  
     402      #
     403      # MAPPING INTERFACE (partial)
     404      #
    def __len__(self):
        """Return the total number of headers, including duplicates.

        This counts stored header entries; the payload is not considered.
        """
        return len(self._headers)
     408  
    def __getitem__(self, name):
        """Get a header value.

        Return None if the header is missing instead of raising an exception.
        This simply delegates to get() with its default failobj.

        Note that if the header appeared multiple times, exactly which
        occurrence gets returned is undefined.  Use get_all() to get all
        the values matching a header field name.
        """
        return self.get(name)
     419  
     420      def __setitem__(self, name, val):
     421          """Set the value of a header.
     422  
     423          Note: this does not overwrite an existing header with the same field
     424          name.  Use __delitem__() first to delete any existing headers.
     425          """
     426          max_count = self.policy.header_max_count(name)
     427          if max_count:
     428              lname = name.lower()
     429              found = 0
     430              for k, v in self._headers:
     431                  if k.lower() == lname:
     432                      found += 1
     433                      if found >= max_count:
     434                          raise ValueError("There may be at most {} {} headers "
     435                                           "in a message".format(max_count, name))
     436          self._headers.append(self.policy.header_store_parse(name, val))
     437  
     438      def __delitem__(self, name):
     439          """Delete all occurrences of a header, if present.
     440  
     441          Does not raise an exception if the header is missing.
     442          """
     443          name = name.lower()
     444          newheaders = []
     445          for k, v in self._headers:
     446              if k.lower() != name:
     447                  newheaders.append((k, v))
     448          self._headers = newheaders
     449  
     450      def __contains__(self, name):
     451          name_lower = name.lower()
     452          for k, v in self._headers:
     453              if name_lower == k.lower():
     454                  return True
     455          return False
     456  
     457      def __iter__(self):
     458          for field, value in self._headers:
     459              yield field
     460  
     461      def keys(self):
     462          """Return a list of all the message's header field names.
     463  
     464          These will be sorted in the order they appeared in the original
     465          message, or were added to the message, and may contain duplicates.
     466          Any fields deleted and re-inserted are always appended to the header
     467          list.
     468          """
     469          return [k for k, v in self._headers]
     470  
     471      def values(self):
     472          """Return a list of all the message's header values.
     473  
     474          These will be sorted in the order they appeared in the original
     475          message, or were added to the message, and may contain duplicates.
     476          Any fields deleted and re-inserted are always appended to the header
     477          list.
     478          """
     479          return [self.policy.header_fetch_parse(k, v)
     480                  for k, v in self._headers]
     481  
     482      def items(self):
     483          """Get all the message's header fields and values.
     484  
     485          These will be sorted in the order they appeared in the original
     486          message, or were added to the message, and may contain duplicates.
     487          Any fields deleted and re-inserted are always appended to the header
     488          list.
     489          """
     490          return [(k, self.policy.header_fetch_parse(k, v))
     491                  for k, v in self._headers]
     492  
     493      def get(self, name, failobj=None):
     494          """Get a header value.
     495  
     496          Like __getitem__() but return failobj instead of None when the field
     497          is missing.
     498          """
     499          name = name.lower()
     500          for k, v in self._headers:
     501              if k.lower() == name:
     502                  return self.policy.header_fetch_parse(k, v)
     503          return failobj
     504  
     505      #
     506      # "Internal" methods (public API, but only intended for use by a parser
     507      # or generator, not normal application code.
     508      #
     509  
    def set_raw(self, name, value):
        """Store name and value in the model without modification.

        The pair is appended to the header list as-is; unlike
        __setitem__(), the policy is not consulted.

        This is an "internal" API, intended only for use by a parser.
        """
        self._headers.append((name, value))
     516  
     517      def raw_items(self):
     518          """Return the (name, value) header pairs without modification.
     519  
     520          This is an "internal" API, intended only for use by a generator.
     521          """
     522          return iter(self._headers.copy())
     523  
     524      #
     525      # Additional useful stuff
     526      #
     527  
     528      def get_all(self, name, failobj=None):
     529          """Return a list of all the values for the named field.
     530  
     531          These will be sorted in the order they appeared in the original
     532          message, and may contain duplicates.  Any fields deleted and
     533          re-inserted are always appended to the header list.
     534  
     535          If no such fields exist, failobj is returned (defaults to None).
     536          """
     537          values = []
     538          name = name.lower()
     539          for k, v in self._headers:
     540              if k.lower() == name:
     541                  values.append(self.policy.header_fetch_parse(k, v))
     542          if not values:
     543              return failobj
     544          return values
     545  
     546      def add_header(self, _name, _value, **_params):
     547          """Extended header setting.
     548  
     549          name is the header field to add.  keyword arguments can be used to set
     550          additional parameters for the header field, with underscores converted
     551          to dashes.  Normally the parameter will be added as key="value" unless
     552          value is None, in which case only the key will be added.  If a
     553          parameter value contains non-ASCII characters it can be specified as a
     554          three-tuple of (charset, language, value), in which case it will be
     555          encoded according to RFC2231 rules.  Otherwise it will be encoded using
     556          the utf-8 charset and a language of ''.
     557  
     558          Examples:
     559  
     560          msg.add_header('content-disposition', 'attachment', filename='bud.gif')
     561          msg.add_header('content-disposition', 'attachment',
     562                         filename=('utf-8', '', Fußballer.ppt'))
     563          msg.add_header('content-disposition', 'attachment',
     564                         filename='Fußballer.ppt'))
     565          """
     566          parts = []
     567          for k, v in _params.items():
     568              if v is None:
     569                  parts.append(k.replace('_', '-'))
     570              else:
     571                  parts.append(_formatparam(k.replace('_', '-'), v))
     572          if _value is not None:
     573              parts.insert(0, _value)
     574          self[_name] = SEMISPACE.join(parts)
     575  
     576      def replace_header(self, _name, _value):
     577          """Replace a header.
     578  
     579          Replace the first matching header found in the message, retaining
     580          header order and case.  If no matching header was found, a KeyError is
     581          raised.
     582          """
     583          _name = _name.lower()
     584          for i, (k, v) in zip(range(len(self._headers)), self._headers):
     585              if k.lower() == _name:
     586                  self._headers[i] = self.policy.header_store_parse(k, _value)
     587                  break
     588          else:
     589              raise KeyError(_name)
     590  
     591      #
     592      # Use these three methods instead of the three above.
     593      #
     594  
     595      def get_content_type(self):
     596          """Return the message's content type.
     597  
     598          The returned string is coerced to lower case of the form
     599          `maintype/subtype'.  If there was no Content-Type header in the
     600          message, the default type as given by get_default_type() will be
     601          returned.  Since according to RFC 2045, messages always have a default
     602          type this will always return a value.
     603  
     604          RFC 2045 defines a message's default type to be text/plain unless it
     605          appears inside a multipart/digest container, in which case it would be
     606          message/rfc822.
     607          """
     608          missing = object()
     609          value = self.get('content-type', missing)
     610          if value is missing:
     611              # This should have no parameters
     612              return self.get_default_type()
     613          ctype = _splitparam(value)[0].lower()
     614          # RFC 2045, section 5.2 says if its invalid, use text/plain
     615          if ctype.count('/') != 1:
     616              return 'text/plain'
     617          return ctype
     618  
     619      def get_content_maintype(self):
     620          """Return the message's main content type.
     621  
     622          This is the `maintype' part of the string returned by
     623          get_content_type().
     624          """
     625          ctype = self.get_content_type()
     626          return ctype.split('/')[0]
     627  
     628      def get_content_subtype(self):
     629          """Returns the message's sub-content type.
     630  
     631          This is the `subtype' part of the string returned by
     632          get_content_type().
     633          """
     634          ctype = self.get_content_type()
     635          return ctype.split('/')[1]
     636  
    def get_default_type(self):
        """Return the `default' content type.

        Most messages have a default content type of text/plain, except for
        messages that are subparts of multipart/digest containers.  Such
        subparts have a default content type of message/rfc822.

        The value is whatever was last stored by set_default_type(); a new
        message starts out with 'text/plain'.
        """
        return self._default_type
     645  
    def set_default_type(self, ctype):
        """Set the `default' content type.

        ctype should be either "text/plain" or "message/rfc822", although this
        is not enforced.  The default content type is not stored in the
        Content-Type header; it is only remembered on the message object
        and reported by get_default_type().
        """
        self._default_type = ctype
     654  
     655      def _get_params_preserve(self, failobj, header):
     656          # Like get_params() but preserves the quoting of values.  BAW:
     657          # should this be part of the public interface?
     658          missing = object()
     659          value = self.get(header, missing)
     660          if value is missing:
     661              return failobj
     662          params = []
     663          for p in _parseparam(value):
     664              try:
     665                  name, val = p.split('=', 1)
     666                  name = name.strip()
     667                  val = val.strip()
     668              except ValueError:
     669                  # Must have been a bare attribute
     670                  name = p.strip()
     671                  val = ''
     672              params.append((name, val))
     673          params = utils.decode_params(params)
     674          return params
     675  
     676      def get_params(self, failobj=None, header='content-type', unquote=True):
     677          """Return the message's Content-Type parameters, as a list.
     678  
     679          The elements of the returned list are 2-tuples of key/value pairs, as
     680          split on the `=' sign.  The left hand side of the `=' is the key,
     681          while the right hand side is the value.  If there is no `=' sign in
     682          the parameter the value is the empty string.  The value is as
     683          described in the get_param() method.
     684  
     685          Optional failobj is the object to return if there is no Content-Type
     686          header.  Optional header is the header to search instead of
     687          Content-Type.  If unquote is True, the value is unquoted.
     688          """
     689          missing = object()
     690          params = self._get_params_preserve(missing, header)
     691          if params is missing:
     692              return failobj
     693          if unquote:
     694              return [(k, _unquotevalue(v)) for k, v in params]
     695          else:
     696              return params
     697  
     698      def get_param(self, param, failobj=None, header='content-type',
     699                    unquote=True):
     700          """Return the parameter value if found in the Content-Type header.
     701  
     702          Optional failobj is the object to return if there is no Content-Type
     703          header, or the Content-Type header has no such parameter.  Optional
     704          header is the header to search instead of Content-Type.
     705  
     706          Parameter keys are always compared case insensitively.  The return
     707          value can either be a string, or a 3-tuple if the parameter was RFC
     708          2231 encoded.  When it's a 3-tuple, the elements of the value are of
     709          the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
     710          LANGUAGE can be None, in which case you should consider VALUE to be
     711          encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
     712          The parameter value (either the returned string, or the VALUE item in
     713          the 3-tuple) is always unquoted, unless unquote is set to False.
     714  
     715          If your application doesn't care whether the parameter was RFC 2231
     716          encoded, it can turn the return value into a string as follows:
     717  
     718              rawparam = msg.get_param('foo')
     719              param = email.utils.collapse_rfc2231_value(rawparam)
     720  
     721          """
     722          if header not in self:
     723              return failobj
     724          for k, v in self._get_params_preserve(failobj, header):
     725              if k.lower() == param.lower():
     726                  if unquote:
     727                      return _unquotevalue(v)
     728                  else:
     729                      return v
     730          return failobj
     731  
     732      def set_param(self, param, value, header='Content-Type', requote=True,
     733                    charset=None, language='', replace=False):
     734          """Set a parameter in the Content-Type header.
     735  
     736          If the parameter already exists in the header, its value will be
     737          replaced with the new value.
     738  
     739          If header is Content-Type and has not yet been defined for this
     740          message, it will be set to "text/plain" and the new parameter and
     741          value will be appended as per RFC 2045.
     742  
     743          An alternate header can be specified in the header argument, and all
     744          parameters will be quoted as necessary unless requote is False.
     745  
     746          If charset is specified, the parameter will be encoded according to RFC
     747          2231.  Optional language specifies the RFC 2231 language, defaulting
     748          to the empty string.  Both charset and language should be strings.
     749          """
     750          if not isinstance(value, tuple) and charset:
     751              value = (charset, language, value)
     752  
     753          if header not in self and header.lower() == 'content-type':
     754              ctype = 'text/plain'
     755          else:
     756              ctype = self.get(header)
     757          if not self.get_param(param, header=header):
     758              if not ctype:
     759                  ctype = _formatparam(param, value, requote)
     760              else:
     761                  ctype = SEMISPACE.join(
     762                      [ctype, _formatparam(param, value, requote)])
     763          else:
     764              ctype = ''
     765              for old_param, old_value in self.get_params(header=header,
     766                                                          unquote=requote):
     767                  append_param = ''
     768                  if old_param.lower() == param.lower():
     769                      append_param = _formatparam(param, value, requote)
     770                  else:
     771                      append_param = _formatparam(old_param, old_value, requote)
     772                  if not ctype:
     773                      ctype = append_param
     774                  else:
     775                      ctype = SEMISPACE.join([ctype, append_param])
     776          if ctype != self.get(header):
     777              if replace:
     778                  self.replace_header(header, ctype)
     779              else:
     780                  del self[header]
     781                  self[header] = ctype
     782  
     783      def del_param(self, param, header='content-type', requote=True):
     784          """Remove the given parameter completely from the Content-Type header.
     785  
     786          The header will be re-written in place without the parameter or its
     787          value. All values will be quoted as necessary unless requote is
     788          False.  Optional header specifies an alternative to the Content-Type
     789          header.
     790          """
     791          if header not in self:
     792              return
     793          new_ctype = ''
     794          for p, v in self.get_params(header=header, unquote=requote):
     795              if p.lower() != param.lower():
     796                  if not new_ctype:
     797                      new_ctype = _formatparam(p, v, requote)
     798                  else:
     799                      new_ctype = SEMISPACE.join([new_ctype,
     800                                                  _formatparam(p, v, requote)])
     801          if new_ctype != self.get(header):
     802              del self[header]
     803              self[header] = new_ctype
     804  
     805      def set_type(self, type, header='Content-Type', requote=True):
     806          """Set the main type and subtype for the Content-Type header.
     807  
     808          type must be a string in the form "maintype/subtype", otherwise a
     809          ValueError is raised.
     810  
     811          This method replaces the Content-Type header, keeping all the
     812          parameters in place.  If requote is False, this leaves the existing
     813          header's quoting as is.  Otherwise, the parameters will be quoted (the
     814          default).
     815  
     816          An alternative header can be specified in the header argument.  When
     817          the Content-Type header is set, we'll always also add a MIME-Version
     818          header.
     819          """
     820          # BAW: should we be strict?
     821          if not type.count('/') == 1:
     822              raise ValueError
     823          # Set the Content-Type, you get a MIME-Version
     824          if header.lower() == 'content-type':
     825              del self['mime-version']
     826              self['MIME-Version'] = '1.0'
     827          if header not in self:
     828              self[header] = type
     829              return
     830          params = self.get_params(header=header, unquote=requote)
     831          del self[header]
     832          self[header] = type
     833          # Skip the first param; it's the old type.
     834          for p, v in params[1:]:
     835              self.set_param(p, v, header, requote)
     836  
     837      def get_filename(self, failobj=None):
     838          """Return the filename associated with the payload if present.
     839  
     840          The filename is extracted from the Content-Disposition header's
     841          `filename' parameter, and it is unquoted.  If that header is missing
     842          the `filename' parameter, this method falls back to looking for the
     843          `name' parameter.
     844          """
     845          missing = object()
     846          filename = self.get_param('filename', missing, 'content-disposition')
     847          if filename is missing:
     848              filename = self.get_param('name', missing, 'content-type')
     849          if filename is missing:
     850              return failobj
     851          return utils.collapse_rfc2231_value(filename).strip()
     852  
     853      def get_boundary(self, failobj=None):
     854          """Return the boundary associated with the payload if present.
     855  
     856          The boundary is extracted from the Content-Type header's `boundary'
     857          parameter, and it is unquoted.
     858          """
     859          missing = object()
     860          boundary = self.get_param('boundary', missing)
     861          if boundary is missing:
     862              return failobj
     863          # RFC 2046 says that boundaries may begin but not end in w/s
     864          return utils.collapse_rfc2231_value(boundary).rstrip()
     865  
     866      def set_boundary(self, boundary):
     867          """Set the boundary parameter in Content-Type to 'boundary'.
     868  
     869          This is subtly different than deleting the Content-Type header and
     870          adding a new one with a new boundary parameter via add_header().  The
     871          main difference is that using the set_boundary() method preserves the
     872          order of the Content-Type header in the original message.
     873  
     874          HeaderParseError is raised if the message has no Content-Type header.
     875          """
     876          missing = object()
     877          params = self._get_params_preserve(missing, 'content-type')
     878          if params is missing:
     879              # There was no Content-Type header, and we don't know what type
     880              # to set it to, so raise an exception.
     881              raise errors.HeaderParseError('No Content-Type header found')
     882          newparams = []
     883          foundp = False
     884          for pk, pv in params:
     885              if pk.lower() == 'boundary':
     886                  newparams.append(('boundary', '"%s"' % boundary))
     887                  foundp = True
     888              else:
     889                  newparams.append((pk, pv))
     890          if not foundp:
     891              # The original Content-Type header had no boundary attribute.
     892              # Tack one on the end.  BAW: should we raise an exception
     893              # instead???
     894              newparams.append(('boundary', '"%s"' % boundary))
     895          # Replace the existing Content-Type header with the new value
     896          newheaders = []
     897          for h, v in self._headers:
     898              if h.lower() == 'content-type':
     899                  parts = []
     900                  for k, v in newparams:
     901                      if v == '':
     902                          parts.append(k)
     903                      else:
     904                          parts.append('%s=%s' % (k, v))
     905                  val = SEMISPACE.join(parts)
     906                  newheaders.append(self.policy.header_store_parse(h, val))
     907  
     908              else:
     909                  newheaders.append((h, v))
     910          self._headers = newheaders
     911  
     912      def get_content_charset(self, failobj=None):
     913          """Return the charset parameter of the Content-Type header.
     914  
     915          The returned string is always coerced to lower case.  If there is no
     916          Content-Type header, or if that header has no charset parameter,
     917          failobj is returned.
     918          """
     919          missing = object()
     920          charset = self.get_param('charset', missing)
     921          if charset is missing:
     922              return failobj
     923          if isinstance(charset, tuple):
     924              # RFC 2231 encoded, so decode it, and it better end up as ascii.
     925              pcharset = charset[0] or 'us-ascii'
     926              try:
     927                  # LookupError will be raised if the charset isn't known to
     928                  # Python.  UnicodeError will be raised if the encoded text
     929                  # contains a character not in the charset.
     930                  as_bytes = charset[2].encode('raw-unicode-escape')
     931                  charset = str(as_bytes, pcharset)
     932              except (LookupError, UnicodeError):
     933                  charset = charset[2]
     934          # charset characters must be in us-ascii range
     935          try:
     936              charset.encode('us-ascii')
     937          except UnicodeError:
     938              return failobj
     939          # RFC 2046, $4.1.2 says charsets are not case sensitive
     940          return charset.lower()
     941  
     942      def get_charsets(self, failobj=None):
     943          """Return a list containing the charset(s) used in this message.
     944  
     945          The returned list of items describes the Content-Type headers'
     946          charset parameter for this message and all the subparts in its
     947          payload.
     948  
     949          Each item will either be a string (the value of the charset parameter
     950          in the Content-Type header of that part) or the value of the
     951          'failobj' parameter (defaults to None), if the part does not have a
     952          main MIME type of "text", or the charset is not defined.
     953  
     954          The list will contain one string for each part of the message, plus
     955          one for the container message (i.e. self), so that a non-multipart
     956          message will still return a list of length 1.
     957          """
     958          return [part.get_content_charset(failobj) for part in self.walk()]
     959  
     960      def get_content_disposition(self):
     961          """Return the message's content-disposition if it exists, or None.
     962  
     963          The return values can be either 'inline', 'attachment' or None
     964          according to the rfc2183.
     965          """
     966          value = self.get('content-disposition')
     967          if value is None:
     968              return None
     969          c_d = _splitparam(value)[0].lower()
     970          return c_d
     971  
     972      # I.e. def walk(self): ...
     973      from email.iterators import walk
     974  
     975  
     976  class ESC[4;38;5;81mMIMEPart(ESC[4;38;5;149mMessage):
     977  
     978      def __init__(self, policy=None):
     979          if policy is None:
     980              from email.policy import default
     981              policy = default
     982          super().__init__(policy)
     983  
     984  
     985      def as_string(self, unixfrom=False, maxheaderlen=None, policy=None):
     986          """Return the entire formatted message as a string.
     987  
     988          Optional 'unixfrom', when true, means include the Unix From_ envelope
     989          header.  maxheaderlen is retained for backward compatibility with the
     990          base Message class, but defaults to None, meaning that the policy value
     991          for max_line_length controls the header maximum length.  'policy' is
     992          passed to the Generator instance used to serialize the message; if it
     993          is not specified the policy associated with the message instance is
     994          used.
     995          """
     996          policy = self.policy if policy is None else policy
     997          if maxheaderlen is None:
     998              maxheaderlen = policy.max_line_length
     999          return super().as_string(unixfrom, maxheaderlen, policy)
    1000  
    1001      def __str__(self):
    1002          return self.as_string(policy=self.policy.clone(utf8=True))
    1003  
    1004      def is_attachment(self):
    1005          c_d = self.get('content-disposition')
    1006          return False if c_d is None else c_d.content_disposition == 'attachment'
    1007  
    1008      def _find_body(self, part, preferencelist):
    1009          if part.is_attachment():
    1010              return
    1011          maintype, subtype = part.get_content_type().split('/')
    1012          if maintype == 'text':
    1013              if subtype in preferencelist:
    1014                  yield (preferencelist.index(subtype), part)
    1015              return
    1016          if maintype != 'multipart' or not self.is_multipart():
    1017              return
    1018          if subtype != 'related':
    1019              for subpart in part.iter_parts():
    1020                  yield from self._find_body(subpart, preferencelist)
    1021              return
    1022          if 'related' in preferencelist:
    1023              yield (preferencelist.index('related'), part)
    1024          candidate = None
    1025          start = part.get_param('start')
    1026          if start:
    1027              for subpart in part.iter_parts():
    1028                  if subpart['content-id'] == start:
    1029                      candidate = subpart
    1030                      break
    1031          if candidate is None:
    1032              subparts = part.get_payload()
    1033              candidate = subparts[0] if subparts else None
    1034          if candidate is not None:
    1035              yield from self._find_body(candidate, preferencelist)
    1036  
    1037      def get_body(self, preferencelist=('related', 'html', 'plain')):
    1038          """Return best candidate mime part for display as 'body' of message.
    1039  
    1040          Do a depth first search, starting with self, looking for the first part
    1041          matching each of the items in preferencelist, and return the part
    1042          corresponding to the first item that has a match, or None if no items
    1043          have a match.  If 'related' is not included in preferencelist, consider
    1044          the root part of any multipart/related encountered as a candidate
    1045          match.  Ignore parts with 'Content-Disposition: attachment'.
    1046          """
    1047          best_prio = len(preferencelist)
    1048          body = None
    1049          for prio, part in self._find_body(self, preferencelist):
    1050              if prio < best_prio:
    1051                  best_prio = prio
    1052                  body = part
    1053                  if prio == 0:
    1054                      break
    1055          return body
    1056  
    1057      _body_types = {('text', 'plain'),
    1058                     ('text', 'html'),
    1059                     ('multipart', 'related'),
    1060                     ('multipart', 'alternative')}
    1061      def iter_attachments(self):
    1062          """Return an iterator over the non-main parts of a multipart.
    1063  
    1064          Skip the first of each occurrence of text/plain, text/html,
    1065          multipart/related, or multipart/alternative in the multipart (unless
    1066          they have a 'Content-Disposition: attachment' header) and include all
    1067          remaining subparts in the returned iterator.  When applied to a
    1068          multipart/related, return all parts except the root part.  Return an
    1069          empty iterator when applied to a multipart/alternative or a
    1070          non-multipart.
    1071          """
    1072          maintype, subtype = self.get_content_type().split('/')
    1073          if maintype != 'multipart' or subtype == 'alternative':
    1074              return
    1075          payload = self.get_payload()
    1076          # Certain malformed messages can have content type set to `multipart/*`
    1077          # but still have single part body, in which case payload.copy() can
    1078          # fail with AttributeError.
    1079          try:
    1080              parts = payload.copy()
    1081          except AttributeError:
    1082              # payload is not a list, it is most probably a string.
    1083              return
    1084  
    1085          if maintype == 'multipart' and subtype == 'related':
    1086              # For related, we treat everything but the root as an attachment.
    1087              # The root may be indicated by 'start'; if there's no start or we
    1088              # can't find the named start, treat the first subpart as the root.
    1089              start = self.get_param('start')
    1090              if start:
    1091                  found = False
    1092                  attachments = []
    1093                  for part in parts:
    1094                      if part.get('content-id') == start:
    1095                          found = True
    1096                      else:
    1097                          attachments.append(part)
    1098                  if found:
    1099                      yield from attachments
    1100                      return
    1101              parts.pop(0)
    1102              yield from parts
    1103              return
    1104          # Otherwise we more or less invert the remaining logic in get_body.
    1105          # This only really works in edge cases (ex: non-text related or
    1106          # alternatives) if the sending agent sets content-disposition.
    1107          seen = []   # Only skip the first example of each candidate type.
    1108          for part in parts:
    1109              maintype, subtype = part.get_content_type().split('/')
    1110              if ((maintype, subtype) in self._body_types and
    1111                      not part.is_attachment() and subtype not in seen):
    1112                  seen.append(subtype)
    1113                  continue
    1114              yield part
    1115  
    1116      def iter_parts(self):
    1117          """Return an iterator over all immediate subparts of a multipart.
    1118  
    1119          Return an empty iterator for a non-multipart.
    1120          """
    1121          if self.is_multipart():
    1122              yield from self.get_payload()
    1123  
    1124      def get_content(self, *args, content_manager=None, **kw):
    1125          if content_manager is None:
    1126              content_manager = self.policy.content_manager
    1127          return content_manager.get_content(self, *args, **kw)
    1128  
    1129      def set_content(self, *args, content_manager=None, **kw):
    1130          if content_manager is None:
    1131              content_manager = self.policy.content_manager
    1132          content_manager.set_content(self, *args, **kw)
    1133  
    1134      def _make_multipart(self, subtype, disallowed_subtypes, boundary):
    1135          if self.get_content_maintype() == 'multipart':
    1136              existing_subtype = self.get_content_subtype()
    1137              disallowed_subtypes = disallowed_subtypes + (subtype,)
    1138              if existing_subtype in disallowed_subtypes:
    1139                  raise ValueError("Cannot convert {} to {}".format(
    1140                      existing_subtype, subtype))
    1141          keep_headers = []
    1142          part_headers = []
    1143          for name, value in self._headers:
    1144              if name.lower().startswith('content-'):
    1145                  part_headers.append((name, value))
    1146              else:
    1147                  keep_headers.append((name, value))
    1148          if part_headers:
    1149              # There is existing content, move it to the first subpart.
    1150              part = type(self)(policy=self.policy)
    1151              part._headers = part_headers
    1152              part._payload = self._payload
    1153              self._payload = [part]
    1154          else:
    1155              self._payload = []
    1156          self._headers = keep_headers
    1157          self['Content-Type'] = 'multipart/' + subtype
    1158          if boundary is not None:
    1159              self.set_param('boundary', boundary)
    1160  
    1161      def make_related(self, boundary=None):
    1162          self._make_multipart('related', ('alternative', 'mixed'), boundary)
    1163  
    1164      def make_alternative(self, boundary=None):
    1165          self._make_multipart('alternative', ('mixed',), boundary)
    1166  
    1167      def make_mixed(self, boundary=None):
    1168          self._make_multipart('mixed', (), boundary)
    1169  
    1170      def _add_multipart(self, _subtype, *args, _disp=None, **kw):
    1171          if (self.get_content_maintype() != 'multipart' or
    1172                  self.get_content_subtype() != _subtype):
    1173              getattr(self, 'make_' + _subtype)()
    1174          part = type(self)(policy=self.policy)
    1175          part.set_content(*args, **kw)
    1176          if _disp and 'content-disposition' not in part:
    1177              part['Content-Disposition'] = _disp
    1178          self.attach(part)
    1179  
    1180      def add_related(self, *args, **kw):
    1181          self._add_multipart('related', *args, _disp='inline', **kw)
    1182  
    1183      def add_alternative(self, *args, **kw):
    1184          self._add_multipart('alternative', *args, **kw)
    1185  
    1186      def add_attachment(self, *args, **kw):
    1187          self._add_multipart('mixed', *args, _disp='attachment', **kw)
    1188  
    1189      def clear(self):
    1190          self._headers = []
    1191          self._payload = None
    1192  
    1193      def clear_content(self):
    1194          self._headers = [(n, v) for n, v in self._headers
    1195                           if not n.lower().startswith('content-')]
    1196          self._payload = None
    1197  
    1198  
    1199  class ESC[4;38;5;81mEmailMessage(ESC[4;38;5;149mMIMEPart):
    1200  
    1201      def set_content(self, *args, **kw):
    1202          super().set_content(*args, **kw)
    1203          if 'MIME-Version' not in self:
    1204              self['MIME-Version'] = '1.0'