python (3.12.0)
1 # Copyright (C) 2001-2007 Python Software Foundation
2 # Author: Barry Warsaw
3 # Contact: email-sig@python.org
4
5 """Basic message object for the email package object model."""
6
7 __all__ = ['Message', 'EmailMessage']
8
9 import binascii
10 import re
11 import quopri
12 from io import BytesIO, StringIO
13
14 # Intrapackage imports
15 from email import utils
16 from email import errors
17 from email._policybase import compat32
18 from email import charset as _charset
19 from email._encoded_words import decode_b
20 Charset = _charset.Charset
21
# Separator used when joining a header value with its parameters.
SEMISPACE = '; '

# Pattern matching any `special' character in a parameter value; when one
# is present the value has to be emitted in quoted form.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
27
28
def _splitparam(param):
    """Split a header value at its first ';'.

    Returns a 2-tuple (head, rest), both stripped of surrounding
    whitespace.  `rest` is None when the value contains no ';' at all.
    The argument is stringified first because it may be a Header object.
    This is deliberately simpler than a full RFC 2045 (section 5.1) parser.
    """
    head, semicolon, rest = str(param).partition(';')
    head = head.strip()
    if semicolon:
        return head, rest.strip()
    return head, None
38
def _formatparam(param, value=None, quote=True):
    """Render a header parameter as `key=value` (or a bare `key`).

    A missing or empty value yields just the parameter name.  A tuple value
    is taken to be (charset, language, text) and is encoded per RFC 2231;
    a string containing non-ascii characters is encoded the same way using
    utf-8 and an empty language.  RFC 2231 encoded values are never quoted.
    Any other value is quoted when `quote` is true or when it contains one
    of the `tspecials` characters.
    """
    if value is None or not len(value) > 0:
        return param
    if isinstance(value, tuple):
        # Items are (charset, language, value); charset is a plain string.
        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param + '*', encoded)
    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        encoded = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param + '*', encoded)
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    if quote or tspecials.search(value):
        return '%s="%s"' % (param, utils.quote(value))
    return '%s=%s' % (param, value)
72
def _parseparam(s):
    """Split a header value into its ';'-separated pieces.

    A ';' is not treated as a separator while the text before it holds an
    odd number of unescaped double quotes.  For pieces containing '=', the
    key is stripped and lower-cased and the value is stripped.  The argument
    is stringified first because it may be a Header object.
    """
    remaining = ';' + str(s)
    pieces = []
    while remaining.startswith(';'):
        remaining = remaining[1:]
        cut = remaining.find(';')
        # Advance past any ';' that sits inside a quoted string.
        while cut > 0 and (remaining.count('"', 0, cut)
                           - remaining.count('\\"', 0, cut)) % 2 != 0:
            cut = remaining.find(';', cut + 1)
        if cut < 0:
            cut = len(remaining)
        piece, remaining = remaining[:cut], remaining[cut:]
        key, equals, val = piece.partition('=')
        if equals:
            piece = key.strip().lower() + '=' + val.strip()
        pieces.append(piece.strip())
    return pieces
91
92
def _unquotevalue(value):
    """Unquote a parameter value, keeping RFC 2231 tuples as tuples.

    Unlike utils.collapse_rfc2231_value() this does not try to convert the
    value to unicode: for a tuple only the third item is unquoted and the
    first two are passed through untouched.
    """
    if not isinstance(value, tuple):
        return utils.unquote(value)
    return value[0], value[1], utils.unquote(value[2])
102
103
def _decode_uu(encoded):
    """Decode uuencoded bytes and return the decoded bytes.

    Lines are skipped until a `begin <octal mode> ...` line is seen; a
    ValueError is raised if there is none.  Decoding then proceeds line by
    line until an `end` line or the end of input.  An empty line inside the
    body raises ValueError("Truncated input").
    """
    lines = iter(encoded.splitlines())
    header_found = False
    for line in lines:
        if not line.startswith(b"begin "):
            continue
        mode = line.removeprefix(b"begin ").partition(b" ")[0]
        try:
            int(mode, base=8)
        except ValueError:
            # Not a real header line (mode is not octal); keep looking.
            continue
        header_found = True
        break
    if not header_found:
        raise ValueError("`begin` line not found")

    chunks = []
    for line in lines:
        if not line:
            raise ValueError("Truncated input")
        if line.strip(b' \t\r\n\f') == b'end':
            break
        try:
            chunk = binascii.a2b_uu(line)
        except binascii.Error:
            # Workaround for broken uuencoders by /Fredrik Lundh
            nbytes = (((line[0]-32) & 63) * 4 + 5) // 3
            chunk = binascii.a2b_uu(line[:nbytes])
        chunks.append(chunk)

    return b''.join(chunks)
133
134
135 class Message:
136 """Basic message object.
137
138 A message object is defined as something that has a bunch of RFC 2822
139 headers and a payload. It may optionally have an envelope header
140 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
141 multipart or a message/rfc822), then the payload is a list of Message
142 objects, otherwise it is a string.
143
144 Message objects implement part of the `mapping' interface, which assumes
145 there is exactly one occurrence of the header per message. Some headers
146 do in fact appear multiple times (e.g. Received) and for those headers,
147 you must use the explicit API to set or get all the headers. Not all of
148 the mapping methods are implemented.
149 """
def __init__(self, policy=compat32):
    """Create an empty message governed by `policy`.

    The new message has no headers, no Unix From_ line, no payload and no
    charset, and its default content type is text/plain.
    """
    self.policy = policy
    self._headers = []
    self._unixfrom = None
    self._payload = None
    self._charset = None
    # Defaults for multipart messages
    self.preamble = None
    self.epilogue = None
    self.defects = []
    # Default content type
    self._default_type = 'text/plain'
161
def __str__(self):
    """Return the whole formatted message as a string (see as_string())."""
    text = self.as_string()
    return text
166
def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
    """Serialize the message with a Generator and return the text.

    When `unixfrom` is true the Unix From_ envelope header is included.
    `maxheaderlen` defaults to 0 for backward compatibility, so pass it
    explicitly if another value is wanted.  `policy` is handed to the
    Generator; when omitted the message's own policy is used.

    Binary data that is not encoded according to RFC standards is replaced
    by unicode "unknown character" code points.
    """
    from email.generator import Generator
    if policy is None:
        policy = self.policy
    buffer = StringIO()
    generator = Generator(buffer,
                          mangle_from_=False,
                          maxheaderlen=maxheaderlen,
                          policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
190
def __bytes__(self):
    """Return the whole formatted message as bytes (see as_bytes())."""
    data = self.as_bytes()
    return data
195
def as_bytes(self, unixfrom=False, policy=None):
    """Serialize the message with a BytesGenerator and return the bytes.

    When `unixfrom` is true the Unix From_ envelope header is included.
    `policy` is handed to the BytesGenerator; when omitted the message's
    own policy is used.
    """
    from email.generator import BytesGenerator
    if policy is None:
        policy = self.policy
    buffer = BytesIO()
    generator = BytesGenerator(buffer, mangle_from_=False, policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
210
def is_multipart(self):
    """Return True when the payload is a list (i.e. a container of parts)."""
    payload = self._payload
    return isinstance(payload, list)
214
215 #
216 # Unix From_ line
217 #
def set_unixfrom(self, unixfrom):
    """Store `unixfrom` as the message's Unix From_ envelope line."""
    self._unixfrom = unixfrom
220
def get_unixfrom(self):
    """Return the stored Unix From_ envelope line."""
    envelope = self._unixfrom
    return envelope
223
224 #
225 # Payload manipulation.
226 #
def attach(self, payload):
    """Append `payload` to the message's list payload.

    If there is no payload yet, a new one-element list is created.  After a
    successful call the payload is always a list; use set_payload() to set a
    scalar payload instead.  A TypeError is raised when the existing payload
    cannot be appended to.
    """
    current = self._payload
    if current is None:
        self._payload = [payload]
        return
    try:
        current.append(payload)
    except AttributeError:
        raise TypeError("Attach is not valid on a message with a"
                        " non-multipart payload")
242
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload will either be a list object or a string.  If you mutate
    the list object, you modify the message's payload in place.  Optional
    i returns that index into the payload.

    Optional decode is a flag indicating whether the payload should be
    decoded or not, according to the Content-Transfer-Encoding header
    (default is False).

    When True and the message is not a multipart, the payload will be
    decoded if this header's value is `quoted-printable', `base64' or one
    of the uuencode names.  If some other encoding is used, or the header
    is missing, or if the payload has bogus uuencoded data, the payload is
    returned as-is (as bytes when it was a string).  A payload that is
    neither a string nor bytes (e.g. None) is returned unchanged.

    If the message is a multipart and the decode flag is True, then None
    is returned.

    Raises TypeError when i is given for a non-multipart message.
    """
    # Here is the logic table for this code, based on the email5.0.0 code:
    #   i     decode  is_multipart  result
    # ------  ------  ------------  ------------------------------
    #  None   True    True          None
    #   i     True    True          None
    #  None   False   True          _payload (a list)
    #   i     False   True          _payload element i (a Message)
    #   i     False   False         error (not a list)
    #   i     True    False         error (not a list)
    #  None   False   False         _payload
    #  None   True    False         _payload decoded (bytes)
    if self.is_multipart():
        if decode:
            return None
        if i is None:
            return self._payload
        return self._payload[i]
    # For backward compatibility, Use isinstance and this error message
    # instead of the more logical is_multipart test.
    if i is not None and not isinstance(self._payload, list):
        raise TypeError('Expected list, got %s' % type(self._payload))
    payload = self._payload
    # cte might be a Header, so for now stringify it.
    cte = str(self.get('content-transfer-encoding', '')).lower()
    # payload may be bytes here.
    if isinstance(payload, str):
        if utils._has_surrogates(payload):
            bpayload = payload.encode('ascii', 'surrogateescape')
            if not decode:
                # get_content_charset() always yields a plain string, even
                # when the charset parameter is RFC 2231 encoded; the raw
                # get_param() value may be a 3-tuple, which bytes.decode()
                # rejects with a TypeError.
                try:
                    payload = bpayload.decode(
                        self.get_content_charset('ascii'), 'replace')
                except LookupError:
                    payload = bpayload.decode('ascii', 'replace')
        elif decode:
            try:
                bpayload = payload.encode('ascii')
            except UnicodeError:
                # This won't happen for RFC compliant messages (messages
                # containing only ASCII code points in the unicode input).
                # If it does happen, turn the string into bytes in a way
                # guaranteed not to fail.
                bpayload = payload.encode('raw-unicode-escape')
    elif isinstance(payload, (bytes, bytearray)):
        # A bytes payload is already in the form the decoders expect.
        bpayload = bytes(payload)
    if not decode:
        return payload
    if not isinstance(payload, (str, bytes, bytearray)):
        # Nothing decodable (e.g. no payload was ever set); previously this
        # reached the decoders with `bpayload` unbound.
        return payload
    if cte == 'quoted-printable':
        return quopri.decodestring(bpayload)
    elif cte == 'base64':
        # XXX: this is a bit of a hack; decode_b should probably be factored
        # out somewhere, but I haven't figured out where yet.
        value, defects = decode_b(b''.join(bpayload.splitlines()))
        for defect in defects:
            self.policy.handle_defect(self, defect)
        return value
    elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        try:
            return _decode_uu(bpayload)
        except ValueError:
            # Some decoding problem.
            return bpayload
    if isinstance(payload, str):
        return bpayload
    return payload
329
def set_payload(self, payload, charset=None):
    """Replace the payload with `payload`.

    A text payload given without a charset is stored unchanged.  With a
    charset, text is first encoded to the charset's output charset.  Any
    payload that has a `decode` method is stored as a string produced by
    ascii decoding with the surrogateescape handler.  When `charset` is not
    None it is finally passed to set_charset(); see that method for details.
    """
    is_text = hasattr(payload, 'encode')
    if is_text and charset is None:
        self._payload = payload
        return
    if is_text:
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        payload = payload.encode(charset.output_charset)
    if hasattr(payload, 'decode'):
        payload = payload.decode('ascii', 'surrogateescape')
    self._payload = payload
    if charset is not None:
        self.set_charset(charset)
349
def set_charset(self, charset):
    """Associate a character set with the payload.

    `charset` may be a Charset instance, a string naming a character set
    (converted to a Charset instance), or None.  With None the charset
    parameter is removed from the Content-Type header and the stored
    charset is cleared.

    Otherwise the MIME headers are filled in as needed: MIME-Version is
    added when absent, and Content-Type either gets a charset parameter or
    is created as text/plain with one.  When no Content-Transfer-Encoding
    header exists, the charset's body encoding is applied to the payload and
    the corresponding header is added.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if not isinstance(charset, Charset):
        charset = Charset(charset)
    self._charset = charset
    if 'MIME-Version' not in self:
        self.add_header('MIME-Version', '1.0')
    if 'Content-Type' in self:
        self.set_param('charset', charset.get_output_charset())
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    if charset != charset.get_output_charset():
        self._payload = charset.body_encode(self._payload)
    if 'Content-Transfer-Encoding' in self:
        return
    encoder = charset.get_body_encoding()
    try:
        # A callable body encoding performs the encoding on the message
        # itself; anything else lands in the TypeError branch.
        encoder(self)
    except TypeError:
        # This 'if' is for backward compatibility, it allows unicode
        # through even though that won't work correctly if the
        # message is serialized.
        body = self._payload
        if body:
            try:
                body = body.encode('ascii', 'surrogateescape')
            except UnicodeError:
                body = body.encode(charset.output_charset)
        self._payload = charset.body_encode(body)
        self.add_header('Content-Transfer-Encoding', encoder)
396
def get_charset(self):
    """Return the charset stored for the payload (set by set_charset())."""
    stored = self._charset
    return stored
401
402 #
403 # MAPPING INTERFACE (partial)
404 #
def __len__(self):
    """Return the number of stored headers; duplicates are counted."""
    headers = self._headers
    return len(headers)
408
def __getitem__(self, name):
    """Look up a header value by field name.

    A missing header yields None rather than an exception.  When the
    header occurs more than once, which occurrence is returned is
    undefined; use get_all() to obtain every value for a field name.
    """
    value = self.get(name)
    return value
419
def __setitem__(self, name, val):
    """Append a header to the message.

    An existing header with the same field name is NOT overwritten; delete
    it first with __delitem__() if replacement is wanted.  When the policy
    reports a maximum count for `name` and that many headers of this name
    (compared case-insensitively) are already present, ValueError is raised.
    """
    limit = self.policy.header_max_count(name)
    if limit:
        wanted = name.lower()
        present = sum(1 for field, _ in self._headers
                      if field.lower() == wanted)
        # Only an actual match can trip the limit.
        if present and present >= limit:
            raise ValueError("There may be at most {} {} headers "
                             "in a message".format(limit, name))
    self._headers.append(self.policy.header_store_parse(name, val))
437
def __delitem__(self, name):
    """Remove every header whose field name matches `name`.

    Matching is case-insensitive.  Nothing is raised when no such header
    exists.
    """
    unwanted = name.lower()
    self._headers = [(field, value) for field, value in self._headers
                     if field.lower() != unwanted]
449
def __contains__(self, name):
    """Return True if a header named `name` exists (case-insensitive)."""
    wanted = name.lower()
    return any(wanted == field.lower() for field, _ in self._headers)
456
def __iter__(self):
    """Yield each header field name, in stored order."""
    for pair in self._headers:
        field, _ = pair
        yield field
460
def keys(self):
    """Return a new list of every header field name.

    Names come in the order the headers are stored (original message order,
    then order of addition) and may repeat.  A field that is deleted and
    re-inserted ends up at the end of the list.
    """
    names = []
    for field, _ in self._headers:
        names.append(field)
    return names
470
def values(self):
    """Return a new list of every header value.

    Each stored value is passed through the policy's header_fetch_parse().
    Values come in the order the headers are stored and may repeat.  A field
    that is deleted and re-inserted ends up at the end of the list.
    """
    fetch = self.policy.header_fetch_parse
    parsed = []
    for field, raw in self._headers:
        parsed.append(fetch(field, raw))
    return parsed
481
def items(self):
    """Return a new list of (field name, value) pairs for every header.

    Each stored value is passed through the policy's header_fetch_parse().
    Pairs come in the order the headers are stored and may repeat.  A field
    that is deleted and re-inserted ends up at the end of the list.
    """
    fetch = self.policy.header_fetch_parse
    pairs = []
    for field, raw in self._headers:
        pairs.append((field, fetch(field, raw)))
    return pairs
492
def get(self, name, failobj=None):
    """Return the value of the first header named `name`.

    Names are compared case-insensitively and the stored value is passed
    through the policy's header_fetch_parse().  `failobj` is returned when
    no such header exists.
    """
    wanted = name.lower()
    for field, raw in self._headers:
        if field.lower() != wanted:
            continue
        return self.policy.header_fetch_parse(field, raw)
    return failobj
504
505 #
506 # "Internal" methods (public API, but only intended for use by a parser
507 # or generator, not normal application code.
508 #
509
def set_raw(self, name, value):
    """Append (name, value) to the header list exactly as given.

    This is an "internal" API, intended only for use by a parser; no policy
    processing is applied.
    """
    entry = (name, value)
    self._headers.append(entry)
516
def raw_items(self):
    """Return an iterator over the unmodified (name, value) header pairs.

    The iterator walks a copy of the header list taken at call time.  This
    is an "internal" API, intended only for use by a generator.
    """
    snapshot = list(self._headers)
    return iter(snapshot)
523
524 #
525 # Additional useful stuff
526 #
527
def get_all(self, name, failobj=None):
    """Return every value of the headers named `name`, as a list.

    Names are compared case-insensitively; values appear in stored order,
    may contain duplicates, and are passed through the policy's
    header_fetch_parse().  When no header matches, `failobj` is returned
    (None by default).
    """
    wanted = name.lower()
    found = [self.policy.header_fetch_parse(field, raw)
             for field, raw in self._headers
             if field.lower() == wanted]
    return found if found else failobj
545
def add_header(self, _name, _value, **_params):
    """Add a header, optionally with parameters.

    `_name` is the header field to add and `_value` its main value (omitted
    from the result when None).  Each keyword argument becomes a parameter,
    with underscores in its name converted to dashes.  A parameter whose
    value is None is added as a bare key; otherwise it is formatted as
    key="value".  A value containing non-ASCII characters may be given as a
    three-tuple (charset, language, value) to be encoded per RFC 2231; a
    plain non-ASCII string is encoded with the utf-8 charset and a language
    of ''.

    Examples:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    msg.add_header('content-disposition', 'attachment',
                   filename=('utf-8', '', 'Fußballer.ppt'))
    msg.add_header('content-disposition', 'attachment',
                   filename='Fußballer.ppt')
    """
    parts = [] if _value is None else [_value]
    for key, val in _params.items():
        attribute = key.replace('_', '-')
        if val is None:
            parts.append(attribute)
        else:
            parts.append(_formatparam(attribute, val))
    self[_name] = SEMISPACE.join(parts)
575
def replace_header(self, _name, _value):
    """Replace the value of the first header matching `_name`.

    The header keeps its position and the original case of its field name.
    Matching is case-insensitive.  KeyError (carrying the lower-cased name)
    is raised when no header matches.
    """
    wanted = _name.lower()
    for index, (field, _) in enumerate(self._headers):
        if field.lower() == wanted:
            self._headers[index] = self.policy.header_store_parse(field, _value)
            return
    raise KeyError(wanted)
590
591 #
592 # Use these three methods instead of the three above.
593 #
594
def get_content_type(self):
    """Return the content type as a lower-case `maintype/subtype' string.

    Without a Content-Type header the value of get_default_type() is
    returned.  Otherwise the part of the header before the first ';' is
    used; if it does not contain exactly one '/', 'text/plain' is returned
    (RFC 2045, section 5.2).
    """
    sentinel = object()
    raw = self.get('content-type', sentinel)
    if raw is sentinel:
        # This should have no parameters
        return self.get_default_type()
    # The header may be a Header object, so stringify before splitting.
    ctype = str(raw).partition(';')[0].strip().lower()
    return ctype if ctype.count('/') == 1 else 'text/plain'
618
def get_content_maintype(self):
    """Return the `maintype' half of get_content_type()'s result."""
    pieces = self.get_content_type().split('/')
    return pieces[0]
627
def get_content_subtype(self):
    """Return the `subtype' half of get_content_type()'s result."""
    pieces = self.get_content_type().split('/')
    return pieces[1]
636
def get_default_type(self):
    """Return the message's `default' content type.

    Most messages default to text/plain; subparts of multipart/digest
    containers default to message/rfc822.
    """
    default = self._default_type
    return default
645
def set_default_type(self, ctype):
    """Record `ctype` as the message's `default' content type.

    `ctype` is expected to be "text/plain" or "message/rfc822", but no
    check is made.  The value is kept on the message object only; it is not
    written to the Content-Type header.
    """
    self._default_type = ctype
654
def _get_params_preserve(self, failobj, header):
    """Return the parameters of `header` with value quoting left intact.

    Like get_params() but without unquoting.  `failobj` is returned when the
    header is absent.  Otherwise the header is split into (name, value)
    pairs -- a bare attribute gets the empty string as value -- and the
    pairs are passed through utils.decode_params().
    """
    sentinel = object()
    raw = self.get(header, sentinel)
    if raw is sentinel:
        return failobj
    pairs = []
    for item in _parseparam(raw):
        key, equals, val = item.partition('=')
        if equals:
            pairs.append((key.strip(), val.strip()))
        else:
            # Must have been a bare attribute
            pairs.append((item.strip(), ''))
    return utils.decode_params(pairs)
675
676 def get_params(self, failobj=None, header='content-type', unquote=True):
677 """Return the message's Content-Type parameters, as a list.
678
679 The elements of the returned list are 2-tuples of key/value pairs, as
680 split on the `=' sign. The left hand side of the `=' is the key,
681 while the right hand side is the value. If there is no `=' sign in
682 the parameter the value is the empty string. The value is as
683 described in the get_param() method.
684
685 Optional failobj is the object to return if there is no Content-Type
686 header. Optional header is the header to search instead of
687 Content-Type. If unquote is True, the value is unquoted.
688 """
689 missing = object()
690 params = self._get_params_preserve(missing, header)
691 if params is missing:
692 return failobj
693 if unquote:
694 return [(k, _unquotevalue(v)) for k, v in params]
695 else:
696 return params
697
698 def get_param(self, param, failobj=None, header='content-type',
699 unquote=True):
700 """Return the parameter value if found in the Content-Type header.
701
702 Optional failobj is the object to return if there is no Content-Type
703 header, or the Content-Type header has no such parameter. Optional
704 header is the header to search instead of Content-Type.
705
706 Parameter keys are always compared case insensitively. The return
707 value can either be a string, or a 3-tuple if the parameter was RFC
708 2231 encoded. When it's a 3-tuple, the elements of the value are of
709 the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
710 LANGUAGE can be None, in which case you should consider VALUE to be
711 encoded in the us-ascii charset. You can usually ignore LANGUAGE.
712 The parameter value (either the returned string, or the VALUE item in
713 the 3-tuple) is always unquoted, unless unquote is set to False.
714
715 If your application doesn't care whether the parameter was RFC 2231
716 encoded, it can turn the return value into a string as follows:
717
718 rawparam = msg.get_param('foo')
719 param = email.utils.collapse_rfc2231_value(rawparam)
720
721 """
722 if header not in self:
723 return failobj
724 for k, v in self._get_params_preserve(failobj, header):
725 if k.lower() == param.lower():
726 if unquote:
727 return _unquotevalue(v)
728 else:
729 return v
730 return failobj
731
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language='', replace=False):
    """Set a parameter on a header (Content-Type by default).

    An existing parameter of that name gets the new value; otherwise the
    parameter is appended.  When `header` is Content-Type and the message
    has none yet, the header starts out as "text/plain" with the parameter
    appended, per RFC 2045.

    Parameters are quoted as necessary unless `requote` is false.  When
    `charset` is given and `value` is not already a tuple, the value is
    encoded per RFC 2231 using `charset` and `language` (both strings;
    language defaults to the empty string).

    The header is rewritten only if its text changes: in place when
    `replace` is true, otherwise by deleting it and adding it back.
    """
    if charset and not isinstance(value, tuple):
        value = (charset, language, value)

    if header not in self and header.lower() == 'content-type':
        current = 'text/plain'
    else:
        current = self.get(header)

    if self.get_param(param, header=header):
        # Rebuild the whole header, swapping in the new value for `param`.
        current = ''
        for old_name, old_value in self.get_params(header=header,
                                                   unquote=requote):
            if old_name.lower() == param.lower():
                piece = _formatparam(param, value, requote)
            else:
                piece = _formatparam(old_name, old_value, requote)
            if current:
                current = SEMISPACE.join([current, piece])
            else:
                current = piece
    else:
        # Parameter not present (or empty): append it to the existing text.
        piece = _formatparam(param, value, requote)
        if current:
            current = SEMISPACE.join([current, piece])
        else:
            current = piece

    if current == self.get(header):
        return
    if replace:
        self.replace_header(header, current)
    else:
        del self[header]
        self[header] = current
782
def del_param(self, param, header='content-type', requote=True):
    """Remove a parameter and its value from a header.

    `header` selects the header to edit (Content-Type by default); nothing
    happens when it is absent.  The header text is rebuilt from the
    remaining parameters, quoted as necessary unless `requote` is false.
    If the rebuilt text differs from the current value, the header is
    deleted and added back with the new text.
    """
    if header not in self:
        return
    rebuilt = ''
    for name, val in self.get_params(header=header, unquote=requote):
        if name.lower() == param.lower():
            continue
        piece = _formatparam(name, val, requote)
        if rebuilt:
            rebuilt = SEMISPACE.join([rebuilt, piece])
        else:
            rebuilt = piece
    if rebuilt != self.get(header):
        del self[header]
        self[header] = rebuilt
804
def set_type(self, type, header='Content-Type', requote=True):
    """Set the main type and subtype of a header (Content-Type by default).

    `type` must be a string of the form "maintype/subtype" (exactly one
    '/'), otherwise ValueError is raised.

    The header is replaced while its parameters are kept.  With `requote`
    false the existing quoting of the parameters is left as is; otherwise
    the parameters are quoted (the default).

    When `header` is Content-Type, any MIME-Version header is replaced by
    'MIME-Version: 1.0'.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    # Set the Content-Type, you get a MIME-Version
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if header not in self:
        self[header] = type
        return
    old_params = self.get_params(header=header, unquote=requote)
    del self[header]
    self[header] = type
    # Skip the first param; it's the old type.
    for name, val in old_params[1:]:
        self.set_param(name, val, header, requote)
835 self.set_param(p, v, header, requote)
836
def get_filename(self, failobj=None):
    """Return the filename associated with the payload, if any.

    The `filename' parameter of the Content-Disposition header is used; when
    it is unavailable, the `name' parameter of the Content-Type header is
    tried instead.  The value is unquoted, collapsed to a string and
    stripped of surrounding whitespace.  `failobj` is returned when neither
    parameter is found.
    """
    sentinel = object()
    name = self.get_param('filename', sentinel, 'content-disposition')
    if name is sentinel:
        name = self.get_param('name', sentinel, 'content-type')
        if name is sentinel:
            return failobj
    return utils.collapse_rfc2231_value(name).strip()
852
def get_boundary(self, failobj=None):
    """Return the Content-Type header's `boundary' parameter, if any.

    The value is unquoted and collapsed to a string, and trailing
    whitespace is removed.  `failobj` is returned when there is no such
    parameter.
    """
    sentinel = object()
    raw = self.get_param('boundary', sentinel)
    if raw is sentinel:
        return failobj
    collapsed = utils.collapse_rfc2231_value(raw)
    # RFC 2046 says that boundaries may begin but not end in w/s
    return collapsed.rstrip()
865
def set_boundary(self, boundary):
    """Set the `boundary' parameter of the Content-Type header.

    Unlike deleting the Content-Type header and adding a fresh one with
    add_header(), this keeps the header at its position in the header list.
    An existing boundary parameter is replaced; if there is none, one is
    appended.  The new value is always written in double quotes.

    HeaderParseError is raised if the message has no Content-Type header.
    """
    sentinel = object()
    params = self._get_params_preserve(sentinel, 'content-type')
    if params is sentinel:
        # There was no Content-Type header, and we don't know what type
        # to set it to, so raise an exception.
        raise errors.HeaderParseError('No Content-Type header found')
    quoted = '"%s"' % boundary
    updated = []
    replaced = False
    for key, val in params:
        if key.lower() == 'boundary':
            updated.append(('boundary', quoted))
            replaced = True
        else:
            updated.append((key, val))
    if not replaced:
        # The original Content-Type header had no boundary attribute.
        # Tack one on the end.  BAW: should we raise an exception
        # instead???
        updated.append(('boundary', quoted))
    # Replace the existing Content-Type header with the new value
    rebuilt = []
    for field, raw in self._headers:
        if field.lower() != 'content-type':
            rebuilt.append((field, raw))
            continue
        pieces = [key if val == '' else '%s=%s' % (key, val)
                  for key, val in updated]
        text = SEMISPACE.join(pieces)
        rebuilt.append(self.policy.header_store_parse(field, text))
    self._headers = rebuilt
911
def get_content_charset(self, failobj=None):
    """Return the lower-cased `charset' parameter of the Content-Type header.

    `failobj` is returned when there is no Content-Type header, when it has
    no charset parameter, or when the charset name contains characters
    outside the us-ascii range.
    """
    sentinel = object()
    charset = self.get_param('charset', sentinel)
    if charset is sentinel:
        return failobj
    if isinstance(charset, tuple):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        codec = charset[0] or 'us-ascii'
        try:
            # LookupError will be raised if the charset isn't known to
            # Python.  UnicodeError will be raised if the encoded text
            # contains a character not in the charset.
            charset = str(charset[2].encode('raw-unicode-escape'), codec)
        except (LookupError, UnicodeError):
            charset = charset[2]
    # charset characters must be in us-ascii range
    try:
        charset.encode('us-ascii')
    except UnicodeError:
        return failobj
    # RFC 2046, $4.1.2 says charsets are not case sensitive
    return charset.lower()
941
def get_charsets(self, failobj=None):
    """Return a list of the charsets used by this message and its subparts.

    The list holds one entry per part produced by walk(), in walk() order;
    each entry is whatever that part's get_content_charset(failobj)
    returns.  That is the lower-cased charset parameter of the part's
    Content-Type header, or failobj (None by default) when the part has no
    usable charset parameter.
    """
    charsets = []
    for part in self.walk():
        charsets.append(part.get_content_charset(failobj))
    return charsets
959
def get_content_disposition(self):
    """Return the message's content-disposition in lower case, or None.

    None is returned when there is no Content-Disposition header.
    Otherwise the result is the text before the first ';' of the header,
    stripped and lower-cased.  RFC 2183 defines 'inline' and 'attachment',
    but the value is not validated here, so other strings can be returned.
    """
    header = self.get('content-disposition')
    if header is None:
        return None
    disposition, _params = _splitparam(header)
    return disposition.lower()
971
972 # I.e. def walk(self): ...
973 from email.iterators import walk
974
975
class MIMEPart(Message):
    """Message subclass with body/attachment discovery and content helpers.

    On top of the base Message API this adds get_body(),
    iter_attachments() and iter_parts() for navigating a MIME tree,
    get_content()/set_content() which delegate to a content manager, and
    the make_*/add_* helpers for building multipart structures.
    """

    def __init__(self, policy=None):
        """Create an empty part, using email.policy.default if no policy."""
        if policy is None:
            from email.policy import default
            policy = default
        super().__init__(policy)

    def as_string(self, unixfrom=False, maxheaderlen=None, policy=None):
        """Return the entire formatted message as a string.

        Optional 'unixfrom', when true, means include the Unix From_ envelope
        header.  maxheaderlen is retained for backward compatibility with the
        base Message class, but defaults to None, meaning that the policy value
        for max_line_length controls the header maximum length.  'policy' is
        passed to the Generator instance used to serialize the message; if it
        is not specified the policy associated with the message instance is
        used.
        """
        policy = self.policy if policy is None else policy
        if maxheaderlen is None:
            maxheaderlen = policy.max_line_length
        return super().as_string(unixfrom, maxheaderlen, policy)

    def __str__(self):
        """Return as_string() output using this policy cloned with utf8=True."""
        return self.as_string(policy=self.policy.clone(utf8=True))

    def is_attachment(self):
        """Return True if the Content-Disposition header says 'attachment'.

        Returns False when there is no Content-Disposition header.
        """
        c_d = self.get('content-disposition')
        # NOTE(review): relies on the header value exposing a
        # `content_disposition` attribute; a plain str header value would
        # raise AttributeError here -- confirm which policies must work.
        return False if c_d is None else c_d.content_disposition == 'attachment'

    def _find_body(self, part, preferencelist):
        """Yield (priority, part) pairs for body candidates at or below part.

        priority is the index of the matching entry in preferencelist, so
        lower is better.  Parts marked as attachments are never candidates.
        """
        if part.is_attachment():
            return
        maintype, subtype = part.get_content_type().split('/')
        if maintype == 'text':
            if subtype in preferencelist:
                yield (preferencelist.index(subtype), part)
            return
        # Ask the part being examined, not self: a subpart may declare a
        # multipart/* content type while its payload is not a list of
        # parts, and such a part has nothing to descend into.
        if maintype != 'multipart' or not part.is_multipart():
            return
        if subtype != 'related':
            for subpart in part.iter_parts():
                yield from self._find_body(subpart, preferencelist)
            return
        if 'related' in preferencelist:
            yield (preferencelist.index('related'), part)
        # Locate the root of the multipart/related: the subpart whose
        # Content-ID equals the 'start' parameter, else the first subpart.
        candidate = None
        start = part.get_param('start')
        if start:
            for subpart in part.iter_parts():
                if subpart['content-id'] == start:
                    candidate = subpart
                    break
        if candidate is None:
            subparts = part.get_payload()
            candidate = subparts[0] if subparts else None
        if candidate is not None:
            yield from self._find_body(candidate, preferencelist)

    def get_body(self, preferencelist=('related', 'html', 'plain')):
        """Return best candidate mime part for display as 'body' of message.

        Do a depth first search, starting with self, looking for the first part
        matching each of the items in preferencelist, and return the part
        corresponding to the first item that has a match, or None if no items
        have a match.  If 'related' is not included in preferencelist, consider
        the root part of any multipart/related encountered as a candidate
        match.  Ignore parts with 'Content-Disposition: attachment'.
        """
        best_prio = len(preferencelist)
        body = None
        for prio, part in self._find_body(self, preferencelist):
            if prio < best_prio:
                best_prio = prio
                body = part
                if prio == 0:
                    # Nothing can beat the most preferred type.
                    break
        return body

    # Content types that iter_attachments() treats as body candidates.
    _body_types = {('text', 'plain'),
                   ('text', 'html'),
                   ('multipart', 'related'),
                   ('multipart', 'alternative')}

    def iter_attachments(self):
        """Return an iterator over the non-main parts of a multipart.

        Skip the first of each occurrence of text/plain, text/html,
        multipart/related, or multipart/alternative in the multipart (unless
        they have a 'Content-Disposition: attachment' header) and include all
        remaining subparts in the returned iterator.  When applied to a
        multipart/related, return all parts except the root part.  Return an
        empty iterator when applied to a multipart/alternative or a
        non-multipart.
        """
        maintype, subtype = self.get_content_type().split('/')
        if maintype != 'multipart' or subtype == 'alternative':
            return
        payload = self.get_payload()
        # Certain malformed messages can have content type set to
        # `multipart/*` but still have a single part body, in which case
        # payload.copy() can fail with AttributeError.
        try:
            parts = payload.copy()
        except AttributeError:
            # payload is not a list, it is most probably a string.
            return

        if subtype == 'related':
            # For related, we treat everything but the root as an attachment.
            # The root may be indicated by 'start'; if there's no start or we
            # can't find the named start, treat the first subpart as the root.
            start = self.get_param('start')
            if start:
                found = False
                attachments = []
                for part in parts:
                    if part.get('content-id') == start:
                        found = True
                    else:
                        attachments.append(part)
                if found:
                    yield from attachments
                    return
            # Slice rather than pop(0) so an empty payload list simply
            # yields nothing instead of raising IndexError.
            yield from parts[1:]
            return
        # Otherwise we more or less invert the remaining logic in get_body.
        # This only really works in edge cases (ex: non-text related or
        # alternatives) if the sending agent sets content-disposition.
        seen = set()   # Only skip the first example of each candidate type.
        for part in parts:
            maintype, subtype = part.get_content_type().split('/')
            if ((maintype, subtype) in self._body_types and
                    not part.is_attachment() and subtype not in seen):
                seen.add(subtype)
                continue
            yield part

    def iter_parts(self):
        """Return an iterator over all immediate subparts of a multipart.

        Return an empty iterator for a non-multipart.
        """
        if self.is_multipart():
            yield from self.get_payload()

    def get_content(self, *args, content_manager=None, **kw):
        """Return content_manager.get_content(self, *args, **kw).

        content_manager defaults to the policy's content_manager.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        return content_manager.get_content(self, *args, **kw)

    def set_content(self, *args, content_manager=None, **kw):
        """Call content_manager.set_content(self, *args, **kw).

        content_manager defaults to the policy's content_manager.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        content_manager.set_content(self, *args, **kw)

    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
        """Turn this part into a multipart/<subtype> container.

        Existing Content-* headers and the current payload are moved into a
        new first subpart; all other headers stay on this part.  If boundary
        is not None it is set as the boundary parameter.

        ValueError is raised when this part is already a multipart whose
        subtype is the requested one or one of disallowed_subtypes.
        """
        if self.get_content_maintype() == 'multipart':
            existing_subtype = self.get_content_subtype()
            disallowed_subtypes = disallowed_subtypes + (subtype,)
            if existing_subtype in disallowed_subtypes:
                raise ValueError("Cannot convert {} to {}".format(
                    existing_subtype, subtype))
        keep_headers = []
        part_headers = []
        for name, value in self._headers:
            if name.lower().startswith('content-'):
                part_headers.append((name, value))
            else:
                keep_headers.append((name, value))
        if part_headers:
            # There is existing content, move it to the first subpart.
            part = type(self)(policy=self.policy)
            part._headers = part_headers
            part._payload = self._payload
            self._payload = [part]
        else:
            self._payload = []
        self._headers = keep_headers
        self['Content-Type'] = 'multipart/' + subtype
        if boundary is not None:
            self.set_param('boundary', boundary)

    def make_related(self, boundary=None):
        """Convert to multipart/related; not allowed from alternative/mixed."""
        self._make_multipart('related', ('alternative', 'mixed'), boundary)

    def make_alternative(self, boundary=None):
        """Convert to multipart/alternative; not allowed from mixed."""
        self._make_multipart('alternative', ('mixed',), boundary)

    def make_mixed(self, boundary=None):
        """Convert to multipart/mixed."""
        self._make_multipart('mixed', (), boundary)

    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
        """Attach a new subpart built with set_content(*args, **kw).

        This part is first converted with make_<_subtype>() unless it is
        already multipart/<_subtype>.  If _disp is given and the new subpart
        has no Content-Disposition header, _disp becomes that header.
        """
        if (self.get_content_maintype() != 'multipart' or
                self.get_content_subtype() != _subtype):
            getattr(self, 'make_' + _subtype)()
        part = type(self)(policy=self.policy)
        part.set_content(*args, **kw)
        if _disp and 'content-disposition' not in part:
            part['Content-Disposition'] = _disp
        self.attach(part)

    def add_related(self, *args, **kw):
        """Add a subpart to a multipart/related, defaulting to 'inline'."""
        self._add_multipart('related', *args, _disp='inline', **kw)

    def add_alternative(self, *args, **kw):
        """Add a subpart to a multipart/alternative."""
        self._add_multipart('alternative', *args, **kw)

    def add_attachment(self, *args, **kw):
        """Add a subpart to a multipart/mixed, defaulting to 'attachment'."""
        self._add_multipart('mixed', *args, _disp='attachment', **kw)

    def clear(self):
        """Remove all headers and the payload."""
        self._headers = []
        self._payload = None

    def clear_content(self):
        """Remove the payload and every header whose name starts 'content-'."""
        self._headers = [(n, v) for n, v in self._headers
                         if not n.lower().startswith('content-')]
        self._payload = None
1197
1198
class EmailMessage(MIMEPart):
    """MIMEPart whose set_content() also guarantees a MIME-Version header."""

    def set_content(self, *args, **kw):
        """Set the content via MIMEPart.set_content(), then make sure a
        MIME-Version header exists, adding 'MIME-Version: 1.0' if absent.
        """
        super().set_content(*args, **kw)
        if 'MIME-Version' in self:
            # An existing header is left exactly as it is.
            return
        self['MIME-Version'] = '1.0'
1204 self['MIME-Version'] = '1.0'