python (3.11.7)
1 # Copyright (C) 2001-2010 Python Software Foundation
2 # Author: Barry Warsaw
3 # Contact: email-sig@python.org
4
"""Classes to generate plain text from a message object tree.

Generator writes str output, BytesGenerator writes bytes output, and
DecodedGenerator substitutes a format string for every non-text part.
"""

__all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator']
8
9 import re
10 import sys
11 import time
12 import random
13
14 from copy import deepcopy
15 from io import StringIO, BytesIO
16 from email.utils import _has_surrogates
17
# Separator used when joining a MIME maintype and subtype into a handler name.
UNDERSCORE = "_"
NL = "\n"  # XXX: no longer used by the code below.

# Matches one line ending of any flavour: CRLF, bare CR or bare LF.
NLCRE = re.compile(r'\r\n|\r|\n')
# Matches a Unix From_ delimiter at the start of any line of a body.
fcre = re.compile(r'^From ', flags=re.M)
23
24
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *,
                 policy=None):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default if policy
        is not set), escapes From_ lines in the body of the message by putting
        a `>' in front of them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.

        The policy keyword specifies a policy object that controls a number of
        aspects of the generator's operation.  If no policy is specified,
        the policy associated with the Message object passed to the
        flatten method is used.

        """

        if mangle_from_ is None:
            mangle_from_ = True if policy is None else policy.mangle_from_
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.maxheaderlen = maxheaderlen
        self.policy = policy

    def write(self, s):
        """Write the string s to the current output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False, linesep=None):
        r"""Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.

        linesep specifies the characters used to indicate a new line in
        the output.  The default value is determined by the policy specified
        when the Generator instance was created or, if none was specified,
        from the policy associated with the msg.

        While the message is being written, both self.policy and msg.policy
        are set to the computed policy; the previous values are restored
        afterwards, even if writing raises.

        """
        # We use the _XXX constants for operating on data that comes directly
        # from the msg, and _encoded_XXX constants for operating on data that
        # has already been converted (to bytes in the BytesGenerator) and
        # inserted into a temporary buffer.
        policy = msg.policy if self.policy is None else self.policy
        if linesep is not None:
            policy = policy.clone(linesep=linesep)
        if self.maxheaderlen is not None:
            policy = policy.clone(max_line_length=self.maxheaderlen)
        self._NL = policy.linesep
        self._encoded_NL = self._encode(self._NL)
        self._EMPTY = ''
        self._encoded_EMPTY = self._encode(self._EMPTY)
        # Because we use clone (below) when we recursively process message
        # subparts, and because clone uses the computed policy (not None),
        # submessages will automatically get set to the computed policy when
        # they are processed by this code.
        old_gen_policy = self.policy
        old_msg_policy = msg.policy
        try:
            self.policy = policy
            msg.policy = policy
            if unixfrom:
                ufrom = msg.get_unixfrom()
                if not ufrom:
                    ufrom = 'From nobody ' + time.ctime(time.time())
                self.write(ufrom + self._NL)
            self._write(msg)
        finally:
            self.policy = old_gen_policy
            msg.policy = old_msg_policy

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp,
                              self._mangle_from_,
                              None, # Use policy setting, which we've adjusted
                              policy=self.policy)

    #
    # Protected interface - undocumented ;/
    #

    # Note that we use 'self.write' when what we are writing is coming from
    # the source, and self._fp.write when what we are writing is coming from a
    # buffer (because the Bytes subclass has already had a chance to transform
    # the data in its write method in that case).  This is an entirely
    # pragmatic split determined by experiment; we could be more general by
    # always using write and having the Bytes subclass write method detect when
    # it has already transformed the input; but, since this whole thing is a
    # hack anyway this seems good enough.

    def _new_buffer(self):
        """Return a fresh in-memory buffer for collecting a part's output."""
        # BytesGenerator overrides this to return BytesIO.
        return StringIO()

    def _encode(self, s):
        """Return s converted to the type held by the temporary buffers."""
        # BytesGenerator overrides this to encode strings to bytes.
        return s

    def _write_lines(self, lines):
        """Write the text in lines, replacing every line ending by self._NL.

        A trailing line ending in the input yields a trailing self._NL in the
        output; text without a trailing line ending is written without one.
        """
        # We have to transform the line endings.
        if not lines:
            return
        lines = NLCRE.split(lines)
        for line in lines[:-1]:
            self.write(line)
            self.write(self._NL)
        if lines[-1]:
            self.write(lines[-1])
        # XXX logic tells me this else should be needed, but the tests fail
        # with it and pass without it.  (NLCRE.split ends with a blank element
        # if and only if there was a trailing newline.)
        #else:
        #    self.write(self._NL)

    def _write(self, msg):
        """Write the headers and then the body of msg to the output file.

        The body is generated first, into a temporary buffer, so that the
        body handlers may still influence the headers that get written.
        """
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a buffer.  Then we write the
        # headers and the buffer contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._munge_cte = None
            self._fp = sfp = self._new_buffer()
            self._dispatch(msg)
        finally:
            self._fp = oldfp
            munge_cte = self._munge_cte
            del self._munge_cte
        # If we munged the cte, copy the message again and re-fix the CTE.
        if munge_cte:
            msg = deepcopy(msg)
            # Preserve the header order if the CTE header already exists.
            if msg.get('content-transfer-encoding') is None:
                msg['Content-Transfer-Encoding'] = munge_cte[0]
            else:
                msg.replace_header('content-transfer-encoding', munge_cte[0])
            msg.replace_header('content-type', munge_cte[1])
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the body handler that matches the content type of msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, folded by the policy, then a blank line."""
        for h, v in msg.raw_items():
            self.write(self.policy.fold(h, v))
        # A blank line always separates headers from body
        self.write(self._NL)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write a string payload, mangling From_ lines when enabled.

        Does nothing when the payload is None.  Raises TypeError when the
        payload is not a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, str):
            raise TypeError('string payload expected: %s' % type(payload))
        if _has_surrogates(msg._payload):
            charset = msg.get_param('charset')
            if charset is not None:
                # XXX: This copy stuff is an ugly hack to avoid modifying the
                # existing message.
                msg = deepcopy(msg)
                del msg['content-transfer-encoding']
                msg.set_payload(payload, charset)
                payload = msg.get_payload()
                # Remember the re-encoded headers so that _write can put them
                # on the copy of the message whose headers it writes.
                self._munge_cte = (msg['content-transfer-encoding'],
                                   msg['content-type'])
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._write_lines(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write preamble, boundary-delimited subparts and epilogue of msg.

        When msg has no boundary parameter, one that does not occur in the
        flattened subparts is generated and set on msg.
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, str):
            # e.g. a non-strict parse of a message with no starting boundary.
            self.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = self._encoded_NL.join(msgtexts)
            boundary = self._make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            if self._mangle_from_:
                preamble = fcre.sub('>From ', msg.preamble)
            else:
                preamble = msg.preamble
            self._write_lines(preamble)
            self.write(self._NL)
        # dash-boundary transport-padding CRLF
        self.write('--' + boundary + self._NL)
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            self.write(self._NL + '--' + boundary + self._NL)
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self.write(self._NL + '--' + boundary + '--' + self._NL)
        if msg.epilogue is not None:
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', msg.epilogue)
            else:
                epilogue = msg.epilogue
            self._write_lines(epilogue)

    def _handle_multipart_signed(self, msg):
        """Write a multipart/signed message with header wrapping disabled."""
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        p = self.policy
        self.policy = p.clone(max_line_length=0)
        try:
            self._handle_multipart(msg)
        finally:
            self.policy = p

    def _handle_message_delivery_status(self, msg):
        """Write the header blocks of a message/delivery-status payload."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            text = s.getvalue()
            lines = text.split(self._encoded_NL)
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == self._encoded_EMPTY:
                blocks.append(self._encoded_NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(self._encoded_NL.join(blocks))

    def _handle_message(self, msg):
        """Write the payload of a message/* part."""
        s = self._new_buffer()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg._payload
        if isinstance(payload, list):
            g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
            payload = s.getvalue()
        else:
            payload = self._encode(payload)
        self._fp.write(payload)

    # This used to be a module level function; we use a classmethod for this
    # and _compile_re so we can continue to provide the module level function
    # for backward compatibility by doing
    #   _make_boundary = Generator._make_boundary
    # at the end of the module.  It *is* internal, so we could drop that...
    @classmethod
    def _make_boundary(cls, text=None):
        """Return a random boundary string.

        If text is given, the returned boundary is guaranteed not to occur in
        it as a delimiter line ('--boundary') or close-delimiter line
        ('--boundary--'), whether the lines of text end in LF or in CRLF.
        """
        # Craft a random boundary.  If text is given, ensure that the chosen
        # boundary doesn't appear in the text.
        token = random.randrange(sys.maxsize)
        boundary = ('=' * 15) + (_fmt % token) + '=='
        if text is None:
            return boundary
        b = boundary
        counter = 0
        while True:
            # In MULTILINE mode '$' only matches right before '\n', so the
            # optional '\r' is required to recognise delimiter lines in text
            # that was flattened with a CRLF linesep.
            cre = cls._compile_re('^--' + re.escape(b) + '(--)?\r?$',
                                  re.MULTILINE)
            if not cre.search(text):
                break
            b = boundary + '.' + str(counter)
            counter += 1
        return b

    @classmethod
    def _compile_re(cls, s, flags):
        """Compile pattern s into a regex that can search this class's output."""
        return re.compile(s, flags)
393
394
class BytesGenerator(Generator):
    """Generates a bytes version of a Message object tree.

    Functionally identical to the base Generator except that the output is
    bytes and not string.  When surrogates were used in the input to encode
    bytes, these are decoded back to bytes for output.  If the policy has
    cte_type set to 7bit, then the message is transformed such that the
    non-ASCII bytes are properly content transfer encoded, using the charset
    unknown-8bit.

    The outfp object must accept bytes in its write method.
    """

    def write(self, s):
        """Encode the string s to bytes and write it to the output file.

        Surrogate-escaped characters are turned back into the bytes they
        stand for.
        """
        self._fp.write(s.encode('ascii', 'surrogateescape'))

    def _new_buffer(self):
        """Return a fresh bytes buffer for collecting a part's output."""
        return BytesIO()

    def _encode(self, s):
        """Return the ASCII string s as bytes."""
        return s.encode('ascii')

    def _write_headers(self, msg):
        """Write every header of msg as bytes, followed by a blank line."""
        # This is almost the same as the string version, except for handling
        # strings with 8bit bytes.
        for h, v in msg.raw_items():
            # fold_binary output goes straight to the file object, bypassing
            # the encoding step in self.write.
            self._fp.write(self.policy.fold_binary(h, v))
        # A blank line always separates headers from body
        self.write(self._NL)

    def _handle_text(self, msg):
        """Write a text payload, passing surrogate-escaped bytes through.

        The message itself is left unmodified: From_ mangling is applied to
        a local copy of the payload only.
        """
        # If the string has surrogates the original source was bytes, so
        # just write it back out.
        if msg._payload is None:
            return
        if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit':
            payload = msg._payload
            if self._mangle_from_:
                # Do not assign the result back to msg._payload; flattening
                # must not change the message that is being flattened.
                payload = fcre.sub(">From ", payload)
            self._write_lines(payload)
        else:
            super()._handle_text(msg)

    # Default body handler
    _writeBody = _handle_text

    @classmethod
    def _compile_re(cls, s, flags):
        """Compile the ASCII pattern s into a regex that searches bytes."""
        return re.compile(s.encode('ascii'), flags)
443
444
445 _FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
446
class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Behaves like the Generator base class, except that each non-text part is
    replaced by a format string describing that part.
    """

    def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, fmt=None, *,
                 policy=None):
        """Like Generator.__init__(), with one additional optional argument.

        All subparts of a message are visited.  For a subpart whose main type
        is `text', the payload is printed as get_payload(decode=False)
        returns it.  Subparts of main type `multipart' produce no output of
        their own.

        For every other subpart, fmt is printed instead of the payload.  fmt
        is a format string expanded with these keywords (in %(keyword)s
        format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        When fmt is None (the default) this format string is used:

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen,
                           policy=policy)
        self._fmt = _FMT if fmt is None else fmt

    def _dispatch(self, msg):
        """Print every part of msg: text payloads as-is, others as fmt."""
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'multipart':
                # Containers contribute nothing; walk() visits their subparts.
                continue
            if maintype == 'text':
                print(part.get_payload(decode=False), file=self)
                continue
            # Any other part is summarised through the format string.
            fields = dict(
                type=part.get_content_type(),
                maintype=part.get_content_maintype(),
                subtype=part.get_content_subtype(),
                filename=part.get_filename('[no filename]'),
                description=part.get('Content-Description',
                                     '[no description]'),
                encoding=part.get('Content-Transfer-Encoding',
                                  '[no encoding]'),
            )
            print(self._fmt % fields, file=self)
502
503
504 # Helper used by Generator._make_boundary
505 _width = len(repr(sys.maxsize-1))
506 _fmt = '%%0%dd' % _width
507
# Backward compatibility: _make_boundary used to be a module level function,
# so the classmethod stays reachable under its old module level name.
_make_boundary = Generator._make_boundary