1  import os
       2  import sys
       3  import tempfile
       4  import mimetypes
       5  import webbrowser
       6  
       7  # Import the email modules we'll need
       8  from email import policy
       9  from email.parser import BytesParser
      10  
      11  
      12  def magic_html_parser(html_text, partfiles):
      13      """Return safety-sanitized html linked to partfiles.
      14  
      15      Rewrite the href="cid:...." attributes to point to the filenames in partfiles.
      16      Though not trivial, this should be possible using html.parser.
      17      """
      18      raise NotImplementedError("Add the magic needed")
      19  
      20  
      21  # In a real program you'd get the filename from the arguments.
      22  with open('outgoing.msg', 'rb') as fp:
      23      msg = BytesParser(policy=policy.default).parse(fp)
      24  
      25  # Now the header items can be accessed as a dictionary, and any non-ASCII will
      26  # be converted to unicode:
      27  print('To:', msg['to'])
      28  print('From:', msg['from'])
      29  print('Subject:', msg['subject'])
      30  
      31  # If we want to print a preview of the message content, we can extract whatever
      32  # the least formatted payload is and print the first three lines.  Of course,
      33  # if the message has no plain text part printing the first three lines of html
      34  # is probably useless, but this is just a conceptual example.
      35  simplest = msg.get_body(preferencelist=('plain', 'html'))
      36  print()
      37  print(''.join(simplest.get_content().splitlines(keepends=True)[:3]))
      38  
      39  ans = input("View full message?")
      40  if ans.lower()[0] == 'n':
      41      sys.exit()
      42  
      43  # We can extract the richest alternative in order to display it:
      44  richest = msg.get_body()
      45  partfiles = {}
      46  if richest['content-type'].maintype == 'text':
      47      if richest['content-type'].subtype == 'plain':
      48          for line in richest.get_content().splitlines():
      49              print(line)
      50          sys.exit()
      51      elif richest['content-type'].subtype == 'html':
      52          body = richest
      53      else:
      54          print("Don't know how to display {}".format(richest.get_content_type()))
      55          sys.exit()
      56  elif richest['content-type'].content_type == 'multipart/related':
      57      body = richest.get_body(preferencelist=('html'))
      58      for part in richest.iter_attachments():
      59          fn = part.get_filename()
      60          if fn:
      61              extension = os.path.splitext(part.get_filename())[1]
      62          else:
      63              extension = mimetypes.guess_extension(part.get_content_type())
      64          with tempfile.NamedTemporaryFile(suffix=extension, delete=False) as f:
      65              f.write(part.get_content())
      66              # again strip the <> to go from email form of cid to html form.
      67              partfiles[part['content-id'][1:-1]] = f.name
      68  else:
      69      print("Don't know how to display {}".format(richest.get_content_type()))
      70      sys.exit()
      71  with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
      72      f.write(magic_html_parser(body.get_content(), partfiles))
      73  webbrowser.open(f.name)
      74  os.remove(f.name)
      75  for fn in partfiles.values():
      76      os.remove(fn)
      77  
# Of course, there are lots of email messages that could break this
# simple-minded program, but it will handle the most common ones.