1 #!/usr/bin/env python3
2 """Classes to parse mailer-daemon messages."""
3
4 import calendar
5 import email.message
6 import re
7 import os
8 import sys
9
10
class Unparseable(Exception):
    """Raised when a message body matches none of the known error formats."""
13
14
class ErrorMessage(email.message.Message):
    """A mailer-daemon message that can be asked for its delivery errors.

    Instances are meant to be created by the email parser, e.g.
    ``email.message_from_file(fp, _class=ErrorMessage)``.
    """

    def __init__(self):
        super().__init__()
        # Lower-cased subject.  Only filled in by is_warning() when the
        # message is *not* a warning; get_errors() hands it to the parsers.
        self.sub = ''

    def is_warning(self):
        """Return true if the Subject marks this as a warning-only notice.

        A subject that starts with 'waiting mail' or contains 'warning'
        (case-insensitively) counts as a warning.  For any other non-empty
        subject the lower-cased text is remembered in ``self.sub``.
        """
        sub = self.get('Subject')
        if not sub:
            return False
        sub = sub.lower()
        if sub.startswith('waiting mail') or 'warning' in sub:
            return True
        self.sub = sub
        return False

    def _body_file(self):
        """Return a new text stream positioned at the start of the body.

        email.message.Message keeps no file object and has no rewindbody(),
        so the stream is rebuilt from the parsed message on every call.
        """
        import io  # local import: the module header does not import io

        if self.is_multipart():
            # Re-serialise the message and drop the top-level header block
            # (everything up to the first blank line) so the parsers see
            # the sub-parts, including their own headers, as plain text.
            body = self.as_string().partition('\n\n')[2]
        else:
            body = self.get_payload()
            if not isinstance(body, str):
                # No payload has been set on this message.
                body = ''
        return io.StringIO(body)

    def get_errors(self):
        """Return the result of the first parser that understands the body.

        Each callable in EMPARSERS is called with a fresh body stream and
        ``self.sub``.  Raises Unparseable when every parser gives up.
        """
        for parser in EMPARSERS:
            try:
                return parser(self._body_file(), self.sub)
            except Unparseable:
                pass
        raise Unparseable
40
# Patterns that locate the failing address(es) in a message body.
#
# Each entry is either a single pattern or a 2-tuple of patterns.
#  * Single pattern: must define a group (?P<email>...) for the address and
#    may define (?P<reason>...) for the error text.  When no reason is
#    captured, emparse_list_reason is consulted instead.
#  * 2-tuple: the first pattern is searched for to find a location; the
#    second is then matched (not searched) repeatedly, each time starting
#    where the previous match ended, to collect several addresses.
# The sources below are replaced by compiled pattern objects right after
# the table.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
    ]
# Compile every pattern with re.MULTILINE, keeping the same list object and
# turning each tuple of sources into a tuple of compiled patterns.
emparse_list_list[:] = [
    re.compile(entry, re.MULTILINE) if isinstance(entry, str)
    else tuple(re.compile(part, re.MULTILINE) for part in entry)
    for entry in emparse_list_list
]
84
# Patterns used to find a reason (error message) when the address pattern
# did not capture one.
#  * A plain string is a per-address template: every "<>" in it is replaced
#    by the (escaped) email address before it is compiled and searched.
#  * A compiled pattern is searched as-is and supplies one reason for all
#    remaining addresses.
# Entries are tried in order and the first compiled pattern that matches
# ends the search, so order matters.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', flags=re.MULTILINE),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', flags=re.MULTILINE),
    ]
# Finds the first line that starts with "From:" (any letter case).
emparse_list_from = re.compile('^From:', flags=re.MULTILINE | re.IGNORECASE)
def emparse_list(fp, sub):
    """Extract delivery errors from a message body using the pattern tables.

    fp  -- object whose read() returns the message body as text.
    sub -- fallback reason, used when the address pattern captured none; a
           leading 'returned mail: ' is stripped from it.

    Returns a list of 'address: reason' strings in which every run of
    whitespace is collapsed to a single space.  Raises Unparseable when no
    entry of emparse_list_list yields an address.
    """
    text = fp.read()

    # Addresses are only looked for before the first line starting "From:".
    from_hit = emparse_list_from.search(text)
    limit = len(text) if from_hit is None else from_hit.start(0)

    def describe(address, why):
        # 'address: why' with all whitespace runs squeezed to one space.
        return ' '.join((address.strip() + ': ' + why).split())

    addresses = []
    reason = None
    for pattern in emparse_list_list:
        if isinstance(pattern, tuple):
            # Locator pattern first, then the repeated address pattern.
            hit = pattern[0].search(text, 0, limit)
            if hit is None:
                continue
            try:
                reason = hit.group('reason')
            except IndexError:
                # The locator defines no 'reason' group.
                pass
            hit = pattern[1].match(text, hit.end(0), limit)
            while hit is not None:
                addresses.append(hit.group('email'))
                hit = pattern[1].match(text, hit.end(0), limit)
            # The first locator that matches ends the scan, even if no
            # address followed it.
            break
        hit = pattern.search(text, 0, limit)
        if hit is None:
            continue
        addresses.append(hit.group('email'))
        try:
            reason = hit.group('reason')
        except IndexError:
            # This pattern defines no 'reason' group.
            pass
        break

    if not addresses:
        raise Unparseable

    errors = []
    if not reason:
        reason = sub
        if reason[:15] == 'returned mail: ':
            reason = reason[15:]
        for pattern in emparse_list_reason:
            if isinstance(pattern, str):
                # Per-address template: walk backwards so that resolved
                # addresses can be deleted while iterating.
                for idx in reversed(range(len(addresses))):
                    address = addresses[idx]
                    specific = re.compile(
                        pattern.replace('<>', re.escape(address)),
                        re.MULTILINE)
                    hit = specific.search(text)
                    if hit is not None:
                        errors.append(describe(address, hit.group('reason')))
                        del addresses[idx]
                continue
            hit = pattern.search(text)
            if hit is not None:
                reason = hit.group('reason')
                break

    # Whatever is still unresolved shares the common reason.
    errors.extend(describe(address, reason) for address in addresses)
    return errors
153
# Parser callables tried in order by ErrorMessage.get_errors().  Each one is
# called with (fp, sub) and either returns its result or raises Unparseable,
# in which case the next parser is tried.
EMPARSERS = [emparse_list]
155
def sort_numeric(a, b):
    """Three-way comparison of *a* and *b* by their integer values.

    Both arguments are converted with int().  Returns -1 if a < b, 1 if
    a > b and 0 if they are numerically equal.
    """
    left = int(a)
    right = int(b)
    return (left > right) - (left < right)
165
def _message_date(m):
    """Return the Date header of message *m* as 'Mon DD', else '??????'."""
    from email.utils import parsedate  # local: not imported at file level

    try:
        # parsedate() returns None for a missing or malformed date, which
        # makes the slice raise TypeError.
        mm, dd = parsedate(m.get('Date', ''))[1:3]
        return '%s %02d' % (calendar.month_abbr[mm], dd)
    except (TypeError, ValueError, IndexError, AttributeError):
        # Best effort only: an unusable date must not abort the run.
        return '??????'


def parsedir(dir, modify):
    """Parse every all-digit file in *dir* and print an error summary.

    dir    -- directory to process; the process changes into it.
    modify -- if true, each file that was handled (warning-only or parsed)
              is renamed by prefixing its name with ','.

    For each file one status line is printed (name, sender address and
    either 'warning only', '** Not parseable' or the number of errors).
    Afterwards totals are printed, followed by one line per distinct error
    with its count and the first and last file (with date) it was seen in.
    """
    from email.utils import parseaddr  # local: not imported at file level

    os.chdir(dir)
    errordict = {}
    errorfirst = {}
    errorlast = {}
    nok = nwarn = nbad = 0

    # Find all numeric file names and sort them by numeric value.
    # list.sort() takes no comparison function in Python 3, hence key=int.
    numeric = re.compile('[0-9]+')
    files = sorted((fn for fn in os.listdir('.') if numeric.fullmatch(fn)),
                   key=int)

    for fn in files:
        # Let's try to parse the file.  The file stays open until the
        # message has been examined; undecodable bytes are replaced so one
        # odd message cannot abort the whole run.
        errors = None
        with open(fn, errors='replace') as fp:
            m = email.message_from_file(fp, _class=ErrorMessage)
            sender = parseaddr(str(m.get('From', '')))
            print('%s\t%-40s\t'%(fn, sender[1]), end=' ')
            warning = m.is_warning()
            if not warning:
                try:
                    errors = m.get_errors()
                except Unparseable:
                    pass

        if warning:
            print('warning only')
            nwarn = nwarn + 1
        elif errors is None:
            print('** Not parseable')
            nbad = nbad + 1
            # Unparseable files are left untouched.
            continue
        else:
            print(len(errors), 'errors')

            # Remember them.  The date is the same for every error of
            # this message, so work it out once.
            where = '%s (%s)' % (fn, _message_date(m))
            for e in errors:
                if e not in errordict:
                    errordict[e] = 1
                    errorfirst[e] = where
                else:
                    errordict[e] = errordict[e] + 1
                errorlast[e] = where
            nok = nok + 1

        # Rename only after the file has been closed.
        if modify:
            os.rename(fn, ','+fn)

    print('--------------')
    print(nok, 'files parsed,',nwarn,'files warning-only,', end=' ')
    print(nbad,'files unparseable')
    print('--------------')
    # Least frequent errors first (ties broken by the remaining fields).
    summary = sorted((errordict[e], errorfirst[e], errorlast[e], e)
                     for e in errordict)
    for num, first, last, e in summary:
        print('%d %s - %s\t%s' % (num, first, last, e))
233
def main():
    """Command-line entry point: ``[-d] [folder ...]``.

    A leading '-d' switches on modify mode and is removed from sys.argv.
    Every remaining argument is passed to parsedir(); with no arguments
    the built-in default folder is processed.
    """
    argv = sys.argv
    modify = 0
    if len(argv) > 1 and argv[1] == '-d':
        modify = 1
        del argv[1]
    folders = argv[1:] or ['/ufs/jack/Mail/errorsinbox']
    for folder in folders:
        parsedir(folder, modify)
244
# Run main() when executed as a script.  The second condition also fires
# when sys.argv[0] is equal to this module's name.
if __name__ == '__main__' or sys.argv[0] == __name__:
    main()