1 #!/usr/bin/env python3
2
3 import sys, os, re, difflib, unicodedata, errno, cgi, itertools
4 from itertools import *
5
# Characters recognized as the leading marker of a diff line. ZipDiffer
# prefixes each line coming from the i-th input with the i-th character.
diff_symbols = "-+=*&^%$#@!~/"
# Color names handed to the formatter by DiffColorizer, indexed in parallel
# with diff_symbols (only the first entries have a matching color).
diff_colors = ['red', 'green', 'blue']
8
def codepoints(s):
    """Return an iterator over the integer code point of each character in s."""
    return map(ord, s)
11
class ColorFormatter:
    """Namespace of formatter classes that wrap text in color markup.

    Every nested formatter exposes the same four static methods:
    ``start_color(c)``, ``end_color()``, ``escape(s)`` and ``newline()``.
    """

    class Null:
        """Formatter that adds no markup and leaves text untouched."""

        @staticmethod
        def start_color(c):
            return ''

        @staticmethod
        def end_color():
            return ''

        @staticmethod
        def escape(s):
            return s

        @staticmethod
        def newline():
            return '\n'

    class ANSI:
        """Formatter that emits ANSI terminal escape sequences."""

        @staticmethod
        def start_color(c):
            # Only 'red', 'green' and 'blue' are known; anything else
            # raises KeyError.
            return {
                'red': '\033[41;37;1m',
                'green': '\033[42;37;1m',
                'blue': '\033[44;37;1m',
            }[c]

        @staticmethod
        def end_color():
            return '\033[m'

        @staticmethod
        def escape(s):
            return s

        @staticmethod
        def newline():
            return '\n'

    class HTML:
        """Formatter that emits HTML ``<span>`` markup."""

        @staticmethod
        def start_color(c):
            return '<span style="background:%s">' % c

        @staticmethod
        def end_color():
            return '</span>'

        @staticmethod
        def escape(s):
            # cgi.escape was removed in Python 3.8.  html.escape with
            # quote=False escapes exactly the same characters (&, <, >).
            import html
            return html.escape(s, quote=False)

        @staticmethod
        def newline():
            return '<br/>\n'

    @staticmethod
    def Auto(argv=None, out=sys.stdout):
        """Pick a formatter class from command-line style flags.

        Recognized flags are ``--format``, ``--format=ansi``,
        ``--format=html`` and ``--no-format``.  Each recognized flag is
        removed from ``argv`` in place (first occurrence only).  Flags are
        examined in that fixed order, so when several are present the one
        listed last wins.

        Args:
            argv: Mutable list of arguments to scan; ``None`` means no
                arguments.
            out: Unused; kept for interface compatibility.

        Returns:
            One of the nested formatter classes; ``ColorFormatter.ANSI``
            when no flag is present.
        """
        chosen = ColorFormatter.ANSI
        if argv is None:
            return chosen
        flag_table = (
            ("--format", ColorFormatter.ANSI),
            ("--format=ansi", ColorFormatter.ANSI),
            ("--format=html", ColorFormatter.HTML),
            ("--no-format", ColorFormatter.Null),
        )
        for flag, formatter in flag_table:
            if flag in argv:
                argv.remove(flag)
                chosen = formatter
        return chosen
68
69
class DiffColorizer:
    """Highlights the differing fragments of paired diff lines.

    Input lines are expected to start with one of ``symbols``; the
    character's position in ``symbols`` selects which side of the pair
    the line belongs to.
    """

    diff_regex = re.compile('([a-za-z0-9_]*)([^a-za-z0-9_]?)')

    def __init__(self, formatter, colors=diff_colors, symbols=diff_symbols):
        self.formatter = formatter
        self.colors = colors
        self.symbols = symbols

    def colorize_lines(self, lines):
        """Return the formatted output lines for one pair of diff lines.

        ``lines`` holds the two sides (either may be None/empty).  Each
        side is split into tokens with ``diff_regex``, the token lists are
        compared with difflib, and runs that differ are wrapped in the
        formatter's color markup.  Sides that end up empty are omitted.
        """
        fmt = self.formatter

        # One token per element, each token terminated by '\n' so that
        # difflib can treat tokens as "lines".
        tokenized = []
        for text in lines:
            marked = self.diff_regex.sub(r'\1\n\2\n', text if text else '')
            tokenized.append(marked.splitlines(True))

        rendered = ["", ""]
        span_open = [False, False]

        def close_open_spans():
            for side in range(2):
                if span_open[side]:
                    rendered[side] += fmt.end_color()
                    span_open[side] = False

        for entry in difflib.Differ().compare(*tokenized):
            tag = entry[0]
            if tag == '?':
                # Hint lines from difflib carry no content.
                continue
            if tag == ' ':
                # Common token: goes to both sides, outside any color span.
                close_open_spans()
                rendered[:] = [acc + fmt.escape(entry[2:]) for acc in rendered]
            elif tag in self.symbols:
                side = self.symbols.index(tag)
                if not span_open[side]:
                    rendered[side] += fmt.start_color(self.colors[side])
                    span_open[side] = True
                rendered[side] += fmt.escape(entry[2:])

        close_open_spans()

        # Drop the artificial token separators again.
        joined = [acc.replace('\n', '') for acc in rendered]
        return [
            symbol + body + fmt.newline()
            for symbol, body in zip(self.symbols, joined)
            if body
        ]

    def colorize_diff(self, f):
        """Yield formatted output lines for every line of diff input ``f``.

        Lines whose first character is not in ``symbols`` are escaped and
        passed through immediately.  Marked lines are buffered, one per
        side, and flushed through ``colorize_lines`` once both sides are
        filled, when a side would be overwritten, or at end of input.

        NOTE(review): only two sides are buffered, so a line starting with
        a symbol past the second one appears to raise IndexError here --
        confirm whether such input can occur.
        """
        fmt = self.formatter
        pending = [None, None]
        for raw in f:
            marker = raw[0]
            if marker not in self.symbols:
                yield fmt.escape(raw).replace('\n', fmt.newline())
                continue

            side = self.symbols.index(marker)
            if pending[side]:
                # This side is already occupied: flush before overwriting.
                yield from self.colorize_lines(pending)
                pending = [None, None]
            pending[side] = raw[1:]

            if all(pending):
                # Both sides present: flush the pair.
                yield from self.colorize_lines(pending)
                pending = [None, None]

        if any(pending):
            # Flush whatever is left at end of input.
            yield from self.colorize_lines(pending)
130
131
class ZipDiffer:
    """Line-by-line comparison of several inputs walked in lockstep."""

    @staticmethod
    def diff_files(files, symbols=diff_symbols):
        """Write a positional diff of ``files`` to standard output.

        The inputs are iterated in parallel.  When all inputs agree on a
        row, the line is written once with a leading space.  Otherwise each
        non-empty line is written prefixed with the symbol for its input's
        position.  A broken pipe is ignored; any other IOError terminates
        the program with a message.
        """
        files = tuple(files)  # in case it's a generator, copy it
        try:
            for row in itertools.zip_longest(*files):
                head = row[0]
                if all(head == other for other in row[1:]):
                    sys.stdout.writelines([" ", head])
                else:
                    for position, text in enumerate(row):
                        if text:
                            sys.stdout.writelines([symbols[position], text])
        except IOError as e:
            if e.errno != errno.EPIPE:
                sys.exit("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))
149
150
class DiffFilters:
    """Generators that filter a stream of diff lines."""

    @staticmethod
    def filter_failures(f):
        """Yield only the lines that belong to test cases that did not pass.

        Lines are grouped with ``DiffHelpers.separate_test_cases`` and each
        group is judged with ``DiffHelpers.test_passed``.
        """
        for _key, group in DiffHelpers.separate_test_cases(f):
            case_lines = list(group)
            if DiffHelpers.test_passed(case_lines):
                continue
            yield from case_lines
159
class Stat:
    """Running tally: number of tests added and the sum of their ``freq``."""

    def __init__(self):
        self.count = self.freq = 0

    def add(self, test):
        """Count one more test and accumulate its ``freq`` attribute."""
        self.count = self.count + 1
        self.freq = self.freq + test.freq
169
class Stats:
    """Pass/fail tallies for a set of tests, with simple statistics."""

    def __init__(self):
        self.passed = Stat()
        self.failed = Stat()
        self.total = Stat()

    def add(self, test):
        """Record ``test`` in the total and in the passed or failed tally."""
        self.total.add(test)
        bucket = self.passed if test.passed else self.failed
        bucket.add(test)

    def mean(self):
        """Return the fraction of tests that passed."""
        return float(self.passed.count) / self.total.count

    def variance(self):
        """Return (passed fraction) * (failed fraction)."""
        pass_ratio = float(self.passed.count) / self.total.count
        fail_ratio = float(self.failed.count) / self.total.count
        return pass_ratio * fail_ratio

    def stddev(self):
        """Return the square root of the variance."""
        return self.variance() ** .5

    def zscore(self, population):
        """Calculate the standard score.
        Population is the Stats for population.
        Self is Stats for sample.
        Returns larger absolute value if sample is highly unlikely to be random.
        Anything outside of -3..+3 is very unlikely to be random.
        See: https://en.wikipedia.org/wiki/Standard_score"""

        deviation = self.mean() - population.mean()
        return deviation / population.stddev()
203
204
205
206
class DiffSinks:
    """Consumers that summarize a stream of diff lines."""

    @staticmethod
    def print_stat(f):
        """Print how many test cases in ``f`` passed and how many failed.

        Test cases are delimited by ``DiffHelpers.separate_test_cases`` and
        judged by ``DiffHelpers.test_passed``.  The percentage printed is
        the share of failed cases; it is reported as 0 when the input
        contains no test cases at all.
        """
        passed = 0
        failed = 0
        # XXX port to Stats, but that would really slow us down here
        for _key, lines in DiffHelpers.separate_test_cases(f):
            if DiffHelpers.test_passed(lines):
                passed += 1
            else:
                failed += 1
        total = passed + failed
        # Empty input would otherwise divide by zero.
        failed_percent = 100. * failed / total if total else 0.
        print("%d out of %d tests passed. %d failed (%g%%)" % (passed, total, failed, failed_percent))
221
222
class Test:
    """One test case parsed from its group of diff lines.

    Each line starts with a one-character diff symbol.  If the line
    contains a colon, the text between the symbol and the colon is the
    test identifier and the payload starts two characters after the
    colon.  The payload is enclosed in a bracket pair whose kind selects
    the attribute it is stored in:

    * ``( )``  -> ``text``
    * ``< >``  -> ``unicodes`` (parsed with ``Unicode.parse``)
    * ``[ ]``  -> ``glyphs``

    ``passed`` becomes False as soon as any line has a symbol other than
    a space.
    """

    def __init__(self, lines):
        self.freq = 1
        self.passed = True
        self.identifier = None
        self.text = None
        self.unicodes = None
        self.glyphs = None
        for l in lines:
            symbol = l[0]
            if symbol != ' ':
                self.passed = False
            i = 1
            if ':' in l:
                i = l.index(':')
                if not self.identifier:
                    self.identifier = l[1:i]
                i = i + 2  # Skip colon and space
            # j indexes the closing bracket: the last character, or the one
            # before it when the line ends with a newline.
            j = -1
            if l[j] == '\n':
                j -= 1
            brackets = l[i] + l[j]
            # Slice up to the closing bracket itself.  A fixed "-2" here
            # would drop the last payload character of a line that has no
            # trailing newline.
            l = l[i + 1:j]
            if brackets == '()':
                self.text = l
            elif brackets == '<>':
                self.unicodes = Unicode.parse(l)
            elif brackets == '[]':
                # XXX we don't handle failed tests here
                self.glyphs = l
254
255
class DiffHelpers:
    """Helpers for grouping diff lines into test cases and judging them."""

    @staticmethod
    def separate_test_cases(f):
        '''Reads lines from f, and if the lines have identifiers, ie.
        have a colon character, groups them by identifier,
        yielding lists of all lines with the same identifier.'''

        def identifier(l):
            # Search from index 1 so the leading diff symbol is never
            # mistaken for the colon that ends the identifier.
            colon = l.find(':', 1)
            if colon != -1:
                return l[1:colon]
            return l

        return itertools.groupby(f, key=identifier)

    # Substrings that, when present in a '+' line, make the whole test
    # case count as passed.
    _FORGIVEN_MARKERS = (
        "space+0|space+0",
        "uni25CC",
        "dottedcircle",
        "glyph0",
        "gid0",
        "notdef",
    )

    @staticmethod
    def test_passed(lines):
        """Return True if the test case made of ``lines`` counts as passed.

        A case passes when every line starts with a space, or when any
        line starting with '+' contains one of the forgiven markers.
        """
        lines = list(lines)
        # XXX This is a hack, but does the job for now.
        for l in lines:
            if l.startswith('+') and any(m in l for m in DiffHelpers._FORGIVEN_MARKERS):
                return True
        return all(l.startswith(' ') for l in lines)
281
282
class FilterHelpers:
    """Adapters that turn a filter generator into a printing function."""

    @staticmethod
    def filter_printer_function(filter_callback):
        """Return a function that prints each item of ``filter_callback(f)``.

        Items are emitted with ``print``, so each gets a newline appended.
        """
        def printer(f):
            filtered = filter_callback(f)
            for item in filtered:
                print(item)
        return printer

    @staticmethod
    def filter_printer_function_no_newline(filter_callback):
        """Return a function that writes each item of ``filter_callback(f)``.

        Items are written to standard output as-is, with nothing appended.
        """
        def printer(f):
            filtered = filter_callback(f)
            for item in filtered:
                sys.stdout.writelines([item])
        return printer
298
299
class Ngram:
    """Factory for sliding-window (n-gram) generators."""

    @staticmethod
    def generator(n):
        """Return a generator function yielding every run of ``n`` items.

        The returned function takes an iterable and yields tuples of ``n``
        consecutive items, advancing by one item each time.  The window
        size is also exposed as the ``n`` attribute of that function.
        """

        def gen(f):
            window = []
            for item in f:
                window.append(item)
                if len(window) != n:
                    continue
                yield tuple(window)
                # Slide the window forward by dropping its oldest item.
                del window[0]

        gen.n = n
        return gen
315
316
class UtilMains:
    """Command-line entry-point helpers driven by ``sys.argv``."""

    @staticmethod
    def _exit_unless_broken_pipe(e):
        # A broken pipe is ignored silently; any other I/O error terminates
        # the program with a message.
        if e.errno != errno.EPIPE:
            sys.exit("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror))

    @staticmethod
    def process_multiple_files(callback, mnemonic="FILE"):
        """Call ``callback`` with an open file for each command-line argument.

        With no arguments, standard input ('-') is used.  ``--help``
        anywhere in the arguments prints usage and exits.
        """
        if "--help" in sys.argv:
            sys.exit("Usage: %s %s..." % (sys.argv[0], mnemonic))

        try:
            names = sys.argv[1:] or ['-']
            for name in names:
                callback(FileHelpers.open_file_or_stdin(name))
        except IOError as e:
            UtilMains._exit_unless_broken_pipe(e)

    @staticmethod
    def process_multiple_args(callback, mnemonic):
        """Call ``callback`` with each command-line argument in turn.

        Prints usage and exits when there are no arguments or ``--help``
        is among them.
        """
        if len(sys.argv) == 1 or "--help" in sys.argv:
            sys.exit("Usage: %s %s..." % (sys.argv[0], mnemonic))

        try:
            for arg in sys.argv[1:]:
                callback(arg)
        except IOError as e:
            UtilMains._exit_unless_broken_pipe(e)

    @staticmethod
    def filter_multiple_strings_or_stdin(callback, mnemonic,
                                         separator=" ",
                                         concat_separator=False):
        """Print ``callback`` applied to the arguments or to stdin lines.

        With arguments: each argument is passed through ``callback`` and
        the results are printed on one line joined by ``separator``.  If
        ``concat_separator`` is not False, the arguments are first joined
        with it into a single string.  Without arguments: each line of
        standard input (minus its trailing newline) is passed through
        ``callback`` and printed on its own line.
        """
        if "--help" in sys.argv:
            sys.exit("""Usage:
%s %s...
or:
%s
When called with no arguments, input is read from standard input.
""" % (sys.argv[0], mnemonic, sys.argv[0]))

        try:
            if len(sys.argv) == 1:
                # readline() returns '' only at end of input.
                for line in iter(sys.stdin.readline, ''):
                    if line[-1] == '\n':
                        line = line[:-1]
                    print(callback(line))
                return

            args = sys.argv[1:]
            if concat_separator != False:
                args = [concat_separator.join(args)]
            print(separator.join(callback(x) for x in args))
        except IOError as e:
            UtilMains._exit_unless_broken_pipe(e)
376
377
class Unicode:
    """Conversions between strings, code point lists and character names."""

    @staticmethod
    def decode(s):
        """Return the characters of ``s`` as a comma-separated U+XXXX list."""
        labels = ("U+%04X" % cp for cp in codepoints(s))
        return ','.join(labels)

    @staticmethod
    def parse(s):
        """Return the list of integers named by the hex numbers in ``s``.

        ``0x``/``0X`` prefixes and a fixed set of decoration characters
        are turned into spaces first; every remaining whitespace-separated
        word is read as a hexadecimal number.
        """
        without_prefix = re.sub(r"0[xX]", " ", s)
        cleaned = re.sub(r"[<+\->{},;&#\\xXuUnNiI\n\t]", " ", without_prefix)
        return [int(word, 16) for word in cleaned.split()]

    @staticmethod
    def encode(s):
        """Return the string made of the code points parsed from ``s``."""
        return ''.join(map(chr, Unicode.parse(s)))

    shorthands = {
        "ZERO WIDTH NON-JOINER": "ZWNJ",
        "ZERO WIDTH JOINER": "ZWJ",
        "NARROW NO-BREAK SPACE": "NNBSP",
        "COMBINING GRAPHEME JOINER": "CGJ",
        "LEFT-TO-RIGHT MARK": "LRM",
        "RIGHT-TO-LEFT MARK": "RLM",
        "LEFT-TO-RIGHT EMBEDDING": "LRE",
        "RIGHT-TO-LEFT EMBEDDING": "RLE",
        "POP DIRECTIONAL FORMATTING": "PDF",
        "LEFT-TO-RIGHT OVERRIDE": "LRO",
        "RIGHT-TO-LEFT OVERRIDE": "RLO",
    }

    @staticmethod
    def pretty_name(u):
        """Return a shortened form of the Unicode name of character ``u``.

        Returns "XXX" when the character has no name.  Otherwise the name
        is trimmed by a fixed sequence of substitutions and finally
        replaced by its entry in ``shorthands`` if there is one.

        NOTE(review): the VIRAMA pattern requires a preceding space, and
        the "SIGN " substitution has already stripped everything up to
        that point, so names of the form "... SIGN VIRAMA" appear to come
        out as "VIRAMA" rather than "HALANT" -- confirm intent.
        """
        try:
            name = unicodedata.name(u)
        except ValueError:
            return "XXX"

        trims = (
            (".* LETTER ", ""),
            (".* VOWEL SIGN (.*)", r"\1-MATRA"),
            (".* SIGN ", ""),
            (".* COMBINING ", ""),
        )
        for pattern, replacement in trims:
            name = re.sub(pattern, replacement, name)

        if re.match(".* VIRAMA", name):
            name = "HALANT"
        return Unicode.shorthands.get(name, name)

    @staticmethod
    def pretty_names(s):
        """Return the pretty names of the code points in ``s``, ' + '-joined.

        ``s`` is a list of hexadecimal code points separated by commas,
        spaces or newlines, optionally decorated with ``U+``, ``\\u``,
        ``0x`` or angle brackets.
        """
        stripped = re.sub(r"[<+>\\uU]", " ", s)
        stripped = re.sub(r"0[xX]", " ", stripped)
        chars = []
        for word in re.split('[, \n]', stripped):
            if len(word):
                chars.append(chr(int(word, 16)))
        return ' + '.join(Unicode.pretty_name(ch) for ch in chars)
430
431
class FileHelpers:
    """Small helpers for opening input files."""

    @staticmethod
    def open_file_or_stdin(f):
        """Return standard input when ``f`` is '-', else ``f`` opened for reading."""
        return sys.stdin if f == '-' else open(f)
439
440
class Manifest:
    """Reading and regenerating per-directory ``MANIFEST`` listings."""

    @staticmethod
    def read(s, strict=True):
        """Yield the file paths reachable from ``s``.

        If ``s`` is a directory, its ``MANIFEST`` file is read (one entry
        per line, surrounding whitespace stripped, blank lines ignored)
        and every entry is expanded recursively relative to ``s``.
        Otherwise the normalized path ``s`` itself is yielded.

        Args:
            s: File or directory path.
            strict: When true, a missing path or unreadable MANIFEST
                terminates the program via ``sys.exit``; when false,
                nothing is yielded for it.  Entries listed inside a
                MANIFEST are always expanded with the default (strict)
                setting.
        """
        if not os.path.exists(s):
            if strict:
                sys.exit("%s: %s does not exist" % (sys.argv[0], s))
            return

        s = os.path.normpath(s)

        if not os.path.isdir(s):
            yield s
            return

        manifest_path = os.path.join(s, "MANIFEST")
        try:
            with open(manifest_path) as m:
                items = [x.strip() for x in m]
        except IOError:
            if strict:
                sys.exit("%s: %s does not exist" % (sys.argv[0], manifest_path))
            return

        for f in items:
            if not f:
                # A blank entry would join to the directory itself and
                # recurse into it forever.
                continue
            yield from Manifest.read(os.path.join(s, f))

    @staticmethod
    def update_recursive(s):
        """Rewrite the ``MANIFEST`` file of ``s`` and of every directory below it.

        Each MANIFEST lists the directory's files, then its
        subdirectories, both sorted, one per line.  Entries named
        MANIFEST, README, LICENSE, COPYING, AUTHORS, SOURCES or ChangeLog
        are left out, and subdirectories with those names are not
        descended into.  A progress line is printed per directory.
        """
        skipped = ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]

        # os.walk already descends into every remaining subdirectory, so
        # no explicit recursion is needed (it would regenerate each
        # subdirectory more than once).
        for dirpath, dirnames, filenames in os.walk(s, followlinks=True):

            # dirnames must be edited in place: that is what prunes and
            # orders the walk.
            for f in skipped:
                if f in dirnames:
                    dirnames.remove(f)
                if f in filenames:
                    filenames.remove(f)
            dirnames.sort()
            filenames.sort()

            ms = os.path.join(dirpath, "MANIFEST")
            print(" GEN %s" % ms)
            with open(ms, "w") as m:
                for f in filenames:
                    print(f, file=m)
                for f in dirnames:
                    print(f, file=m)
489
if __name__ == '__main__':
    # Running this file directly does nothing; the body is an explicit no-op.
    pass