1 import itertools
2 import os
3 import platform
4 import re
5 import sys
6 import time
7 from optparse import OptionParser
8
# Stream that per-test labels and results are written to.
out = sys.stdout

# Defaults for the text benchmarks. main() may rebind both from the
# -E/--encoding and -N/--newlines command-line options.
TEXT_ENCODING = 'utf8'
NEWLINES = 'lf'
13
14
def text_open(fn, mode, encoding=None):
    """Open *fn* with an explicit text encoding.

    The encoding is *encoding* when it is truthy, otherwise the module-level
    TEXT_ENCODING.  If the active ``open`` raises TypeError for that call
    (e.g. it does not accept an ``encoding`` keyword), the file is opened
    again with just ``(fn, mode)``.
    """
    try:
        handle = open(
            fn, mode,
            encoding=(encoding if encoding else TEXT_ENCODING))
    except TypeError:
        handle = open(fn, mode)
    return handle
20
21
def get_file_sizes():
    """Yield ``(label, size_in_bytes)`` for each benchmark file size.

    The label is the human-readable size with the space removed,
    e.g. ``'20 KiB'`` becomes ``('20KiB', 20480)``.
    """
    multipliers = {'KiB': 1024, 'MiB': 1024 ** 2}
    for spec in ('20 KiB', '400 KiB', '10 MiB'):
        count, unit = spec.split()
        yield spec.replace(' ', ''), int(count) * multipliers[unit]
27
28
def get_binary_files():
    """Lazily produce ``(filename, size)`` pairs for the binary test files.

    Each filename is the size label from get_file_sizes() plus ``.bin``.
    """
    for label, nbytes in get_file_sizes():
        yield label + ".bin", nbytes
31
32
def get_text_files():
    """Lazily produce ``(filename, size)`` pairs for the text test files.

    The filename embeds the size label plus the current TEXT_ENCODING and
    NEWLINES values, which are read when each pair is produced.
    """
    for label, nbytes in get_file_sizes():
        yield f"{label}-{TEXT_ENCODING}-{NEWLINES}.txt", nbytes
36
37
def with_open_mode(mode):
    """Decorator factory that records *mode* on the decorated function.

    The function itself is returned unchanged apart from gaining a
    ``file_open_mode`` attribute.
    """
    def tag(func):
        setattr(func, "file_open_mode", mode)
        return func
    return tag
43
44
def with_sizes(*sizes):
    """Decorator factory that records the size labels a test applies to.

    The labels are stored, as the tuple received, in the decorated
    function's ``file_sizes`` attribute; the function is returned as is.
    """
    def tag(func):
        setattr(func, "file_sizes", sizes)
        return func
    return tag
50
51
52 # Here begin the tests
53
# Mode "r" has neither "b" nor "t", so run_all_tests() runs this test in
# both the binary and the text family.  "medium" selects the 400 KiB file.
@with_open_mode("r")
@with_sizes("medium")
def read_bytewise(f):
    """ read one unit at a time """
    # NOTE: the docstring above is printed as the benchmark label.
    f.seek(0)  # rewind; the same open file is passed in on every call
    while f.read(1):  # read() returns an empty (falsy) value at EOF
        pass
61
62
# Runs in both the binary and the text family, on the medium file.
@with_open_mode("r")
@with_sizes("medium")
def read_small_chunks(f):
    """ read 20 units at a time """
    # NOTE: the docstring above is printed as the benchmark label.
    f.seek(0)  # rewind before each pass over the file
    while f.read(20):  # stops on the empty result returned at EOF
        pass
70
71
# Runs in both the binary and the text family, on the medium file.
@with_open_mode("r")
@with_sizes("medium")
def read_big_chunks(f):
    """ read 4096 units at a time """
    # NOTE: the docstring above is printed as the benchmark label.
    f.seek(0)  # rewind before each pass over the file
    while f.read(4096):  # stops on the empty result returned at EOF
        pass
79
80
# Runs in both families, once for each of the three file sizes.
@with_open_mode("r")
@with_sizes("small", "medium", "large")
def read_whole_file(f):
    """ read whole contents at once """
    # NOTE: the docstring above is printed as the benchmark label.
    f.seek(0)
    # The first read() returns everything; the loop then issues one more
    # read(), which returns an empty value at EOF and ends the loop.
    while f.read():
        pass
88
89
# Mode "rt" contains "t", so the binary family skips this test; it only
# runs against the medium text file.
@with_open_mode("rt")
@with_sizes("medium")
def read_lines(f):
    """ read one line at a time """
    # NOTE: the docstring above is printed as the benchmark label.
    f.seek(0)  # rewind before each pass over the file
    for line in f:  # iterate line by line, discarding each line
        pass
97
98
# Runs in both the binary and the text family, on the medium file.
@with_open_mode("r")
@with_sizes("medium")
def seek_forward_bytewise(f):
    """ seek forward one unit at a time """
    # NOTE: the docstring above is printed as the benchmark label.
    # Determine the end offset: seek to EOF (whence=2) and ask tell().
    f.seek(0, 2)
    size = f.tell()
    f.seek(0, 0)
    # Absolute seeks (whence=0) to every offset from 0 up to size - 2.
    for i in range(0, size - 1):
        f.seek(i, 0)
108
109
# Runs in both the binary and the text family, on the medium file.
@with_open_mode("r")
@with_sizes("medium")
def seek_forward_blockwise(f):
    """ seek forward 1000 units at a time """
    # NOTE: the docstring above is printed as the benchmark label.
    # Determine the end offset: seek to EOF (whence=2) and ask tell().
    f.seek(0, 2)
    size = f.tell()
    f.seek(0, 0)
    # Absolute seeks (whence=0) to offsets 0, 1000, 2000, ... below size - 1.
    for i in range(0, size - 1, 1000):
        f.seek(i, 0)
119
120
# Mode "rb" contains "b", so the text family skips this test; it only runs
# against the medium binary file.
@with_open_mode("rb")
@with_sizes("medium")
def read_seek_bytewise(f):
    """ alternate read & seek one unit """
    # NOTE: the docstring above is printed as the benchmark label.
    f.seek(0)
    while f.read(1):  # read one unit ...
        f.seek(1, 1)  # ... then skip one, relative to the current position
128
129
# Mode "rb" contains "b", so the text family skips this test; it only runs
# against the medium binary file.
@with_open_mode("rb")
@with_sizes("medium")
def read_seek_blockwise(f):
    """ alternate read & seek 1000 units """
    # NOTE: the docstring above is printed as the benchmark label.
    f.seek(0)
    while f.read(1000):  # read a 1000-unit block ...
        f.seek(1000, 1)  # ... then skip the next 1000 (whence=1: relative)
137
138
# Mode "w" runs in both families.  run_all_tests() passes a file opened on
# os.devnull as f and the full contents of the small input file as source.
@with_open_mode("w")
@with_sizes("small")
def write_bytewise(f, source):
    """ write one unit at a time """
    # NOTE: the docstring above is printed as the benchmark label.
    for i in range(0, len(source)):
        # Slice rather than index: indexing bytes would yield an int.
        f.write(source[i:i+1])
145
146
# Mode "w" runs in both families.  run_all_tests() passes a file opened on
# os.devnull as f and the full contents of the medium input file as source.
@with_open_mode("w")
@with_sizes("medium")
def write_small_chunks(f, source):
    """ write 20 units at a time """
    # NOTE: the docstring above is printed as the benchmark label.
    for i in range(0, len(source), 20):
        f.write(source[i:i+20])  # the last slice may be shorter than 20
153
154
# Mode "w" runs in both families.  run_all_tests() passes a file opened on
# os.devnull as f and the full contents of the medium input file as source.
@with_open_mode("w")
@with_sizes("medium")
def write_medium_chunks(f, source):
    """ write 4096 units at a time """
    # NOTE: the docstring above is printed as the benchmark label.
    for i in range(0, len(source), 4096):
        f.write(source[i:i+4096])  # the last slice may be shorter than 4096
161
162
# Mode "w" runs in both families.  run_all_tests() passes a file opened on
# os.devnull as f and the full contents of the large input file as source.
@with_open_mode("w")
@with_sizes("large")
def write_large_chunks(f, source):
    """ write 1e6 units at a time """
    # NOTE: the docstring above is printed as the benchmark label.
    for i in range(0, len(source), 1000000):
        f.write(source[i:i+1000000])  # the last slice may be shorter
169
170
# Mode "w+" runs in both families.  run_all_tests() opens the small input
# file for update ("r+b" / "r+") and passes its own contents as source, so
# the file is overwritten in place with identical data.
@with_open_mode("w+")
@with_sizes("small")
def modify_bytewise(f, source):
    """ modify one unit at a time """
    # NOTE: the docstring above is printed as the benchmark label.
    f.seek(0)  # start overwriting from the beginning on every call
    for i in range(0, len(source)):
        # Slice rather than index: indexing bytes would yield an int.
        f.write(source[i:i+1])
178
179
# Mode "w+" runs in both families.  run_all_tests() opens the medium input
# file for update and passes its own contents as source.
@with_open_mode("w+")
@with_sizes("medium")
def modify_small_chunks(f, source):
    """ modify 20 units at a time """
    # NOTE: the docstring above is printed as the benchmark label.
    f.seek(0)  # start overwriting from the beginning on every call
    for i in range(0, len(source), 20):
        f.write(source[i:i+20])
187
188
# Mode "w+" runs in both families.  run_all_tests() opens the medium input
# file for update and passes its own contents as source.
@with_open_mode("w+")
@with_sizes("medium")
def modify_medium_chunks(f, source):
    """ modify 4096 units at a time """
    # NOTE: the docstring above is printed as the benchmark label.
    f.seek(0)  # start overwriting from the beginning on every call
    for i in range(0, len(source), 4096):
        f.write(source[i:i+4096])
196
197
# Mode "wb+" contains "b", so the text family skips this test; it only
# runs against the medium binary file, opened for update.
@with_open_mode("wb+")
@with_sizes("medium")
def modify_seek_forward_bytewise(f, source):
    """ alternate write & seek one unit """
    # NOTE: the docstring above is printed as the benchmark label.
    f.seek(0)
    for i in range(0, len(source), 2):
        f.write(source[i:i+1])  # overwrite the unit at even offset i
        f.seek(i+2)             # absolute seek that skips the odd offset
206
207
# Mode "wb+" contains "b", so the text family skips this test; it only
# runs against the medium binary file, opened for update.
@with_open_mode("wb+")
@with_sizes("medium")
def modify_seek_forward_blockwise(f, source):
    """ alternate write & seek 1000 units """
    # NOTE: the docstring above is printed as the benchmark label.
    f.seek(0)
    for i in range(0, len(source), 2000):
        f.write(source[i:i+1000])  # overwrite the first 1000 units of the pair
        f.seek(i+2000)             # absolute seek past the second 1000
216
217
218 # XXX the 2 following tests don't work with py3k's text IO
# Mode "wb+" contains "b", so the text family skips this test; it only
# runs against the medium binary file, opened for update.
@with_open_mode("wb+")
@with_sizes("medium")
def read_modify_bytewise(f, source):
    """ alternate read & write one unit """
    # NOTE: the docstring above is printed as the benchmark label.
    f.seek(0)
    for i in range(0, len(source), 2):
        f.read(1)                    # consume the unit at offset i
        f.write(source[i+1:i+2])     # then overwrite the unit after it
227
228
# Mode "wb+" contains "b", so the text family skips this test; it only
# runs against the medium binary file, opened for update.
@with_open_mode("wb+")
@with_sizes("medium")
def read_modify_blockwise(f, source):
    """ alternate read & write 1000 units """
    # NOTE: the docstring above is printed as the benchmark label.
    f.seek(0)
    for i in range(0, len(source), 2000):
        f.read(1000)                     # consume a 1000-unit block
        f.write(source[i+1000:i+2000])   # then overwrite the block after it
237
238
# Benchmarks grouped by family, listed in the order they are run and
# reported.  A None entry is not a test: run_all_tests() writes a blank
# line for it, which visually separates groups in the output.

# Tests that only read or seek.
read_tests = [
    read_bytewise, read_small_chunks, read_lines, read_big_chunks,
    None, read_whole_file, None,
    seek_forward_bytewise, seek_forward_blockwise,
    read_seek_bytewise, read_seek_blockwise,
]

# Tests that write to a file opened on os.devnull by run_all_tests().
write_tests = [
    write_bytewise, write_small_chunks, write_medium_chunks, write_large_chunks,
]

# Tests that overwrite an existing input file in place.
modify_tests = [
    modify_bytewise, modify_small_chunks, modify_medium_chunks,
    None,
    modify_seek_forward_bytewise, modify_seek_forward_blockwise,
    read_modify_bytewise, read_modify_blockwise,
]
256
257
def run_during(duration, func):
    """Call *func* repeatedly until more than *duration* seconds have passed.

    *func* is always called at least once; the elapsed time is checked only
    after each call.

    Args:
        duration: minimum running time in seconds.
        func: zero-argument callable to run.

    Returns:
        A tuple ``(n, real, cpu)``: the number of calls made, the elapsed
        real time in seconds, and the user + system CPU time in seconds
        consumed by this process meanwhile.
    """
    # perf_counter() is monotonic and high-resolution; the wall clock
    # (time.time()) can be adjusted while the benchmark is running.
    clock = time.perf_counter
    start = os.times()
    start_timestamp = clock()
    # os.times()[4] (elapsed real time) may be reported as 0 on some
    # platforms; in that case fall back to our own clock for real time.
    have_os_elapsed = bool(start[4])
    n = 0
    while True:
        func()
        n += 1
        if clock() - start_timestamp > duration:
            break
    end = os.times()
    if have_os_elapsed:
        real = end[4] - start[4]
    else:
        real = clock() - start_timestamp
    cpu = sum(end[0:2]) - sum(start[0:2])
    return n, real, cpu
272
273
def warm_cache(filename):
    """Read *filename* completely in binary mode and discard the data.

    Presumably done so that the timed runs that follow find the file
    already cached by the operating system.
    """
    with open(filename, "rb") as stream:
        stream.read()
277
278
def run_all_tests(options):
    """Run the benchmark families selected by *options* and print results.

    Args:
        options: string of flag characters.  "b" and "t" enable the binary
            and text families; "r" enables the read tests and "w" enables
            the write ("append") and modify ("overwrite") tests.

    Section headers are printed with ``print``; per-test labels and results
    are written to the module-level ``out`` stream.
    """
    # Size label used by @with_sizes -> index into a family's file list.
    size_names = {
        "small": 0,
        "medium": 1,
        "large": 2,
    }

    def print_label(filename, func):
        # Label = the part of the file name before its first "-" or "."
        # followed by the test's docstring.  Flushed right away so it is
        # visible while the test is still running.
        name = re.split(r'[-.]', filename)[0]
        out.write(
            f"[{name.center(7)}] {func.__doc__.strip()}... ".ljust(52))
        out.flush()

    def print_results(size, n, real, cpu):
        # Throughput: n passes over `size` units in `real` seconds, in MiB/s.
        bw = n * float(size) / 1024 ** 2 / real
        bw = ("%4d MiB/s" if bw > 100 else "%.3g MiB/s") % bw
        out.write(bw.rjust(12) + "\n")
        if cpu < 0.90 * real:
            # One decimal place; a bare "%" format spec would print six.
            out.write(" warning: test above used only "
                      f"{cpu / real:.1%} CPU, "
                      "result may be flawed!\n")

    def run_one_test(name, size, open_func, test_func, *args):
        mode = test_func.file_open_mode
        print_label(name, test_func)
        # Pre-read the file for read tests and for "+" (update) tests;
        # plain "w" tests skip this step.
        if "w" not in mode or "+" in mode:
            warm_cache(name)
        with open_func(name) as f:
            n, real, cpu = run_during(1.5, lambda: test_func(f, *args))
        print_results(size, n, real, cpu)

    def run_test_family(tests, mode_filter, files, open_func, *make_args):
        for test_func in tests:
            if test_func is None:
                # Group separator inside the test lists.
                out.write("\n")
                continue
            # Skip tests declared for the other kind of file ("t" or "b").
            if mode_filter in test_func.file_open_mode:
                continue
            for s in test_func.file_sizes:
                name, size = files[size_names[s]]
                # Each make_args callable builds one extra positional
                # argument for the test from the file's name and size.
                args = tuple(f(name, size) for f in make_args)
                run_one_test(name, size,
                             open_func, test_func, *args)

    def load_binary_source(name, size):
        # `size` is unused; it is part of the make_args call signature.
        with open(name, "rb") as f:
            return f.read()

    def load_text_source(name, size):
        # `size` is unused; it is part of the make_args call signature.
        with text_open(name, "r") as f:
            return f.read()

    print(f"Python {sys.version}")
    print("Unicode: PEP 393")
    print(platform.platform())
    binary_files = list(get_binary_files())
    text_files = list(get_text_files())
    if "b" in options:
        print("Binary unit = one byte")
    if "t" in options:
        print(f"Text unit = one character ({TEXT_ENCODING}-decoded)")

    # Binary reads
    if "b" in options and "r" in options:
        print("\n** Binary input **\n")
        run_test_family(read_tests, "t", binary_files, lambda fn: open(fn, "rb"))

    # Text reads
    if "t" in options and "r" in options:
        print("\n** Text input **\n")
        run_test_family(read_tests, "b", text_files, lambda fn: text_open(fn, "r"))

    # Binary writes (the output goes to os.devnull, not to the input file)
    if "b" in options and "w" in options:
        print("\n** Binary append **\n")
        run_test_family(write_tests, "t", binary_files,
                        lambda fn: open(os.devnull, "wb"), load_binary_source)

    # Text writes (the output goes to os.devnull, not to the input file)
    if "t" in options and "w" in options:
        print("\n** Text append **\n")
        run_test_family(write_tests, "b", text_files,
                        lambda fn: text_open(os.devnull, "w"), load_text_source)

    # Binary overwrites (the input file is rewritten with its own contents)
    if "b" in options and "w" in options:
        print("\n** Binary overwrite **\n")
        run_test_family(modify_tests, "t", binary_files,
                        lambda fn: open(fn, "r+b"), load_binary_source)

    # Text overwrites (the input file is rewritten with its own contents)
    if "t" in options and "w" in options:
        print("\n** Text overwrite **\n")
        run_test_family(modify_tests, "b", text_files,
                        lambda fn: text_open(fn, "r+"), load_text_source)
383
384
def prepare_files():
    """Create the benchmark input files in the current directory.

    Binary files are filled with random bytes.  Text files are built by
    repeating the text that follows the ``# <iobench text chunk marker>``
    line of this script, with its newlines rewritten according to NEWLINES
    and the result encoded with TEXT_ENCODING.  A file that already exists
    with the size this function would write is left untouched.

    Raises:
        RuntimeError: if the marker line is not found in this script, or
            if no text follows it.
    """
    print("Preparing files...")
    # Binary files
    for name, size in get_binary_files():
        if os.path.isfile(name) and os.path.getsize(name) == size:
            continue
        with open(name, "wb") as f:
            f.write(os.urandom(size))
    # Text files
    with text_open(__file__, "r", encoding='utf8') as f:
        for line in f:
            if line.startswith("# <iobench text chunk marker>"):
                break
        else:
            raise RuntimeError(
                f"Couldn't find chunk marker in {__file__} !")
        if NEWLINES == "all":
            it = itertools.cycle(["\n", "\r", "\r\n"])
        else:
            it = itertools.repeat(
                {"cr": "\r", "lf": "\n", "crlf": "\r\n"}[NEWLINES])
        # The loop above stopped on the marker line, so iterating f again
        # yields only the lines that come after it.
        chunk = "".join(line.replace("\n", next(it)) for line in f)
    chunk = chunk.encode(TEXT_ENCODING)
    if not chunk:
        # Without this check the size arithmetic below would fail with a
        # bare ZeroDivisionError.
        raise RuntimeError(
            f"No text found after the chunk marker in {__file__} !")
    for name, size in get_text_files():
        repeats = size // len(chunk)
        tail = chunk[:size % len(chunk)]
        # Adjust tail to end on a character boundary
        while True:
            try:
                tail.decode(TEXT_ENCODING)
            except UnicodeDecodeError:
                tail = tail[:-1]
            else:
                break
        # Trimming the tail can leave the file slightly shorter than
        # `size`, so compare against the size that is actually written;
        # comparing against `size` would rewrite such a file on every run.
        written_size = repeats * len(chunk) + len(tail)
        if os.path.isfile(name) and os.path.getsize(name) == written_size:
            continue
        with open(name, "wb") as f:
            f.write(chunk * repeats)
            f.write(tail)
426
427
def main():
    """Parse the command line, prepare the input files and run the tests.

    With neither -r nor -w given, both read and write tests run; with
    neither -t nor -b given, both text and binary tests run.  Invalid
    option values are reported through ``parser.error()``, which exits
    the program.
    """
    global TEXT_ENCODING, NEWLINES
    import codecs  # only needed here, to validate --encoding

    usage = "usage: %prog [-h|--help] [options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-b", "--binary",
                      action="store_true", dest="binary", default=False,
                      help="run binary I/O tests")
    parser.add_option("-t", "--text",
                      action="store_true", dest="text", default=False,
                      help="run text I/O tests")
    parser.add_option("-r", "--read",
                      action="store_true", dest="read", default=False,
                      help="run read tests")
    parser.add_option("-w", "--write",
                      action="store_true", dest="write", default=False,
                      help="run write & modify tests")
    parser.add_option("-E", "--encoding",
                      action="store", dest="encoding", default=None,
                      help=f"encoding for text tests (default: {TEXT_ENCODING})")
    parser.add_option("-N", "--newlines",
                      action="store", dest="newlines", default='lf',
                      help="line endings for text tests "
                           "(one of: {lf (default), cr, crlf, all})")
    parser.add_option("-m", "--io-module",
                      action="store", dest="io_module", default=None,
                      help="io module to test (default: builtin open())")
    options, args = parser.parse_args()
    if args:
        parser.error("unexpected arguments")
    NEWLINES = options.newlines.lower()
    if NEWLINES not in ('lf', 'cr', 'crlf', 'all'):
        parser.error(f"invalid 'newlines' option: {NEWLINES!r}")
    if options.encoding:
        # Reject unknown codec names up front, like --newlines above,
        # instead of failing later while the input files are being written.
        try:
            codecs.lookup(options.encoding)
        except LookupError:
            parser.error(f"invalid 'encoding' option: {options.encoding!r}")

    # Flag string consumed by run_all_tests(): "r"/"w" select the kind of
    # test, "t"/"b" select the kind of file.
    test_options = ""
    if options.read:
        test_options += "r"
    if options.write:
        test_options += "w"
    elif not options.read:
        test_options += "rw"
    if options.text:
        test_options += "t"
    if options.binary:
        test_options += "b"
    elif not options.text:
        test_options += "tb"

    if options.encoding:
        TEXT_ENCODING = options.encoding

    if options.io_module:
        # Rebind this module's global `open` so every helper that calls
        # open() uses the requested module's implementation.
        globals()['open'] = __import__(options.io_module, {}, {}, ['open']).open

    prepare_files()
    run_all_tests(test_options)
484
485
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()
488
489
490 # -- This part to exercise text reading. Don't change anything! --
491 # <iobench text chunk marker>
492
493 """
494 1.
495 Gáttir allar,
496 áðr gangi fram,
497 um skoðask skyli,
498 um skyggnast skyli,
499 því at óvíst er at vita,
500 hvar óvinir
501 sitja á fleti fyrir.
502
503 2.
504 Gefendr heilir!
505 Gestr er inn kominn,
506 hvar skal sitja sjá?
507 Mjök er bráðr,
508 sá er á bröndum skal
509 síns of freista frama.
510
511 3.
512 Elds er þörf,
513 þeims inn er kominn
514 ok á kné kalinn;
515 matar ok váða
516 er manni þörf,
517 þeim er hefr um fjall farit.
518
519 4.
520 Vatns er þörf,
521 þeim er til verðar kemr,
522 þerru ok þjóðlaðar,
523 góðs of æðis,
524 ef sér geta mætti,
525 orðs ok endrþögu.
526
527 5.
528 Vits er þörf,
529 þeim er víða ratar;
530 dælt er heima hvat;
531 at augabragði verðr,
532 sá er ekki kann
533 ok með snotrum sitr.
534
535 6.
536 At hyggjandi sinni
537 skyli-t maðr hræsinn vera,
538 heldr gætinn at geði;
539 þá er horskr ok þögull
540 kemr heimisgarða til,
541 sjaldan verðr víti vörum,
542 því at óbrigðra vin
543 fær maðr aldregi
544 en mannvit mikit.
545
546 7.
547 Inn vari gestr,
548 er til verðar kemr,
549 þunnu hljóði þegir,
550 eyrum hlýðir,
551 en augum skoðar;
552 svá nýsisk fróðra hverr fyrir.
553
554 8.
555 Hinn er sæll,
556 er sér of getr
557 lof ok líknstafi;
558 ódælla er við þat,
559 er maðr eiga skal
560 annars brjóstum í.
561 """
562
563 """
564 C'est revenir tard, je le sens, sur un sujet trop rebattu et déjà presque oublié. Mon état, qui ne me permet plus aucun travail suivi, mon aversion pour le genre polémique, ont causé ma lenteur à écrire et ma répugnance à publier. J'aurais même tout à fait supprimé ces Lettres, ou plutôt je lie les aurais point écrites, s'il n'eût été question que de moi : Mais ma patrie ne m'est pas tellement devenue étrangère que je puisse voir tranquillement opprimer ses citoyens, surtout lorsqu'ils n'ont compromis leurs droits qu'en défendant ma cause. Je serais le dernier des hommes si dans une telle occasion j'écoutais un sentiment qui n'est plus ni douceur ni patience, mais faiblesse et lâcheté, dans celui qu'il empêche de remplir son devoir.
565 Rien de moins important pour le public, j'en conviens, que la matière de ces lettres. La constitution d'une petite République, le sort d'un petit particulier, l'exposé de quelques injustices, la réfutation de quelques sophismes ; tout cela n'a rien en soi d'assez considérable pour mériter beaucoup de lecteurs : mais si mes sujets sont petits mes objets sont grands, et dignes de l'attention de tout honnête homme. Laissons Genève à sa place, et Rousseau dans sa dépression ; mais la religion, mais la liberté, la justice ! voilà, qui que vous soyez, ce qui n'est pas au-dessous de vous.
566 Qu'on ne cherche pas même ici dans le style le dédommagement de l'aridité de la matière. Ceux que quelques traits heureux de ma plume ont si fort irrités trouveront de quoi s'apaiser dans ces lettres, L'honneur de défendre un opprimé eût enflammé mon coeur si j'avais parlé pour un autre. Réduit au triste emploi de me défendre moi-même, j'ai dû me borner à raisonner ; m'échauffer eût été m'avilir. J'aurai donc trouvé grâce en ce point devant ceux qui s'imaginent qu'il est essentiel à la vérité d'être dite froidement ; opinion que pourtant j'ai peine à comprendre. Lorsqu'une vive persuasion nous anime, le moyen d'employer un langage glacé ? Quand Archimède tout transporté courait nu dans les rues de Syracuse, en avait-il moins trouvé la vérité parce qu'il se passionnait pour elle ? Tout au contraire, celui qui la sent ne peut s'abstenir de l'adorer ; celui qui demeure froid ne l'a pas vue.
567 Quoi qu'il en soit, je prie les lecteurs de vouloir bien mettre à part mon beau style, et d'examiner seulement si je raisonne bien ou mal ; car enfin, de cela seul qu'un auteur s'exprime en bons termes, je ne vois pas comment il peut s'ensuivre que cet auteur ne sait ce qu'il dit.
568 """