1 #!/usr/bin/env python3
2
3 """
4 Markov chain simulation of words or characters.
5 """
6
class Markov:
    """Markov chain model over sliceable sequences (strings or tuples).

    States are slices of at most ``histsize`` trailing items of the
    sequence seen so far, and are used as dictionary keys, so slices of
    the training sequences must be hashable (``str`` and ``tuple`` are).

    Attributes:
        histsize: maximum number of trailing items that form a state.
        choice: callable that picks one element from a non-empty list
            (for example ``random.choice``).
        trans: dict mapping each state to the list of observed
            successors.  The key ``None`` holds the possible starting
            values; a successor of ``None`` marks end-of-sequence.
    """

    def __init__(self, histsize, choice):
        """Create an empty model with the given history size and chooser."""
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that *next* was observed immediately after *state*.

        The parameter name ``next`` shadows the builtin but is kept so
        that existing keyword-argument callers continue to work.
        """
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Train the model on one complete sequence *seq*.

        Registers the empty start value, one transition per item (keyed
        by up to ``histsize`` preceding items), and a final ``None``
        successor marking the end of the sequence.
        """
        n = self.histsize
        add = self.add
        # The empty slice has the same type as seq, so get() can extend it.
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # Clamp the start index at 0: when len(seq) < n a negative index
        # would wrap around and file the terminator under a state that
        # get() never looks up, causing a KeyError during generation.
        add(seq[max(0, len(seq) - n):], None)

    def get(self):
        """Generate and return one sequence by walking the chain.

        Starts from a value chosen among the recorded start values and
        repeatedly appends a chosen successor of the current state until
        a falsy successor (the ``None`` end marker) is chosen.

        Raises:
            KeyError: if the model has not been trained, or the walk
                reaches a state with no recorded successors.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            # Current state: the last (at most) n items generated so far.
            state = seq[max(0, len(seq) - n):]
            step = choice(trans[state])
            if not step:
                break
            seq += step
        return seq
37
38
def test():
    """Command-line driver: train a Markov model and print generated text.

    Options are read from ``sys.argv``: a single digit sets the history
    size (default 2), ``-c``/``-w`` select character or word mode,
    ``-d`` increases and ``-q`` disables debugging output.  Remaining
    arguments are input files; ``-`` (the default) means standard input.

    Each input is split into paragraphs on blank lines; every non-empty
    paragraph is normalised to single-space-separated words and fed to
    the model.  Generated paragraphs are then printed in an endless
    loop, wrapped at 72 columns, until the process is interrupted.
    Exits with status 2 on a usage error.
    """
    import getopt
    import random
    import sys

    args = sys.argv[1:]
    try:
        opts, args = getopt.getopt(args, '0123456789cdwq')
    except getopt.error:
        print('Usage: %s [-#] [-cddqw] [file] ...' % sys.argv[0])
        print('Options:')
        print('-#: 1-digit history size (default 2)')
        print('-c: characters (default)')
        print('-w: words')
        print('-d: more debugging output')
        print('-q: no debugging output')
        print('Input files (default stdin) are split in paragraphs')
        print('separated blank lines and each paragraph is split')
        print('in words by whitespace, then reconcatenated with')
        print('exactly one space separating words.')
        print('Output consists of paragraphs separated by blank')
        print('lines, where lines are no longer than 72 characters.')
        sys.exit(2)
    histsize = 2
    do_words = False
    debug = 1
    for o, _ in opts:
        if '-0' <= o <= '-9':
            histsize = int(o[1:])
        if o == '-c':
            do_words = False
        if o == '-d':
            debug += 1
        if o == '-q':
            debug = 0
        if o == '-w':
            do_words = True
    if not args:
        args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                if sys.stdin.isatty():
                    print('Sorry, need stdin from file')
                    continue
                if debug:
                    print('processing', filename, '...')
                # Read stdin without closing it: a closed stdin would make
                # isatty() raise ValueError if '-' is given more than once.
                text = sys.stdin.read()
            else:
                with open(filename, 'r') as f:
                    if debug:
                        print('processing', filename, '...')
                    text = f.read()
            paralist = text.split('\n\n')
            for para in paralist:
                if debug > 1:
                    print('feeding ...')
                words = para.split()
                if words:
                    if do_words:
                        data = tuple(words)
                    else:
                        data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug:
        print('done.')

    if debug > 1:
        # Dump the start values and the states shorter than the history.
        for key in m.trans:
            if key is None or len(key) < histsize:
                print(repr(key), m.trans[key])
        if histsize == 0:
            # The empty state is not shown by the loop above (0 < 0 is
            # false).  Its key has the type of the training data: an
            # empty tuple in word mode, an empty string otherwise.
            empty = () if do_words else ''
            print(repr(empty), m.trans[empty])
        print()
    while True:
        data = m.get()
        if do_words:
            words = data
        else:
            words = data.split()
        n = 0  # number of columns used on the current output line
        limit = 72
        for w in words:
            if n + len(w) > limit:
                print()
                n = 0
            print(w, end=' ')
            n += len(w) + 1
        print()
        print()
123
# Script entry point: run the command-line driver only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    test()