#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
#     Doc/library/token-list.inc
#     Include/token.h
#     Parser/token.c
#     Lib/token.py
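#
# Invocation sketch (the first command-line argument selects one of the
# make_<op>() generators defined below; the remaining arguments override the
# default input/output paths shown here):
#
#     python3 Tools/scripts/generate_token.py h   Grammar/Tokens Include/token.h
#     python3 Tools/scripts/generate_token.py c   Grammar/Tokens Parser/token.c
#     python3 Tools/scripts/generate_token.py rst Grammar/Tokens Doc/library/token-list.inc
#     python3 Tools/scripts/generate_token.py py  Grammar/Tokens Lib/token.py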

NT_OFFSET = 256
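
# load_tokens() below parses the Grammar/Tokens input file.  Each non-blank,
# non-comment line holds a token name and, for "exact" tokens, a quoted
# operator string, e.g. (illustrative):
#
#     ENDMARKER
#     NAME
#     LPAR                    '('
#     NOTEQUAL                '!='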

def load_tokens(path):
    """Parse Grammar/Tokens; return (tok_names, ERRORTOKEN value, string_to_tok)."""
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                string = eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok


def update_file(file, content):
    """Write *content* to *file* only if it changed; return True if rewritten."""
    try:
        with open(file, 'r') as fobj:
            if fobj.read() == content:
                return False
    except (OSError, ValueError):
        pass
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True


token_h_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

/* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS        %d
#define NT_OFFSET       %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x)           ((x) < NT_OFFSET)
#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
#define ISEOF(x)                ((x) == ENDMARKER)
#define ISWHITESPACE(x)         ((x) == ENDMARKER || \\
                                 (x) == NEWLINE   || \\
                                 (x) == INDENT    || \\
                                 (x) == DEDENT)


PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
"""

def make_h(infile, outfile='Include/token.h'):
    """Regenerate Include/token.h from *infile* (Grammar/Tokens)."""
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        defines.append("#define %-15s %d\n" % (name, value))

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

def generate_chars_to_token(mapping, n=1):
    """Emit a nested C switch mapping operator characters to token names."""
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)

def make_c(infile, outfile='Parser/token.c'):
    """Regenerate Parser/token.c from *infile* (Grammar/Tokens)."""
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    # Treat '<>' like '!='; it has no entry of its own in Grammar/Tokens.
    string_to_tok['<>'] = string_to_tok['!=']
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""

def make_rst(infile, outfile='Doc/library/token-list.inc'):
    """Regenerate Doc/library/token-list.inc from *infile* (Grammar/Tokens)."""
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))


token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''
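
# make_py() below fills the constants block and EXACT_TOKEN_TYPES above; the
# latter maps the exact operator strings back to their token constants,
# e.g. (illustrative):
#
#     EXACT_TOKEN_TYPES = {
#         '!=': NOTEQUAL,
#         '(': LPAR,
#     }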

def make_py(infile, outfile='Lib/token.py'):
    """Regenerate Lib/token.py from *infile* (Grammar/Tokens)."""
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    constants.insert(ERRORTOKEN,
        "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
        )):
        print("%s regenerated from %s" % (outfile, infile))


def main(op, infile='Grammar/Tokens', *args):
    """Dispatch to make_<op>(infile, ...) for op in {'h', 'c', 'rst', 'py'}."""
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])