#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
#   Doc/library/token-list.inc
#   Include/internal/pycore_token.h
#   Parser/token.c
#   Lib/token.py
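#
# A typical invocation is sketched below (an illustration, not taken from the
# build files; it assumes the script is run from the CPython source root, and
# the op names 'h', 'c', 'rst' and 'py' are dispatched to the make_* functions
# via main() at the bottom of this file):
#
#   python3 Tools/build/generate_token.py py Grammar/Tokens Lib/token.py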


SCRIPT_NAME = 'Tools/build/generate_token.py'
AUTO_GENERATED_BY_SCRIPT = f'Auto-generated by {SCRIPT_NAME}'
NT_OFFSET = 256

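# A sketch of the input format that load_tokens() below expects (the real
# data lives in Grammar/Tokens): one token name per line, optionally followed
# by the quoted operator string, with '#' starting a comment.  For a fragment
# such as
#
#   ENDMARKER
#   NAME
#   LPAR  '('
#   RPAR  ')'
#
# load_tokens() would return (['ENDMARKER', 'NAME', 'LPAR', 'RPAR'], None,
# {'(': 2, ')': 3}); ERRORTOKEN is None only because this fragment omits it.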
def load_tokens(path):
    """Parse a Grammar/Tokens-style file.

    Return (tok_names, ERRORTOKEN, string_to_tok): the ordered list of token
    names, the index of ERRORTOKEN (or None if absent), and a mapping from
    operator strings such as '(' to their token values.
    """
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                string = eval(string)   # strip the quoting, e.g. "'('" -> '('
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok


def update_file(file, content):
    """Write content to file; return True if the file changed.

    If the file already holds exactly this content, leave it untouched and
    return False.
    """
    try:
        with open(file, 'r') as fobj:
            if fobj.read() == content:
                return False
    except (OSError, ValueError):
        pass
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True


token_h_template = f"""\
/* {AUTO_GENERATED_BY_SCRIPT} */
"""
token_h_template += """\

/* Token types */
#ifndef Py_INTERNAL_TOKEN_H
#define Py_INTERNAL_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE define"
#endif

#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS        %d
#define NT_OFFSET       %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x)           ((x) < NT_OFFSET)
#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
#define ISEOF(x)                ((x) == ENDMARKER)
#define ISWHITESPACE(x)         ((x) == ENDMARKER || \\
                                 (x) == NEWLINE   || \\
                                 (x) == INDENT    || \\
                                 (x) == DEDENT)
#define ISSTRINGLIT(x)          ((x) == STRING          || \\
                                 (x) == FSTRING_MIDDLE)


// Symbols exported for test_peg_generator
PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) _PyToken_OneChar(int);
PyAPI_FUNC(int) _PyToken_TwoChars(int, int);
PyAPI_FUNC(int) _PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif  // !Py_INTERNAL_TOKEN_H
"""

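# make_h() below fills the %s slot of token_h_template with one #define per
# token name up to and including ERRORTOKEN.  For the usual Grammar/Tokens
# the first few lines come out roughly as follows (a sketch based on the
# "#define %-15s %d" format; the exact names and numbering are whatever the
# input file provides):
#
#   #define ENDMARKER       0
#   #define NAME            1
#   #define NUMBER          2
#   #define STRING          3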
def make_h(infile, outfile='Include/internal/pycore_token.h'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        defines.append("#define %-15s %d\n" % (name, value))

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_c_template = f"""\
/* {AUTO_GENERATED_BY_SCRIPT} */
"""
token_c_template += """\

#include "Python.h"
#include "pycore_token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
_PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
_PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
_PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

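# generate_chars_to_token() turns a nested {char: ...: token name} mapping
# into a nested C switch statement.  For example (a sketch; the names are
# illustrative entries for the '**' and '*=' operators), the mapping
# {'*': {'*': 'DOUBLESTAR', '=': 'STAREQUAL'}} with n=1 renders as:
#
#     switch (c1) {
#     case '*':
#         switch (c2) {
#         case '*': return DOUBLESTAR;
#         case '=': return STAREQUAL;
#         }
#         break;
#     }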
def generate_chars_to_token(mapping, n=1):
    """Emit a C switch on c<n>, recursing for multi-character operators."""
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            # Another character follows: recurse into a nested switch.
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)

def make_c(infile, outfile='Parser/token.c'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    # '<>' is tokenized exactly like '!='.
    string_to_tok['<>'] = string_to_tok['!=']
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_inc_template = f"""\
.. {AUTO_GENERATED_BY_SCRIPT}
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""

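# make_rst() below expands the %s slot of token_inc_template into one
# ".. data::" directive per token; operator tokens also get a short
# description.  A sketch of the output for a single operator token (LPAR is
# just an illustrative name):
#
#   .. data:: LPAR
#
#      Token value for ``"("``.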
def make_rst(infile, outfile='Doc/library/token-list.inc'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))


token_py_template = f'''\
"""Token constants."""
# {AUTO_GENERATED_BY_SCRIPT}
'''
token_py_template += '''
__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''

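# make_py() below fills the first %s slot with constant definitions such as
# "ENDMARKER = 0" and the EXACT_TOKEN_TYPES slot with entries such as
# "    '!=': NOTEQUAL,"  (a sketch; the concrete names and values come from
# whatever Grammar/Tokens defines).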
def make_py(infile, outfile='Lib/token.py'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    constants.insert(ERRORTOKEN,
                     "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
        )):
        print("%s regenerated from %s" % (outfile, infile))


def main(op, infile='Grammar/Tokens', *args):
    # op selects one of the make_* functions above: 'h', 'c', 'rst' or 'py'.
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])