1 import itertools
2 import os
3 import pathlib
4 import sys
5 import sysconfig
6 import tempfile
7 import tokenize
8 from typing import IO, Dict, List, Optional, Set, Tuple
9
10 from pegen.c_generator import CParserGenerator
11 from pegen.grammar import Grammar
12 from pegen.grammar_parser import GeneratedParser as GrammarParser
13 from pegen.parser import Parser
14 from pegen.parser_generator import ParserGenerator
15 from pegen.python_generator import PythonParserGenerator
16 from pegen.tokenizer import Tokenizer
17
18 MOD_DIR = pathlib.Path(__file__).resolve().parent
19
20 TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]
21
22
23 def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
24 flags = sysconfig.get_config_var(compiler_flags)
25 py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
26 if flags is None or py_flags_nodist is None:
27 return []
28 return f"{flags} {py_flags_nodist}".split()
29
30
31 def fixup_build_ext(cmd):
32 """Function needed to make build_ext tests pass.
33
34 When Python was built with --enable-shared on Unix, -L. is not enough to
35 find libpython<blah>.so, because regrtest runs in a tempdir, not in the
36 source directory where the .so lives.
37
38 When Python was built with in debug mode on Windows, build_ext commands
39 need their debug attribute set, and it is not done automatically for
40 some reason.
41
42 This function handles both of these things. Example use:
43
44 cmd = build_ext(dist)
45 support.fixup_build_ext(cmd)
46 cmd.ensure_finalized()
47
48 Unlike most other Unix platforms, Mac OS X embeds absolute paths
49 to shared libraries into executables, so the fixup is not needed there.
50
51 Taken from distutils (was part of the CPython stdlib until Python 3.11)
52 """
53 if os.name == 'nt':
54 cmd.debug = sys.executable.endswith('_d.exe')
55 elif sysconfig.get_config_var('Py_ENABLE_SHARED'):
56 # To further add to the shared builds fun on Unix, we can't just add
57 # library_dirs to the Extension() instance because that doesn't get
58 # plumbed through to the final compiler command.
59 runshared = sysconfig.get_config_var('RUNSHARED')
60 if runshared is None:
61 cmd.library_dirs = ['.']
62 else:
63 if sys.platform == 'darwin':
64 cmd.library_dirs = []
65 else:
66 name, equals, value = runshared.partition('=')
67 cmd.library_dirs = [d for d in value.split(os.pathsep) if d]
68
69
70
71 def compile_c_extension(
72 generated_source_path: str,
73 build_dir: Optional[str] = None,
74 verbose: bool = False,
75 keep_asserts: bool = True,
76 disable_optimization: bool = False,
77 library_dir: Optional[str] = None,
78 ) -> str:
79 """Compile the generated source for a parser generator into an extension module.
80
81 The extension module will be generated in the same directory as the provided path
82 for the generated source, with the same basename (in addition to extension module
83 metadata). For example, for the source mydir/parser.c the generated extension
84 in a darwin system with python 3.8 will be mydir/parser.cpython-38-darwin.so.
85
86 If *build_dir* is provided, that path will be used as the temporary build directory
87 of distutils (this is useful in case you want to use a temporary directory).
88
89 If *library_dir* is provided, that path will be used as the directory for a
90 static library of the common parser sources (this is useful in case you are
91 creating multiple extensions).
92 """
93 import setuptools.logging
94
95 from setuptools import Extension, Distribution
96 from setuptools._distutils.dep_util import newer_group
97 from setuptools._distutils.ccompiler import new_compiler
98 from setuptools._distutils.sysconfig import customize_compiler
99
100 if verbose:
101 setuptools.logging.set_threshold(setuptools.logging.logging.DEBUG)
102
103 source_file_path = pathlib.Path(generated_source_path)
104 extension_name = source_file_path.stem
105 extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
106 extra_compile_args.append("-DPy_BUILD_CORE_MODULE")
107 # Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c
108 extra_compile_args.append("-D_Py_TEST_PEGEN")
109 extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
110 if keep_asserts:
111 extra_compile_args.append("-UNDEBUG")
112 if disable_optimization:
113 if sys.platform == 'win32':
114 extra_compile_args.append("/Od")
115 extra_link_args.append("/LTCG:OFF")
116 else:
117 extra_compile_args.append("-O0")
118 if sysconfig.get_config_var("GNULD") == "yes":
119 extra_link_args.append("-fno-lto")
120
121 common_sources = [
122 str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
123 str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
124 str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
125 str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
126 str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
127 str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
128 str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
129 str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
130 ]
131 include_dirs = [
132 str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
133 str(MOD_DIR.parent.parent.parent / "Parser"),
134 ]
135 extension = Extension(
136 extension_name,
137 sources=[generated_source_path],
138 extra_compile_args=extra_compile_args,
139 extra_link_args=extra_link_args,
140 )
141 dist = Distribution({"name": extension_name, "ext_modules": [extension]})
142 cmd = dist.get_command_obj("build_ext")
143 fixup_build_ext(cmd)
144 cmd.build_lib = str(source_file_path.parent)
145 cmd.include_dirs = include_dirs
146 if build_dir:
147 cmd.build_temp = build_dir
148 cmd.ensure_finalized()
149
150 compiler = new_compiler()
151 customize_compiler(compiler)
152 compiler.set_include_dirs(cmd.include_dirs)
153 compiler.set_library_dirs(cmd.library_dirs)
154 # build static lib
155 if library_dir:
156 library_filename = compiler.library_filename(extension_name,
157 output_dir=library_dir)
158 if newer_group(common_sources, library_filename, 'newer'):
159 if sys.platform == 'win32':
160 pdb = compiler.static_lib_format % (extension_name, '.pdb')
161 compile_opts = [f"/Fd{library_dir}\\{pdb}"]
162 compile_opts.extend(extra_compile_args)
163 else:
164 compile_opts = extra_compile_args
165 objects = compiler.compile(common_sources,
166 output_dir=library_dir,
167 debug=cmd.debug,
168 extra_postargs=compile_opts)
169 compiler.create_static_lib(objects, extension_name,
170 output_dir=library_dir,
171 debug=cmd.debug)
172 if sys.platform == 'win32':
173 compiler.add_library_dir(library_dir)
174 extension.libraries = [extension_name]
175 elif sys.platform == 'darwin':
176 compiler.set_link_objects([
177 '-Wl,-force_load', library_filename,
178 ])
179 else:
180 compiler.set_link_objects([
181 '-Wl,--whole-archive', library_filename, '-Wl,--no-whole-archive',
182 ])
183 else:
184 extension.sources[0:0] = common_sources
185
186 # Compile the source code to object files.
187 ext_path = cmd.get_ext_fullpath(extension_name)
188 if newer_group(extension.sources, ext_path, 'newer'):
189 objects = compiler.compile(extension.sources,
190 output_dir=cmd.build_temp,
191 debug=cmd.debug,
192 extra_postargs=extra_compile_args)
193 else:
194 objects = compiler.object_filenames(extension.sources,
195 output_dir=cmd.build_temp)
196 # Now link the object files together into a "shared object"
197 compiler.link_shared_object(
198 objects, ext_path,
199 libraries=cmd.get_libraries(extension),
200 extra_postargs=extra_link_args,
201 export_symbols=cmd.get_export_symbols(extension),
202 debug=cmd.debug,
203 build_temp=cmd.build_temp)
204
205 return pathlib.Path(ext_path)
206
207
208 def build_parser(
209 grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
210 ) -> Tuple[Grammar, Parser, Tokenizer]:
211 with open(grammar_file) as file:
212 tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
213 parser = GrammarParser(tokenizer, verbose=verbose_parser)
214 grammar = parser.start()
215
216 if not grammar:
217 raise parser.make_syntax_error(grammar_file)
218
219 return grammar, parser, tokenizer
220
221
222 def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions:
223 all_tokens = {}
224 exact_tokens = {}
225 non_exact_tokens = set()
226 numbers = itertools.count(0)
227
228 for line in tokens:
229 line = line.strip()
230
231 if not line or line.startswith("#"):
232 continue
233
234 pieces = line.split()
235 index = next(numbers)
236
237 if len(pieces) == 1:
238 (token,) = pieces
239 non_exact_tokens.add(token)
240 all_tokens[index] = token
241 elif len(pieces) == 2:
242 token, op = pieces
243 exact_tokens[op.strip("'")] = index
244 all_tokens[index] = token
245 else:
246 raise ValueError(f"Unexpected line found in Tokens file: {line}")
247
248 return all_tokens, exact_tokens, non_exact_tokens
249
250
251 def build_c_generator(
252 grammar: Grammar,
253 grammar_file: str,
254 tokens_file: str,
255 output_file: str,
256 compile_extension: bool = False,
257 verbose_c_extension: bool = False,
258 keep_asserts_in_extension: bool = True,
259 skip_actions: bool = False,
260 ) -> ParserGenerator:
261 with open(tokens_file, "r") as tok_file:
262 all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
263 with open(output_file, "w") as file:
264 gen: ParserGenerator = CParserGenerator(
265 grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions
266 )
267 gen.generate(grammar_file)
268
269 if compile_extension:
270 with tempfile.TemporaryDirectory() as build_dir:
271 compile_c_extension(
272 output_file,
273 build_dir=build_dir,
274 verbose=verbose_c_extension,
275 keep_asserts=keep_asserts_in_extension,
276 )
277 return gen
278
279
280 def build_python_generator(
281 grammar: Grammar,
282 grammar_file: str,
283 output_file: str,
284 skip_actions: bool = False,
285 ) -> ParserGenerator:
286 with open(output_file, "w") as file:
287 gen: ParserGenerator = PythonParserGenerator(grammar, file) # TODO: skip_actions
288 gen.generate(grammar_file)
289 return gen
290
291
292 def build_c_parser_and_generator(
293 grammar_file: str,
294 tokens_file: str,
295 output_file: str,
296 compile_extension: bool = False,
297 verbose_tokenizer: bool = False,
298 verbose_parser: bool = False,
299 verbose_c_extension: bool = False,
300 keep_asserts_in_extension: bool = True,
301 skip_actions: bool = False,
302 ) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
303 """Generate rules, C parser, tokenizer, parser generator for a given grammar
304
305 Args:
306 grammar_file (string): Path for the grammar file
307 tokens_file (string): Path for the tokens file
308 output_file (string): Path for the output file
309 compile_extension (bool, optional): Whether to compile the C extension.
310 Defaults to False.
311 verbose_tokenizer (bool, optional): Whether to display additional output
312 when generating the tokenizer. Defaults to False.
313 verbose_parser (bool, optional): Whether to display additional output
314 when generating the parser. Defaults to False.
315 verbose_c_extension (bool, optional): Whether to display additional
316 output when compiling the C extension . Defaults to False.
317 keep_asserts_in_extension (bool, optional): Whether to keep the assert statements
318 when compiling the extension module. Defaults to True.
319 skip_actions (bool, optional): Whether to pretend no rule has any actions.
320 """
321 grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
322 gen = build_c_generator(
323 grammar,
324 grammar_file,
325 tokens_file,
326 output_file,
327 compile_extension,
328 verbose_c_extension,
329 keep_asserts_in_extension,
330 skip_actions=skip_actions,
331 )
332
333 return grammar, parser, tokenizer, gen
334
335
336 def build_python_parser_and_generator(
337 grammar_file: str,
338 output_file: str,
339 verbose_tokenizer: bool = False,
340 verbose_parser: bool = False,
341 skip_actions: bool = False,
342 ) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
343 """Generate rules, python parser, tokenizer, parser generator for a given grammar
344
345 Args:
346 grammar_file (string): Path for the grammar file
347 output_file (string): Path for the output file
348 verbose_tokenizer (bool, optional): Whether to display additional output
349 when generating the tokenizer. Defaults to False.
350 verbose_parser (bool, optional): Whether to display additional output
351 when generating the parser. Defaults to False.
352 skip_actions (bool, optional): Whether to pretend no rule has any actions.
353 """
354 grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
355 gen = build_python_generator(
356 grammar,
357 grammar_file,
358 output_file,
359 skip_actions=skip_actions,
360 )
361 return grammar, parser, tokenizer, gen