import itertools
import pathlib
import sys
import sysconfig
import tempfile
import tokenize
from typing import IO, Dict, List, Optional, Set, Tuple

from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.parser_generator import ParserGenerator
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer

MOD_DIR = pathlib.Path(__file__).resolve().parent

TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]


def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
    flags = sysconfig.get_config_var(compiler_flags)
    py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
    if flags is None or py_flags_nodist is None:
        return []
    return f"{flags} {py_flags_nodist}".split()


def compile_c_extension(
    generated_source_path: str,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    keep_asserts: bool = True,
    disable_optimization: bool = False,
    library_dir: Optional[str] = None,
) -> pathlib.Path:
    """Compile the generated source for a parser generator into an extension module.

    The extension module will be generated in the same directory as the provided path
    for the generated source, with the same basename (in addition to extension module
    metadata). For example, for the source mydir/parser.c the generated extension
    on a Darwin system with Python 3.8 will be mydir/parser.cpython-38-darwin.so.

    If *build_dir* is provided, that path will be used as the temporary build directory
    of distutils (this is useful if you want to use a temporary directory).

    If *library_dir* is provided, that path will be used as the directory for a
    static library of the common parser sources (this is useful when creating
    multiple extensions).
    """
    import distutils.log
    from distutils.core import Distribution, Extension
    from distutils.tests.support import fixup_build_ext  # type: ignore

    from distutils.ccompiler import new_compiler
    from distutils.dep_util import newer_group
    from distutils.sysconfig import customize_compiler

    if verbose:
        distutils.log.set_threshold(distutils.log.DEBUG)

    source_file_path = pathlib.Path(generated_source_path)
    extension_name = source_file_path.stem
    extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
    extra_compile_args.append("-DPy_BUILD_CORE_MODULE")
    # Define _Py_TEST_PEGEN to avoid calling PyAST_Validate() in Parser/pegen.c
    extra_compile_args.append("-D_Py_TEST_PEGEN")
    extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
    if keep_asserts:
        extra_compile_args.append("-UNDEBUG")
    if disable_optimization:
        if sys.platform == 'win32':
            extra_compile_args.append("/Od")
            extra_link_args.append("/LTCG:OFF")
        else:
            extra_compile_args.append("-O0")
            if sysconfig.get_config_var("GNULD") == "yes":
                extra_link_args.append("-fno-lto")

    common_sources = [
        str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
        str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
        str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
    ]
    include_dirs = [
        str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
        str(MOD_DIR.parent.parent.parent / "Parser"),
    ]
    extension = Extension(
        extension_name,
        sources=[generated_source_path],
        extra_compile_args=extra_compile_args,
        extra_link_args=extra_link_args,
    )
    dist = Distribution({"name": extension_name, "ext_modules": [extension]})
    cmd = dist.get_command_obj("build_ext")
    fixup_build_ext(cmd)
    cmd.build_lib = str(source_file_path.parent)
    cmd.include_dirs = include_dirs
    if build_dir:
        cmd.build_temp = build_dir
    cmd.ensure_finalized()

    compiler = new_compiler()
    customize_compiler(compiler)
    compiler.set_include_dirs(cmd.include_dirs)
    compiler.set_library_dirs(cmd.library_dirs)
    # Build a static library of the common parser sources if requested.
    if library_dir:
        library_filename = compiler.library_filename(extension_name,
                                                     output_dir=library_dir)
        if newer_group(common_sources, library_filename, 'newer'):
            if sys.platform == 'win32':
                pdb = compiler.static_lib_format % (extension_name, '.pdb')
                compile_opts = [f"/Fd{library_dir}\\{pdb}"]
                compile_opts.extend(extra_compile_args)
            else:
                compile_opts = extra_compile_args
            objects = compiler.compile(common_sources,
                                       output_dir=library_dir,
                                       debug=cmd.debug,
                                       extra_postargs=compile_opts)
            compiler.create_static_lib(objects, extension_name,
                                       output_dir=library_dir,
                                       debug=cmd.debug)
        if sys.platform == 'win32':
            compiler.add_library_dir(library_dir)
            extension.libraries = [extension_name]
        elif sys.platform == 'darwin':
            compiler.set_link_objects([
                '-Wl,-force_load', library_filename,
            ])
        else:
            compiler.set_link_objects([
                '-Wl,--whole-archive', library_filename, '-Wl,--no-whole-archive',
            ])
    else:
        extension.sources[0:0] = common_sources

    # Compile the source code to object files.
    ext_path = cmd.get_ext_fullpath(extension_name)
    if newer_group(extension.sources, ext_path, 'newer'):
        objects = compiler.compile(extension.sources,
                                   output_dir=cmd.build_temp,
                                   debug=cmd.debug,
                                   extra_postargs=extra_compile_args)
    else:
        objects = compiler.object_filenames(extension.sources,
                                            output_dir=cmd.build_temp)
    # Now link the object files together into a "shared object".
    compiler.link_shared_object(objects, ext_path,
                                libraries=cmd.get_libraries(extension),
                                extra_postargs=extra_link_args,
                                export_symbols=cmd.get_export_symbols(extension),
                                debug=cmd.debug,
                                build_temp=cmd.build_temp)

    return pathlib.Path(ext_path)
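
# Illustrative usage (a sketch in comments, not executed at import time);
# the path "mydir/parser.c" is a hypothetical example:
#
#     so_path = compile_c_extension(
#         "mydir/parser.c",
#         build_dir="/tmp/pegen_build",  # distutils temporary build directory
#         verbose=True,
#     )
#     # so_path is a pathlib.Path such as mydir/parser.cpython-38-darwin.so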


def build_parser(
    grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
) -> Tuple[Grammar, Parser, Tokenizer]:
    with open(grammar_file) as file:
        tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        grammar = parser.start()

        if not grammar:
            raise parser.make_syntax_error(grammar_file)

    return grammar, parser, tokenizer

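
# A minimal sketch of calling build_parser; "data/simple.gram" is a
# hypothetical grammar file:
#
#     grammar, parser, tokenizer = build_parser("data/simple.gram")
#     print(sorted(grammar.rules))  # rule names parsed from the grammar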

def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions:
    """Parse a Tokens file into the structures the C generator needs.

    Each non-blank, non-comment line defines one token. A line with a single
    field (e.g. "ENDMARKER") names a non-exact token; a line with two fields
    (e.g. "LPAR '('") names an exact token and its literal string.
    """
    all_tokens = {}
    exact_tokens = {}
    non_exact_tokens = set()
    numbers = itertools.count(0)

    for line in tokens:
        line = line.strip()

        if not line or line.startswith("#"):
            continue

        pieces = line.split()
        index = next(numbers)

        if len(pieces) == 1:
            (token,) = pieces
            non_exact_tokens.add(token)
            all_tokens[index] = token
        elif len(pieces) == 2:
            token, op = pieces
            exact_tokens[op.strip("'")] = index
            all_tokens[index] = token
        else:
            raise ValueError(f"Unexpected line found in Tokens file: {line}")

    return all_tokens, exact_tokens, non_exact_tokens

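
# For example, given these two (hypothetical) Tokens-file lines:
#
#     ENDMARKER
#     LPAR '('
#
# the function returns:
#
#     all_tokens       == {0: "ENDMARKER", 1: "LPAR"}
#     exact_tokens     == {"(": 1}
#     non_exact_tokens == {"ENDMARKER"}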

def build_c_generator(
    grammar: Grammar,
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    with open(tokens_file, "r") as tok_file:
        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
    with open(output_file, "w") as file:
        gen: ParserGenerator = CParserGenerator(
            grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions
        )
        gen.generate(grammar_file)

    if compile_extension:
        with tempfile.TemporaryDirectory() as build_dir:
            compile_c_extension(
                output_file,
                build_dir=build_dir,
                verbose=verbose_c_extension,
                keep_asserts=keep_asserts_in_extension,
            )
    return gen

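
# Usage sketch (all paths hypothetical): writes the C parser to "parser.c"
# and, because compile_extension is true, also compiles it in a temporary
# build directory:
#
#     grammar, _, _ = build_parser("data/simple.gram")
#     gen = build_c_generator(
#         grammar, "data/simple.gram", "Grammar/Tokens", "parser.c",
#         compile_extension=True,
#     )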


def build_python_generator(
    grammar: Grammar,
    grammar_file: str,
    output_file: str,
    skip_actions: bool = False,
) -> ParserGenerator:
    with open(output_file, "w") as file:
        gen: ParserGenerator = PythonParserGenerator(grammar, file)  # TODO: skip_actions
        gen.generate(grammar_file)
    return gen


def build_c_parser_and_generator(
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, C parser, tokenizer, parser generator for a given grammar.

    Args:
        grammar_file (string): Path for the grammar file
        tokens_file (string): Path for the tokens file
        output_file (string): Path for the output file
        compile_extension (bool, optional): Whether to compile the C extension.
            Defaults to False.
        verbose_tokenizer (bool, optional): Whether to display additional output
            when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
            when generating the parser. Defaults to False.
        verbose_c_extension (bool, optional): Whether to display additional
            output when compiling the C extension. Defaults to False.
        keep_asserts_in_extension (bool, optional): Whether to keep the assert
            statements when compiling the extension module. Defaults to True.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
            Defaults to False.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_c_generator(
        grammar,
        grammar_file,
        tokens_file,
        output_file,
        compile_extension,
        verbose_c_extension,
        keep_asserts_in_extension,
        skip_actions=skip_actions,
    )

    return grammar, parser, tokenizer, gen

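
# End-to-end sketch for the C target (hypothetical paths): builds the grammar,
# writes "parser.c", and compiles it in one call:
#
#     grammar, parser, tokenizer, gen = build_c_parser_and_generator(
#         "data/simple.gram",
#         "Grammar/Tokens",
#         "parser.c",
#         compile_extension=True,
#     )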

def build_python_parser_and_generator(
    grammar_file: str,
    output_file: str,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, Python parser, tokenizer, parser generator for a given grammar.

    Args:
        grammar_file (string): Path for the grammar file
        output_file (string): Path for the output file
        verbose_tokenizer (bool, optional): Whether to display additional output
            when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
            when generating the parser. Defaults to False.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
            Defaults to False.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_python_generator(
        grammar,
        grammar_file,
        output_file,
        skip_actions=skip_actions,
    )
    return grammar, parser, tokenizer, gen
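
# Equivalent sketch for the Python target (hypothetical paths); note that no
# tokens file is needed here:
#
#     grammar, parser, tokenizer, gen = build_python_parser_and_generator(
#         "data/simple.gram", "parser.py"
#     )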