1 import importlib.util
2 import io
3 import os
4 import pathlib
5 import sys
6 import textwrap
7 import token
8 import tokenize
9 from typing import IO, Any, Dict, Final, Optional, Type, cast
10
11 from pegen.build import compile_c_extension
12 from pegen.c_generator import CParserGenerator
13 from pegen.grammar import Grammar
14 from pegen.grammar_parser import GeneratedParser as GrammarParser
15 from pegen.parser import Parser
16 from pegen.python_generator import PythonParserGenerator
17 from pegen.tokenizer import Tokenizer
18
# Token metadata handed to the C parser generator below.
ALL_TOKENS = token.tok_name  # mapping: token id -> token name, for every token type
EXACT_TOKENS = token.EXACT_TOKEN_TYPES  # mapping: literal string (e.g. "+") -> token id
# Names of tokens that have no fixed literal spelling (NAME, NUMBER, NEWLINE, ...).
NON_EXACT_TOKENS = {
    name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values()
}
24
25
def generate_parser(grammar: Grammar) -> Type[Parser]:
    """Generate a Python parser for *grammar* and return the loaded parser class."""
    # Emit the generated parser source into an in-memory buffer.
    buffer = io.StringIO()
    generator = PythonParserGenerator(grammar, buffer)
    generator.generate("<string>")

    # Execute the generated source and pull the parser class out of its namespace.
    namespace: Dict[str, Any] = {}
    exec(buffer.getvalue(), namespace)
    return namespace["GeneratedParser"]
36
37
def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
    """Run *parser_class* over the stream *file*; raise a SyntaxError on failure."""
    tokenizer = Tokenizer(tokenize.generate_tokens(file.readline))  # type: ignore # typeshed issue #3515
    parser = parser_class(tokenizer, verbose=verbose)
    tree = parser.start()
    if tree is None:
        # Let the parser construct a SyntaxError pointing at the failure location.
        raise parser.make_syntax_error("invalid syntax")
    return tree
46
47
def parse_string(
    source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False
) -> Any:
    """Run *parser_class* on the string *source*, optionally dedenting it first."""
    text = textwrap.dedent(source) if dedent else source
    stream = io.StringIO(text)
    return run_parser(stream, parser_class, verbose=verbose)  # type: ignore # typeshed issue #3515
56
57
def make_parser(source: str) -> Type[Parser]:
    """Build a parser class directly from a grammar source string.

    Convenience wrapper chaining parse_string() and generate_parser().
    """
    return generate_parser(parse_string(source, GrammarParser))
62
63
def import_file(full_name: str, path: str) -> Any:
    """Import a python module from a path.

    Args:
        full_name: Name to register the module under.
        path: Filesystem path of the module source (or extension).

    Returns:
        The freshly executed module object.
    """
    spec = importlib.util.spec_from_file_location(full_name, path)
    # spec is None only for unrecognized file types; callers always pass a
    # real importable path here.
    assert spec is not None
    mod = importlib.util.module_from_spec(spec)

    # A spec produced by spec_from_file_location always carries a loader with
    # an exec_module() method, so narrow with an assert instead of casting
    # the loader to Any.
    # See https://docs.python.org/3/reference/import.html?highlight=exec_module#loading
    assert spec.loader is not None
    spec.loader.exec_module(mod)
    return mod
76
77
def generate_c_parser_source(grammar: Grammar) -> str:
    """Return the C parser source text generated for *grammar*."""
    buffer = io.StringIO()
    generator = CParserGenerator(grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, buffer)
    generator.generate("<string>")
    return buffer.getvalue()
83
84
def generate_parser_c_extension(
    grammar: Grammar, path: pathlib.PurePath, debug: bool = False,
    library_dir: Optional[str] = None,
) -> Any:
    """Generate a parser c extension for the given grammar in the given path

    Returns a module object with a parse_string() method.
    TODO: express that using a Protocol.

    NOTE(review): as written the function body has no return statement, so it
    actually returns None; the compiled extension is left in *path* but never
    imported here. Confirm callers load it themselves (e.g. via import_file()),
    or the docstring's "Returns" claim is stale.
    """
    # Make sure that the working directory is empty: reusing non-empty temporary
    # directories when generating extensions can lead to segmentation faults.
    # Check issue #95 (https://github.com/gvanrossum/pegen/issues/95) for more
    # context.
    assert not os.listdir(path)
    source = path / "parse.c"
    # Write the generated C parser source to parse.c inside the build dir.
    with open(source, "w", encoding="utf-8") as file:
        genr = CParserGenerator(
            grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug
        )
        genr.generate("parse.c")
    # Compile parse.c into an extension module in the same directory.
    compile_c_extension(
        str(source),
        build_dir=str(path),
        # Significant test_peg_generator speedups
        disable_optimization=True,
        library_dir=library_dir,
    )
112
113
def print_memstats() -> bool:
    """Print the current process's memory statistics in MiB.

    Returns False (printing nothing) when psutil is unavailable, True otherwise.
    """
    MiB: Final = 1 << 20
    try:
        import psutil  # type: ignore
    except ImportError:
        return False
    print("Memory stats:")
    meminfo = psutil.Process().memory_info()
    stats = {
        "rss": meminfo.rss / MiB,
        "vms": meminfo.vms / MiB,
    }
    if sys.platform == "win32":
        stats["maxrss"] = meminfo.peak_wset / MiB
    else:
        # See https://stackoverflow.com/questions/938733/total-memory-used-by-python-process
        import resource  # Since it doesn't exist on Windows.

        usage = resource.getrusage(resource.RUSAGE_SELF)
        # ru_maxrss is reported in bytes on macOS but in KiB on Linux.
        scale = 1 if sys.platform == "darwin" else 1024
        stats["maxrss"] = usage.ru_maxrss * scale / MiB
    for name, amount in stats.items():
        print(f" {name:12.12s}: {amount:10.0f} MiB")
    return True