#!/usr/bin/env python3.8

"""pegen -- PEG parser generator.

See https://en.wikipedia.org/wiki/Parsing_expression_grammar for background
on PEG parsers.
"""

import argparse
import sys
import time
import token
import traceback
from typing import Tuple

from pegen.build import Grammar, Parser, ParserGenerator, Tokenizer
from pegen.validator import validate_grammar


def generate_c_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
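    """Generate C parser code from args.grammar_filename, writing it to args.output.

    Returns the (grammar, parser, tokenizer, generator) tuple on success; on
    failure, prints a short error (or re-raises the exception under -v) and
    exits with status 1.
    """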
    from pegen.build import build_c_parser_and_generator

    verbose = args.verbose
    # Verbosity mapping: -v prints timing stats only, -vv enables parser debug
    # output, -vvv enables tokenizer debug output, and -vvvv enables both.
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4
    try:
        grammar, parser, tokenizer, gen = build_c_parser_and_generator(
            args.grammar_filename,
            args.tokens_filename,
            args.output,
            args.compile_extension,
            verbose_tokenizer,
            verbose_parser,
            args.verbose,
            keep_asserts_in_extension=not args.optimized,
            skip_actions=args.skip_actions,
        )
        return grammar, parser, tokenizer, gen
    except Exception as err:
        if args.verbose:
            raise  # Show traceback
        traceback.print_exception(err.__class__, err, None)
        sys.stderr.write("For full traceback, use -v\n")
        sys.exit(1)


def generate_python_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
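    """Generate Python parser code from args.grammar_filename, writing it to args.output.

    Returns the (grammar, parser, tokenizer, generator) tuple on success; on
    failure, prints a short error (or re-raises the exception under -v) and
    exits with status 1.
    """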
    from pegen.build import build_python_parser_and_generator

    verbose = args.verbose
    # Same -v/-vv/-vvv/-vvvv verbosity mapping as in generate_c_code().
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4
    try:
        grammar, parser, tokenizer, gen = build_python_parser_and_generator(
            args.grammar_filename,
            args.output,
            verbose_tokenizer,
            verbose_parser,
            skip_actions=args.skip_actions,
        )
        return grammar, parser, tokenizer, gen
    except Exception as err:
        if args.verbose:
            raise  # Show traceback
        traceback.print_exception(err.__class__, err, None)
        sys.stderr.write("For full traceback, use -v\n")
        sys.exit(1)


argparser = argparse.ArgumentParser(
    prog="pegen", description="Experimental PEG-like parser generator"
)
argparser.add_argument("-q", "--quiet", action="store_true", help="Don't print the parsed grammar")
argparser.add_argument(
    "-v",
    "--verbose",
    action="count",
    default=0,
    help="Print timing stats; repeat for more debug output",
)
subparsers = argparser.add_subparsers(help="target language for the generated code")

c_parser = subparsers.add_parser("c", help="Generate C code for inclusion into CPython")
c_parser.set_defaults(func=generate_c_code)
c_parser.add_argument("grammar_filename", help="Grammar description")
c_parser.add_argument("tokens_filename", help="Tokens description")
c_parser.add_argument(
    "-o", "--output", metavar="OUT", default="parse.c", help="Where to write the generated parser"
)
c_parser.add_argument(
    "--compile-extension",
    action="store_true",
    help="Compile generated C code into an extension module",
)
c_parser.add_argument(
    "--optimized", action="store_true", help="Compile the extension in optimized mode"
)
c_parser.add_argument(
    "--skip-actions",
    action="store_true",
    help="Suppress code emission for rule actions",
)

python_parser = subparsers.add_parser("python", help="Generate Python code")
python_parser.set_defaults(func=generate_python_code)
python_parser.add_argument("grammar_filename", help="Grammar description")
python_parser.add_argument(
    "-o",
    "--output",
    metavar="OUT",
    default="parse.py",
    help="Where to write the generated parser",
)
python_parser.add_argument(
    "--skip-actions",
    action="store_true",
    help="Suppress code emission for rule actions",
)
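
# Sketch of driving the generator programmatically instead of from the shell
# ("my_grammar.gram" is a placeholder path; any readable grammar file will do):
#
#     args = argparser.parse_args(["python", "my_grammar.gram", "-o", "my_parser.py"])
#     grammar, parser, tokenizer, gen = args.func(args)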


def main() -> None:
    from pegen.testutil import print_memstats

    args = argparser.parse_args()
    if "func" not in args:
        argparser.error("Must specify the target language mode ('c' or 'python')")

    t0 = time.time()
    grammar, parser, tokenizer, gen = args.func(args)
    t1 = time.time()

    validate_grammar(grammar)

    if not args.quiet:
        if args.verbose:
            print("Raw Grammar:")
            for line in repr(grammar).splitlines():
                print(" ", line)

        print("Clean Grammar:")
        for line in str(grammar).splitlines():
            print(" ", line)

    if args.verbose:
        print("First Graph:")
        for src, dsts in gen.first_graph.items():
            print(f"  {src} -> {', '.join(dsts)}")
        print("First SCCs:")
        for scc in gen.first_sccs:
            print(" ", scc, end="")
            if len(scc) > 1:
                print(
                    "  # Indirectly left-recursive; leaders:",
                    {name for name in scc if grammar.rules[name].leader},
                )
            else:
                name = next(iter(scc))
                if name in gen.first_graph[name]:
                    print("  # Left-recursive")
                else:
                    print()

    if args.verbose:
        dt = t1 - t0
        diag = tokenizer.diagnose()
        nlines = diag.end[0]
        if diag.type == token.ENDMARKER:
            nlines -= 1
        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
        if dt:
            print(f"; {nlines / dt:.0f} lines/sec")
        else:
            print()
        print("Cache sizes:")
        print(f"  token array : {len(tokenizer._tokens):10}")
        print(f"        cache : {len(parser._cache):10}")
        if not print_memstats():
            print("(Can't find psutil; install it for memory stats.)")


if __name__ == "__main__":
    if sys.version_info < (3, 8):
        print("ERROR: using pegen requires at least Python 3.8!", file=sys.stderr)
        sys.exit(1)
    main()