1 #!/usr/bin/env python3.8
2
3 import argparse
4 import pprint
5 import sys
6 from typing import Dict, Set
7
8 from pegen.build import build_parser
9 from pegen.grammar import (
10 Alt,
11 Cut,
12 Gather,
13 GrammarVisitor,
14 Group,
15 Lookahead,
16 NamedItem,
17 NameLeaf,
18 NegativeLookahead,
19 Opt,
20 Repeat0,
21 Repeat1,
22 Rhs,
23 Rule,
24 StringLeaf,
25 )
26 from pegen.parser_generator import compute_nullables
27
28 argparser = argparse.ArgumentParser(
29 prog="calculate_first_sets",
30 description="Calculate the first sets of a grammar",
31 )
32 argparser.add_argument("grammar_file", help="The grammar file")
33
34
35 class ESC[4;38;5;81mFirstSetCalculator(ESC[4;38;5;149mGrammarVisitor):
36 def __init__(self, rules: Dict[str, Rule]) -> None:
37 self.rules = rules
38 self.nullables = compute_nullables(rules)
39 self.first_sets: Dict[str, Set[str]] = dict()
40 self.in_process: Set[str] = set()
41
42 def calculate(self) -> Dict[str, Set[str]]:
43 for name, rule in self.rules.items():
44 self.visit(rule)
45 return self.first_sets
46
47 def visit_Alt(self, item: Alt) -> Set[str]:
48 result: Set[str] = set()
49 to_remove: Set[str] = set()
50 for other in item.items:
51 new_terminals = self.visit(other)
52 if isinstance(other.item, NegativeLookahead):
53 to_remove |= new_terminals
54 result |= new_terminals
55 if to_remove:
56 result -= to_remove
57
58 # If the set of new terminals can start with the empty string,
59 # it means that the item is completely nullable and we should
60 # also considering at least the next item in case the current
61 # one fails to parse.
62
63 if "" in new_terminals:
64 continue
65
66 if not isinstance(other.item, (Opt, NegativeLookahead, Repeat0)):
67 break
68
69 # Do not allow the empty string to propagate.
70 result.discard("")
71
72 return result
73
74 def visit_Cut(self, item: Cut) -> Set[str]:
75 return set()
76
77 def visit_Group(self, item: Group) -> Set[str]:
78 return self.visit(item.rhs)
79
80 def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]:
81 return self.visit(item.node)
82
83 def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]:
84 return self.visit(item.node)
85
86 def visit_NamedItem(self, item: NamedItem) -> Set[str]:
87 return self.visit(item.item)
88
89 def visit_Opt(self, item: Opt) -> Set[str]:
90 return self.visit(item.node)
91
92 def visit_Gather(self, item: Gather) -> Set[str]:
93 return self.visit(item.node)
94
95 def visit_Repeat0(self, item: Repeat0) -> Set[str]:
96 return self.visit(item.node)
97
98 def visit_Repeat1(self, item: Repeat1) -> Set[str]:
99 return self.visit(item.node)
100
101 def visit_NameLeaf(self, item: NameLeaf) -> Set[str]:
102 if item.value not in self.rules:
103 return {item.value}
104
105 if item.value not in self.first_sets:
106 self.first_sets[item.value] = self.visit(self.rules[item.value])
107 return self.first_sets[item.value]
108 elif item.value in self.in_process:
109 return set()
110
111 return self.first_sets[item.value]
112
113 def visit_StringLeaf(self, item: StringLeaf) -> Set[str]:
114 return {item.value}
115
116 def visit_Rhs(self, item: Rhs) -> Set[str]:
117 result: Set[str] = set()
118 for alt in item.alts:
119 result |= self.visit(alt)
120 return result
121
122 def visit_Rule(self, item: Rule) -> Set[str]:
123 if item.name in self.in_process:
124 return set()
125 elif item.name not in self.first_sets:
126 self.in_process.add(item.name)
127 terminals = self.visit(item.rhs)
128 if item in self.nullables:
129 terminals.add("")
130 self.first_sets[item.name] = terminals
131 self.in_process.remove(item.name)
132 return self.first_sets[item.name]
133
134
135 def main() -> None:
136 args = argparser.parse_args()
137
138 try:
139 grammar, parser, tokenizer = build_parser(args.grammar_file)
140 except Exception as err:
141 print("ERROR: Failed to parse grammar file", file=sys.stderr)
142 sys.exit(1)
143
144 firs_sets = FirstSetCalculator(grammar.rules).calculate()
145 pprint.pprint(firs_sets)
146
147
148 if __name__ == "__main__":
149 main()