1 import ast
2 import contextlib
3 import re
4 from abc import abstractmethod
5 from typing import (
6 IO,
7 AbstractSet,
8 Any,
9 Dict,
10 Iterable,
11 Iterator,
12 List,
13 Optional,
14 Set,
15 Text,
16 Tuple,
17 Union,
18 )
19
20 from pegen import sccutils
21 from pegen.grammar import (
22 Alt,
23 Cut,
24 Forced,
25 Gather,
26 Grammar,
27 GrammarError,
28 GrammarVisitor,
29 Group,
30 Lookahead,
31 NamedItem,
32 NameLeaf,
33 Opt,
34 Plain,
35 Repeat0,
36 Repeat1,
37 Rhs,
38 Rule,
39 StringLeaf,
40 )
41
42
43 class ESC[4;38;5;81mRuleCollectorVisitor(ESC[4;38;5;149mGrammarVisitor):
44 """Visitor that invokes a provieded callmaker visitor with just the NamedItem nodes"""
45
46 def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None:
47 self.rulses = rules
48 self.callmaker = callmakervisitor
49
50 def visit_Rule(self, rule: Rule) -> None:
51 self.visit(rule.flatten())
52
53 def visit_NamedItem(self, item: NamedItem) -> None:
54 self.callmaker.visit(item)
55
56
57 class ESC[4;38;5;81mKeywordCollectorVisitor(ESC[4;38;5;149mGrammarVisitor):
58 """Visitor that collects all the keywods and soft keywords in the Grammar"""
59
60 def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]):
61 self.generator = gen
62 self.keywords = keywords
63 self.soft_keywords = soft_keywords
64
65 def visit_StringLeaf(self, node: StringLeaf) -> None:
66 val = ast.literal_eval(node.value)
67 if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
68 if node.value.endswith("'") and node.value not in self.keywords:
69 self.keywords[val] = self.generator.keyword_type()
70 else:
71 return self.soft_keywords.add(node.value.replace('"', ""))
72
73
74 class ESC[4;38;5;81mRuleCheckingVisitor(ESC[4;38;5;149mGrammarVisitor):
75 def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
76 self.rules = rules
77 self.tokens = tokens
78
79 def visit_NameLeaf(self, node: NameLeaf) -> None:
80 if node.value not in self.rules and node.value not in self.tokens:
81 raise GrammarError(f"Dangling reference to rule {node.value!r}")
82
83 def visit_NamedItem(self, node: NamedItem) -> None:
84 if node.name and node.name.startswith("_"):
85 raise GrammarError(f"Variable names cannot start with underscore: '{node.name}'")
86 self.visit(node.item)
87
88
89 class ESC[4;38;5;81mParserGenerator:
90
91 callmakervisitor: GrammarVisitor
92
93 def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
94 self.grammar = grammar
95 self.tokens = tokens
96 self.keywords: Dict[str, int] = {}
97 self.soft_keywords: Set[str] = set()
98 self.rules = grammar.rules
99 self.validate_rule_names()
100 if "trailer" not in grammar.metas and "start" not in self.rules:
101 raise GrammarError("Grammar without a trailer must have a 'start' rule")
102 checker = RuleCheckingVisitor(self.rules, self.tokens)
103 for rule in self.rules.values():
104 checker.visit(rule)
105 self.file = file
106 self.level = 0
107 self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
108 self.counter = 0 # For name_rule()/name_loop()
109 self.keyword_counter = 499 # For keyword_type()
110 self.all_rules: Dict[str, Rule] = self.rules.copy() # Rules + temporal rules
111 self._local_variable_stack: List[List[str]] = []
112
113 def validate_rule_names(self) -> None:
114 for rule in self.rules:
115 if rule.startswith("_"):
116 raise GrammarError(f"Rule names cannot start with underscore: '{rule}'")
117
118 @contextlib.contextmanager
119 def local_variable_context(self) -> Iterator[None]:
120 self._local_variable_stack.append([])
121 yield
122 self._local_variable_stack.pop()
123
124 @property
125 def local_variable_names(self) -> List[str]:
126 return self._local_variable_stack[-1]
127
128 @abstractmethod
129 def generate(self, filename: str) -> None:
130 raise NotImplementedError
131
132 @contextlib.contextmanager
133 def indent(self) -> Iterator[None]:
134 self.level += 1
135 try:
136 yield
137 finally:
138 self.level -= 1
139
140 def print(self, *args: object) -> None:
141 if not args:
142 print(file=self.file)
143 else:
144 print(" " * self.level, end="", file=self.file)
145 print(*args, file=self.file)
146
147 def printblock(self, lines: str) -> None:
148 for line in lines.splitlines():
149 self.print(line)
150
151 def collect_rules(self) -> None:
152 keyword_collector = KeywordCollectorVisitor(self, self.keywords, self.soft_keywords)
153 for rule in self.all_rules.values():
154 keyword_collector.visit(rule)
155
156 rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor)
157 done: Set[str] = set()
158 while True:
159 computed_rules = list(self.all_rules)
160 todo = [i for i in computed_rules if i not in done]
161 if not todo:
162 break
163 done = set(self.all_rules)
164 for rulename in todo:
165 rule_collector.visit(self.all_rules[rulename])
166
167 def keyword_type(self) -> int:
168 self.keyword_counter += 1
169 return self.keyword_counter
170
171 def artifical_rule_from_rhs(self, rhs: Rhs) -> str:
172 self.counter += 1
173 name = f"_tmp_{self.counter}" # TODO: Pick a nicer name.
174 self.all_rules[name] = Rule(name, None, rhs)
175 return name
176
177 def artificial_rule_from_repeat(self, node: Plain, is_repeat1: bool) -> str:
178 self.counter += 1
179 if is_repeat1:
180 prefix = "_loop1_"
181 else:
182 prefix = "_loop0_"
183 name = f"{prefix}{self.counter}"
184 self.all_rules[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
185 return name
186
187 def artifical_rule_from_gather(self, node: Gather) -> str:
188 self.counter += 1
189 name = f"_gather_{self.counter}"
190 self.counter += 1
191 extra_function_name = f"_loop0_{self.counter}"
192 extra_function_alt = Alt(
193 [NamedItem(None, node.separator), NamedItem("elem", node.node)],
194 action="elem",
195 )
196 self.all_rules[extra_function_name] = Rule(
197 extra_function_name,
198 None,
199 Rhs([extra_function_alt]),
200 )
201 alt = Alt(
202 [NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],
203 )
204 self.all_rules[name] = Rule(
205 name,
206 None,
207 Rhs([alt]),
208 )
209 return name
210
211 def dedupe(self, name: str) -> str:
212 origname = name
213 counter = 0
214 while name in self.local_variable_names:
215 counter += 1
216 name = f"{origname}_{counter}"
217 self.local_variable_names.append(name)
218 return name
219
220
221 class ESC[4;38;5;81mNullableVisitor(ESC[4;38;5;149mGrammarVisitor):
222 def __init__(self, rules: Dict[str, Rule]) -> None:
223 self.rules = rules
224 self.visited: Set[Any] = set()
225 self.nullables: Set[Union[Rule, NamedItem]] = set()
226
227 def visit_Rule(self, rule: Rule) -> bool:
228 if rule in self.visited:
229 return False
230 self.visited.add(rule)
231 if self.visit(rule.rhs):
232 self.nullables.add(rule)
233 return rule in self.nullables
234
235 def visit_Rhs(self, rhs: Rhs) -> bool:
236 for alt in rhs.alts:
237 if self.visit(alt):
238 return True
239 return False
240
241 def visit_Alt(self, alt: Alt) -> bool:
242 for item in alt.items:
243 if not self.visit(item):
244 return False
245 return True
246
247 def visit_Forced(self, force: Forced) -> bool:
248 return True
249
250 def visit_LookAhead(self, lookahead: Lookahead) -> bool:
251 return True
252
253 def visit_Opt(self, opt: Opt) -> bool:
254 return True
255
256 def visit_Repeat0(self, repeat: Repeat0) -> bool:
257 return True
258
259 def visit_Repeat1(self, repeat: Repeat1) -> bool:
260 return False
261
262 def visit_Gather(self, gather: Gather) -> bool:
263 return False
264
265 def visit_Cut(self, cut: Cut) -> bool:
266 return False
267
268 def visit_Group(self, group: Group) -> bool:
269 return self.visit(group.rhs)
270
271 def visit_NamedItem(self, item: NamedItem) -> bool:
272 if self.visit(item.item):
273 self.nullables.add(item)
274 return item in self.nullables
275
276 def visit_NameLeaf(self, node: NameLeaf) -> bool:
277 if node.value in self.rules:
278 return self.visit(self.rules[node.value])
279 # Token or unknown; never empty.
280 return False
281
282 def visit_StringLeaf(self, node: StringLeaf) -> bool:
283 # The string token '' is considered empty.
284 return not node.value
285
286
287 def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]:
288 """Compute which rules in a grammar are nullable.
289
290 Thanks to TatSu (tatsu/leftrec.py) for inspiration.
291 """
292 nullable_visitor = NullableVisitor(rules)
293 for rule in rules.values():
294 nullable_visitor.visit(rule)
295 return nullable_visitor.nullables
296
297
298 class ESC[4;38;5;81mInitialNamesVisitor(ESC[4;38;5;149mGrammarVisitor):
299 def __init__(self, rules: Dict[str, Rule]) -> None:
300 self.rules = rules
301 self.nullables = compute_nullables(rules)
302
303 def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]:
304 names: Set[str] = set()
305 for value in node:
306 if isinstance(value, list):
307 for item in value:
308 names |= self.visit(item, *args, **kwargs)
309 else:
310 names |= self.visit(value, *args, **kwargs)
311 return names
312
313 def visit_Alt(self, alt: Alt) -> Set[Any]:
314 names: Set[str] = set()
315 for item in alt.items:
316 names |= self.visit(item)
317 if item not in self.nullables:
318 break
319 return names
320
321 def visit_Forced(self, force: Forced) -> Set[Any]:
322 return set()
323
324 def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]:
325 return set()
326
327 def visit_Cut(self, cut: Cut) -> Set[Any]:
328 return set()
329
330 def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]:
331 return {node.value}
332
333 def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]:
334 return set()
335
336
337 def compute_left_recursives(
338 rules: Dict[str, Rule]
339 ) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]:
340 graph = make_first_graph(rules)
341 sccs = list(sccutils.strongly_connected_components(graph.keys(), graph))
342 for scc in sccs:
343 if len(scc) > 1:
344 for name in scc:
345 rules[name].left_recursive = True
346 # Try to find a leader such that all cycles go through it.
347 leaders = set(scc)
348 for start in scc:
349 for cycle in sccutils.find_cycles_in_scc(graph, scc, start):
350 # print("Cycle:", " -> ".join(cycle))
351 leaders -= scc - set(cycle)
352 if not leaders:
353 raise ValueError(
354 f"SCC {scc} has no leadership candidate (no element is included in all cycles)"
355 )
356 # print("Leaders:", leaders)
357 leader = min(leaders) # Pick an arbitrary leader from the candidates.
358 rules[leader].leader = True
359 else:
360 name = min(scc) # The only element.
361 if name in graph[name]:
362 rules[name].left_recursive = True
363 rules[name].leader = True
364 return graph, sccs
365
366
367 def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
368 """Compute the graph of left-invocations.
369
370 There's an edge from A to B if A may invoke B at its initial
371 position.
372
373 Note that this requires the nullable flags to have been computed.
374 """
375 initial_name_visitor = InitialNamesVisitor(rules)
376 graph = {}
377 vertices: Set[str] = set()
378 for rulename, rhs in rules.items():
379 graph[rulename] = names = initial_name_visitor.visit(rhs)
380 vertices |= names
381 for vertex in vertices:
382 graph.setdefault(vertex, set())
383 return graph