import ast
import os.path
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple

from pegen import grammar
from pegen.grammar import (
    Alt,
    Cut,
    Forced,
    Gather,
    GrammarVisitor,
    Group,
    Leaf,
    Lookahead,
    NamedItem,
    NameLeaf,
    NegativeLookahead,
    Opt,
    PositiveLookahead,
    Repeat0,
    Repeat1,
    Rhs,
    Rule,
    StringLeaf,
)
from pegen.parser_generator import ParserGenerator

EXTENSION_PREFIX = """\
#include "pegen.h"

#if defined(Py_DEBUG) && defined(Py_BUILD_CORE)
# define D(x) if (Py_DebugFlag) x;
#else
# define D(x)
#endif

#ifdef __wasi__
# define MAXSTACK 4000
#else
# define MAXSTACK 6000
#endif

"""


EXTENSION_SUFFIX = """
void *
_PyPegen_parse(Parser *p)
{
    // Initialize keywords
    p->keywords = reserved_keywords;
    p->n_keyword_lists = n_keyword_lists;
    p->soft_keywords = soft_keywords;

    return start_rule(p);
}
"""


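# The kind of C helper a grammar node maps to; lookahead generation below
# dispatches on these values to pick the right _PyPegen_lookahead_* call.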
class NodeTypes(Enum):
    NAME_TOKEN = 0
    NUMBER_TOKEN = 1
    STRING_TOKEN = 2
    GENERIC_TOKEN = 3
    KEYWORD = 4
    SOFT_KEYWORD = 5
    CUT_OPERATOR = 6


BASE_NODETYPES = {
    "NAME": NodeTypes.NAME_TOKEN,
    "NUMBER": NodeTypes.NUMBER_TOKEN,
    "STRING": NodeTypes.STRING_TOKEN,
    "SOFT_KEYWORD": NodeTypes.SOFT_KEYWORD,
}


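# A single C call (optionally assigned to a local variable) emitted for one
# grammar item; __str__ renders it exactly as it appears in the generated code.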
@dataclass
class FunctionCall:
    function: str
    arguments: List[Any] = field(default_factory=list)
    assigned_variable: Optional[str] = None
    assigned_variable_type: Optional[str] = None
    return_type: Optional[str] = None
    nodetype: Optional[NodeTypes] = None
    force_true: bool = False
    comment: Optional[str] = None

    def __str__(self) -> str:
        parts = []
        parts.append(self.function)
        if self.arguments:
            parts.append(f"({', '.join(map(str, self.arguments))})")
        if self.force_true:
            parts.append(", !p->error_indicator")
        if self.assigned_variable:
            if self.assigned_variable_type:
                parts = [
                    "(",
                    self.assigned_variable,
                    " = ",
                    "(",
                    self.assigned_variable_type,
                    ")",
                    *parts,
                    ")",
                ]
            else:
                parts = ["(", self.assigned_variable, " = ", *parts, ")"]
        if self.comment:
            parts.append(f" // {self.comment}")
        return "".join(parts)


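# Maps grammar nodes to the FunctionCall that parses them: tokens and keywords
# become _PyPegen_expect_* helpers, rule names become <name>_rule(p) calls.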
class CCallMakerVisitor(GrammarVisitor):
    def __init__(
        self,
        parser_generator: ParserGenerator,
        exact_tokens: Dict[str, int],
        non_exact_tokens: Set[str],
    ):
        self.gen = parser_generator
        self.exact_tokens = exact_tokens
        self.non_exact_tokens = non_exact_tokens
        self.cache: Dict[Any, FunctionCall] = {}
        self.cleanup_statements: List[str] = []

    def keyword_helper(self, keyword: str) -> FunctionCall:
        return FunctionCall(
            assigned_variable="_keyword",
            function="_PyPegen_expect_token",
            arguments=["p", self.gen.keywords[keyword]],
            return_type="Token *",
            nodetype=NodeTypes.KEYWORD,
            comment=f"token='{keyword}'",
        )

    def soft_keyword_helper(self, value: str) -> FunctionCall:
        return FunctionCall(
            assigned_variable="_keyword",
            function="_PyPegen_expect_soft_keyword",
            arguments=["p", value],
            return_type="expr_ty",
            nodetype=NodeTypes.SOFT_KEYWORD,
            comment=f"soft_keyword='{value}'",
        )

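    # Names that are tokens use the dedicated token helpers; any other name is
    # treated as a rule reference and becomes a call to <name>_rule.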
    def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
        name = node.value
        if name in self.non_exact_tokens:
            if name in BASE_NODETYPES:
                return FunctionCall(
                    assigned_variable=f"{name.lower()}_var",
                    function=f"_PyPegen_{name.lower()}_token",
                    arguments=["p"],
                    nodetype=BASE_NODETYPES[name],
                    return_type="expr_ty",
                    comment=name,
                )
            return FunctionCall(
                assigned_variable=f"{name.lower()}_var",
                function=f"_PyPegen_expect_token",
                arguments=["p", name],
                nodetype=NodeTypes.GENERIC_TOKEN,
                return_type="Token *",
                comment=f"token='{name}'",
            )

        type = None
        rule = self.gen.all_rules.get(name.lower())
        if rule is not None:
            type = "asdl_seq *" if rule.is_loop() or rule.is_gather() else rule.type

        return FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type=type,
            comment=f"{node}",
        )

    def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall:
        val = ast.literal_eval(node.value)
        if re.match(r"[a-zA-Z_]\w*\Z", val):  # This is a keyword
            if node.value.endswith("'"):
                return self.keyword_helper(val)
            else:
                return self.soft_keyword_helper(node.value)
        else:
            assert val in self.exact_tokens, f"{node.value} is not a known literal"
            type = self.exact_tokens[val]
            return FunctionCall(
                assigned_variable="_literal",
                function=f"_PyPegen_expect_token",
                arguments=["p", type],
                nodetype=NodeTypes.GENERIC_TOKEN,
                return_type="Token *",
                comment=f"token='{val}'",
            )

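    # A group that can be inlined reuses the call for its single item; otherwise
    # an artificial rule is generated for it. Either way the result is cached.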
    def visit_Rhs(self, node: Rhs) -> FunctionCall:
        if node in self.cache:
            return self.cache[node]
        if node.can_be_inlined:
            self.cache[node] = self.generate_call(node.alts[0].items[0])
        else:
            name = self.gen.artifical_rule_from_rhs(node)
            self.cache[node] = FunctionCall(
                assigned_variable=f"{name}_var",
                function=f"{name}_rule",
                arguments=["p"],
                comment=f"{node}",
            )
        return self.cache[node]

    def visit_NamedItem(self, node: NamedItem) -> FunctionCall:
        call = self.generate_call(node.item)
        if node.name:
            call.assigned_variable = node.name
        if node.type:
            call.assigned_variable_type = node.type
        return call

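    # Lookaheads wrap the call generated for the inner node, dispatching on its
    # node type so the C helper compares the right thing (name, string, or token).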
    def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall:
        call = self.generate_call(node.node)
        if call.nodetype == NodeTypes.NAME_TOKEN:
            return FunctionCall(
                function=f"_PyPegen_lookahead_with_name",
                arguments=[positive, call.function, *call.arguments],
                return_type="int",
            )
        elif call.nodetype == NodeTypes.SOFT_KEYWORD:
            return FunctionCall(
                function=f"_PyPegen_lookahead_with_string",
                arguments=[positive, call.function, *call.arguments],
                return_type="int",
            )
        elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}:
            return FunctionCall(
                function=f"_PyPegen_lookahead_with_int",
                arguments=[positive, call.function, *call.arguments],
                return_type="int",
                comment=f"token={node.node}",
            )
        else:
            return FunctionCall(
                function=f"_PyPegen_lookahead",
                arguments=[positive, call.function, *call.arguments],
                return_type="int",
            )

    def visit_PositiveLookahead(self, node: PositiveLookahead) -> FunctionCall:
        return self.lookahead_call_helper(node, 1)

    def visit_NegativeLookahead(self, node: NegativeLookahead) -> FunctionCall:
        return self.lookahead_call_helper(node, 0)

    def visit_Forced(self, node: Forced) -> FunctionCall:
        call = self.generate_call(node.node)
        if isinstance(node.node, Leaf):
            assert isinstance(node.node, Leaf)
            val = ast.literal_eval(node.node.value)
            assert val in self.exact_tokens, f"{node.node.value} is not a known literal"
            type = self.exact_tokens[val]
            return FunctionCall(
                assigned_variable="_literal",
                function=f"_PyPegen_expect_forced_token",
                arguments=["p", type, f'"{val}"'],
                nodetype=NodeTypes.GENERIC_TOKEN,
                return_type="Token *",
                comment=f"forced_token='{val}'",
            )
        if isinstance(node.node, Group):
            call = self.visit(node.node.rhs)
            call.assigned_variable = None
            call.comment = None
            return FunctionCall(
                assigned_variable="_literal",
                function=f"_PyPegen_expect_forced_result",
                arguments=["p", str(call), f'"{node.node.rhs!s}"'],
                return_type="void *",
                comment=f"forced_token=({node.node.rhs!s})",
            )
        else:
            raise NotImplementedError(f"Forced tokens don't work with {node.node} nodes")

    def visit_Opt(self, node: Opt) -> FunctionCall:
        call = self.generate_call(node.node)
        return FunctionCall(
            assigned_variable="_opt_var",
            function=call.function,
            arguments=call.arguments,
            force_true=True,
            comment=f"{node}",
        )

    def visit_Repeat0(self, node: Repeat0) -> FunctionCall:
        if node in self.cache:
            return self.cache[node]
        name = self.gen.artificial_rule_from_repeat(node.node, False)
        self.cache[node] = FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type="asdl_seq *",
            comment=f"{node}",
        )
        return self.cache[node]

    def visit_Repeat1(self, node: Repeat1) -> FunctionCall:
        if node in self.cache:
            return self.cache[node]
        name = self.gen.artificial_rule_from_repeat(node.node, True)
        self.cache[node] = FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type="asdl_seq *",
            comment=f"{node}",
        )
        return self.cache[node]

    def visit_Gather(self, node: Gather) -> FunctionCall:
        if node in self.cache:
            return self.cache[node]
        name = self.gen.artifical_rule_from_gather(node)
        self.cache[node] = FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type="asdl_seq *",
            comment=f"{node}",
        )
        return self.cache[node]

    def visit_Group(self, node: Group) -> FunctionCall:
        return self.generate_call(node.rhs)

    def visit_Cut(self, node: Cut) -> FunctionCall:
        return FunctionCall(
            assigned_variable="_cut_var",
            return_type="int",
            function="1",
            nodetype=NodeTypes.CUT_OPERATOR,
        )

    def generate_call(self, node: Any) -> FunctionCall:
        return super().visit(node)


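# Emits the C parser itself: keyword tables, one <rule>_type #define and one
# <rule>_rule() function per rule, plus the surrounding boilerplate.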
class CParserGenerator(ParserGenerator, GrammarVisitor):
    def __init__(
        self,
        grammar: grammar.Grammar,
        tokens: Dict[int, str],
        exact_tokens: Dict[str, int],
        non_exact_tokens: Set[str],
        file: Optional[IO[Text]],
        debug: bool = False,
        skip_actions: bool = False,
    ):
        super().__init__(grammar, set(tokens.values()), file)
        self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor(
            self, exact_tokens, non_exact_tokens
        )
        self._varname_counter = 0
        self.debug = debug
        self.skip_actions = skip_actions
        self.cleanup_statements: List[str] = []

    def add_level(self) -> None:
        self.print("if (p->level++ == MAXSTACK) {")
        with self.indent():
            self.print("p->error_indicator = 1;")
            self.print("PyErr_NoMemory();")
        self.print("}")

    def remove_level(self) -> None:
        self.print("p->level--;")

    def add_return(self, ret_val: str) -> None:
        for stmt in self.cleanup_statements:
            self.print(stmt)
        self.remove_level()
        self.print(f"return {ret_val};")

    def unique_varname(self, name: str = "tmpvar") -> str:
        new_var = name + "_" + str(self._varname_counter)
        self._varname_counter += 1
        return new_var

    def call_with_errorcheck_return(self, call_text: str, returnval: str) -> None:
        error_var = self.unique_varname()
        self.print(f"int {error_var} = {call_text};")
        self.print(f"if ({error_var}) {{")
        with self.indent():
            self.add_return(returnval)
        self.print("}")

    def call_with_errorcheck_goto(self, call_text: str, goto_target: str) -> None:
        error_var = self.unique_varname()
        self.print(f"int {error_var} = {call_text};")
        self.print(f"if ({error_var}) {{")
        with self.indent():
            self.print(f"goto {goto_target};")
        self.print(f"}}")

    def out_of_memory_return(
        self,
        expr: str,
        cleanup_code: Optional[str] = None,
    ) -> None:
        self.print(f"if ({expr}) {{")
        with self.indent():
            if cleanup_code is not None:
                self.print(cleanup_code)
            self.print("p->error_indicator = 1;")
            self.print("PyErr_NoMemory();")
            self.add_return("NULL")
        self.print(f"}}")

    def out_of_memory_goto(self, expr: str, goto_target: str) -> None:
        self.print(f"if ({expr}) {{")
        with self.indent():
            self.print("PyErr_NoMemory();")
            self.print(f"goto {goto_target};")
        self.print(f"}}")

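    # Top-level driver: prints the header and keyword tables, then the rule type
    # #defines, forward declarations, every rule function, and finally the trailer.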
    def generate(self, filename: str) -> None:
        self.collect_rules()
        basename = os.path.basename(filename)
        self.print(f"// @generated by pegen from {basename}")
        header = self.grammar.metas.get("header", EXTENSION_PREFIX)
        if header:
            self.print(header.rstrip("\n"))
        subheader = self.grammar.metas.get("subheader", "")
        if subheader:
            self.print(subheader)
        self._setup_keywords()
        self._setup_soft_keywords()
        for i, (rulename, rule) in enumerate(self.all_rules.items(), 1000):
            comment = " // Left-recursive" if rule.left_recursive else ""
            self.print(f"#define {rulename}_type {i}{comment}")
        self.print()
        for rulename, rule in self.all_rules.items():
            if rule.is_loop() or rule.is_gather():
                type = "asdl_seq *"
            elif rule.type:
                type = rule.type + " "
            else:
                type = "void *"
            self.print(f"static {type}{rulename}_rule(Parser *p);")
        self.print()
        for rulename, rule in list(self.all_rules.items()):
            self.print()
            if rule.left_recursive:
                self.print("// Left-recursive")
            self.visit(rule)
        if self.skip_actions:
            mode = 0
        else:
            mode = int(self.rules["start"].type == "mod_ty") if "start" in self.rules else 1
            if mode == 1 and self.grammar.metas.get("bytecode"):
                mode += 1
        modulename = self.grammar.metas.get("modulename", "parse")
        trailer = self.grammar.metas.get("trailer", EXTENSION_SUFFIX)
        if trailer:
            self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))

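    # reserved_keywords is indexed by keyword length; each entry is a list of
    # (string, token type) pairs terminated by {NULL, -1}.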
    def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]:
        groups: Dict[int, List[Tuple[str, int]]] = {}
        for keyword_str, keyword_type in self.keywords.items():
            length = len(keyword_str)
            if length in groups:
                groups[length].append((keyword_str, keyword_type))
            else:
                groups[length] = [(keyword_str, keyword_type)]
        return groups

    def _setup_keywords(self) -> None:
        n_keyword_lists = (
            len(max(self.keywords.keys(), key=len)) + 1 if len(self.keywords) > 0 else 0
        )
        self.print(f"static const int n_keyword_lists = {n_keyword_lists};")
        groups = self._group_keywords_by_length()
        self.print("static KeywordToken *reserved_keywords[] = {")
        with self.indent():
            num_groups = max(groups) + 1 if groups else 1
            for keywords_length in range(num_groups):
                if keywords_length not in groups.keys():
                    self.print("(KeywordToken[]) {{NULL, -1}},")
                else:
                    self.print("(KeywordToken[]) {")
                    with self.indent():
                        for keyword_str, keyword_type in groups[keywords_length]:
                            self.print(f'{{"{keyword_str}", {keyword_type}}},')
                        self.print("{NULL, -1},")
                    self.print("},")
        self.print("};")

    def _setup_soft_keywords(self) -> None:
        soft_keywords = sorted(self.soft_keywords)
        self.print("static char *soft_keywords[] = {")
        with self.indent():
            for keyword in soft_keywords:
                self.print(f'"{keyword}",')
            self.print("NULL,")
        self.print("};")

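    # Actions that use EXTRA need the start/end position of the match; these
    # helpers emit the code that captures it into _start_*/_end_* locals.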
    def _set_up_token_start_metadata_extraction(self) -> None:
        self.print("if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {")
        with self.indent():
            self.print("p->error_indicator = 1;")
            self.add_return("NULL")
        self.print("}")
        self.print("int _start_lineno = p->tokens[_mark]->lineno;")
        self.print("UNUSED(_start_lineno); // Only used by EXTRA macro")
        self.print("int _start_col_offset = p->tokens[_mark]->col_offset;")
        self.print("UNUSED(_start_col_offset); // Only used by EXTRA macro")

    def _set_up_token_end_metadata_extraction(self) -> None:
        self.print("Token *_token = _PyPegen_get_last_nonwhitespace_token(p);")
        self.print("if (_token == NULL) {")
        with self.indent():
            self.add_return("NULL")
        self.print("}")
        self.print("int _end_lineno = _token->end_lineno;")
        self.print("UNUSED(_end_lineno); // Only used by EXTRA macro")
        self.print("int _end_col_offset = _token->end_col_offset;")
        self.print("UNUSED(_end_col_offset); // Only used by EXTRA macro")

    def _check_for_errors(self) -> None:
        self.print("if (p->error_indicator) {")
        with self.indent():
            self.add_return("NULL")
        self.print("}")

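    # Left-recursive leader rules are grown iteratively: call <rule>_raw, memoize
    # the (longer) result, and retry until the parse stops advancing.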
    def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
        self.print("{")
        with self.indent():
            self.add_level()
            self.print(f"{result_type} _res = NULL;")
            self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{")
            with self.indent():
                self.add_return("_res")
            self.print("}")
            self.print("int _mark = p->mark;")
            self.print("int _resmark = p->mark;")
            self.print("while (1) {")
            with self.indent():
                self.call_with_errorcheck_return(
                    f"_PyPegen_update_memo(p, _mark, {node.name}_type, _res)", "_res"
                )
                self.print("p->mark = _mark;")
                self.print(f"void *_raw = {node.name}_raw(p);")
                self.print("if (p->error_indicator) {")
                with self.indent():
                    self.add_return("NULL")
                self.print("}")
                self.print("if (_raw == NULL || p->mark <= _resmark)")
                with self.indent():
                    self.print("break;")
                self.print(f"_resmark = p->mark;")
                self.print("_res = _raw;")
            self.print("}")
            self.print(f"p->mark = _resmark;")
            self.add_return("_res")
        self.print("}")
        self.print(f"static {result_type}")
        self.print(f"{node.name}_raw(Parser *p)")

    def _should_memoize(self, node: Rule) -> bool:
        return node.memo and not node.left_recursive

    def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> None:
        memoize = self._should_memoize(node)

        with self.indent():
            self.add_level()
            self._check_for_errors()
            self.print(f"{result_type} _res = NULL;")
            if memoize:
                self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{")
                with self.indent():
                    self.add_return("_res")
                self.print("}")
            self.print("int _mark = p->mark;")
            if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
                self._set_up_token_start_metadata_extraction()
            self.visit(
                rhs,
                is_loop=False,
                is_gather=node.is_gather(),
                rulename=node.name,
            )
            if self.debug:
                self.print(f'D(fprintf(stderr, "Fail at %d: {node.name}\\n", p->mark));')
            self.print("_res = NULL;")
        self.print(" done:")
        with self.indent():
            if memoize:
                self.print(f"_PyPegen_insert_memo(p, _mark, {node.name}_type, _res);")
            self.add_return("_res")

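    # Loop rules collect each repetition into a heap-allocated _children buffer
    # and convert it into an asdl_seq at the end; _loop1 rules fail on zero items.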
    def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None:
        memoize = self._should_memoize(node)
        is_repeat1 = node.name.startswith("_loop1")

        with self.indent():
            self.add_level()
            self._check_for_errors()
            self.print("void *_res = NULL;")
            if memoize:
                self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{")
                with self.indent():
                    self.add_return("_res")
                self.print("}")
            self.print("int _mark = p->mark;")
            if memoize:
                self.print("int _start_mark = p->mark;")
            self.print("void **_children = PyMem_Malloc(sizeof(void *));")
            self.out_of_memory_return(f"!_children")
            self.print("Py_ssize_t _children_capacity = 1;")
            self.print("Py_ssize_t _n = 0;")
            if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
                self._set_up_token_start_metadata_extraction()
            self.visit(
                rhs,
                is_loop=True,
                is_gather=node.is_gather(),
                rulename=node.name,
            )
            if is_repeat1:
                self.print("if (_n == 0 || p->error_indicator) {")
                with self.indent():
                    self.print("PyMem_Free(_children);")
                    self.add_return("NULL")
                self.print("}")
            self.print("asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena);")
            self.out_of_memory_return(f"!_seq", cleanup_code="PyMem_Free(_children);")
            self.print("for (int i = 0; i < _n; i++) asdl_seq_SET_UNTYPED(_seq, i, _children[i]);")
            self.print("PyMem_Free(_children);")
            if memoize and node.name:
                self.print(f"_PyPegen_insert_memo(p, _start_mark, {node.name}_type, _seq);")
            self.add_return("_seq")

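    # Emits the complete C function for one rule: the grammar as a comment, the
    # memoization wrapper for left-recursive leaders, and the appropriate body.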
    def visit_Rule(self, node: Rule) -> None:
        is_loop = node.is_loop()
        is_gather = node.is_gather()
        rhs = node.flatten()
        if is_loop or is_gather:
            result_type = "asdl_seq *"
        elif node.type:
            result_type = node.type
        else:
            result_type = "void *"

        for line in str(node).splitlines():
            self.print(f"// {line}")
        if node.left_recursive and node.leader:
            self.print(f"static {result_type} {node.name}_raw(Parser *);")

        self.print(f"static {result_type}")
        self.print(f"{node.name}_rule(Parser *p)")

        if node.left_recursive and node.leader:
            self._set_up_rule_memoization(node, result_type)

        self.print("{")

        if node.name.endswith("without_invalid"):
            with self.indent():
                self.print("int _prev_call_invalid = p->call_invalid_rules;")
                self.print("p->call_invalid_rules = 0;")
                self.cleanup_statements.append("p->call_invalid_rules = _prev_call_invalid;")

        if is_loop:
            self._handle_loop_rule_body(node, rhs)
        else:
            self._handle_default_rule_body(node, rhs, result_type)

        if node.name.endswith("without_invalid"):
            self.cleanup_statements.pop()

        self.print("}")

    def visit_NamedItem(self, node: NamedItem) -> None:
        call = self.callmakervisitor.generate_call(node)
        if call.assigned_variable:
            call.assigned_variable = self.dedupe(call.assigned_variable)
        self.print(call)

    def visit_Rhs(
        self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str]
    ) -> None:
        if is_loop:
            assert len(node.alts) == 1
        for alt in node.alts:
            self.visit(alt, is_loop=is_loop, is_gather=is_gather, rulename=rulename)

    def join_conditions(self, keyword: str, node: Any) -> None:
        self.print(f"{keyword} (")
        with self.indent():
            first = True
            for item in node.items:
                if first:
                    first = False
                else:
                    self.print("&&")
                self.visit(item)
        self.print(")")

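    # Action emitters: the explicit grammar action, the default action (dummy
    # name or gather insertion), or a plain dummy when actions are skipped.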
    def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
        self.print(f"_res = {node.action};")

        self.print("if (_res == NULL && PyErr_Occurred()) {")
        with self.indent():
            self.print("p->error_indicator = 1;")
            if cleanup_code:
                self.print(cleanup_code)
            self.add_return("NULL")
        self.print("}")

        if self.debug:
            self.print(
                f'D(fprintf(stderr, "Hit with action [%d-%d]: %s\\n", _mark, p->mark, "{node}"));'
            )

    def emit_default_action(self, is_gather: bool, node: Alt) -> None:
        if len(self.local_variable_names) > 1:
            if is_gather:
                assert len(self.local_variable_names) == 2
                self.print(
                    f"_res = _PyPegen_seq_insert_in_front(p, "
                    f"{self.local_variable_names[0]}, {self.local_variable_names[1]});"
                )
            else:
                if self.debug:
                    self.print(
                        f'D(fprintf(stderr, "Hit without action [%d:%d]: %s\\n", _mark, p->mark, "{node}"));'
                    )
                self.print(
                    f"_res = _PyPegen_dummy_name(p, {', '.join(self.local_variable_names)});"
                )
        else:
            if self.debug:
                self.print(
                    f'D(fprintf(stderr, "Hit with default action [%d:%d]: %s\\n", _mark, p->mark, "{node}"));'
                )
            self.print(f"_res = {self.local_variable_names[0]};")

    def emit_dummy_action(self) -> None:
        self.print("_res = _PyPegen_dummy_name(p);")

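    # An alternative becomes an `if` (normal rules) or `while` (loop rules) whose
    # condition chains the calls for all of its items with &&.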
    def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
        self.join_conditions(keyword="if", node=node)
        self.print("{")
        # We have successfully parsed all the conditions for this option.
        with self.indent():
            node_str = str(node).replace('"', '\\"')
            self.print(
                f'D(fprintf(stderr, "%*c+ {rulename}[%d-%d]: %s succeeded!\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
            )
            # Prepare to emit the rule action and do so
            if node.action and "EXTRA" in node.action:
                self._set_up_token_end_metadata_extraction()
            if self.skip_actions:
                self.emit_dummy_action()
            elif node.action:
                self.emit_action(node)
            else:
                self.emit_default_action(is_gather, node)

            # As the current option has parsed correctly, do not continue with the rest.
            self.print(f"goto done;")
        self.print("}")

    def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
        # Condition of the main body of the alternative
        self.join_conditions(keyword="while", node=node)
        self.print("{")
        # We have successfully parsed one item!
        with self.indent():
            # Prepare to emit the rule action and do so
            if node.action and "EXTRA" in node.action:
                self._set_up_token_end_metadata_extraction()
            if self.skip_actions:
                self.emit_dummy_action()
            elif node.action:
                self.emit_action(node, cleanup_code="PyMem_Free(_children);")
            else:
                self.emit_default_action(is_gather, node)

            # Add the result of the rule to the temporary buffer of children. This
            # buffer is later used to populate an asdl_seq with all elements to return.
            self.print("if (_n == _children_capacity) {")
            with self.indent():
                self.print("_children_capacity *= 2;")
                self.print(
                    "void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *));"
                )
                self.out_of_memory_return(f"!_new_children", cleanup_code="PyMem_Free(_children);")
                self.print("_children = _new_children;")
            self.print("}")
            self.print("_children[_n++] = _res;")
            self.print("_mark = p->mark;")
        self.print("}")

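    # Each alternative gets its own block: declare its variables, try it, and on
    # failure restore p->mark; a cut that fired returns NULL for the whole rule.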
    def visit_Alt(
        self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str]
    ) -> None:
        if len(node.items) == 1 and str(node.items[0]).startswith("invalid_"):
            self.print(f"if (p->call_invalid_rules) {{ // {node}")
        else:
            self.print(f"{{ // {node}")
        with self.indent():
            self._check_for_errors()
            node_str = str(node).replace('"', '\\"')
            self.print(
                f'D(fprintf(stderr, "%*c> {rulename}[%d-%d]: %s\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
            )
            # Prepare variable declarations for the alternative
            vars = self.collect_vars(node)
            for v, var_type in sorted(item for item in vars.items() if item[0] is not None):
                if not var_type:
                    var_type = "void *"
                else:
                    var_type += " "
                if v == "_cut_var":
                    v += " = 0"  # cut_var must be initialized
                self.print(f"{var_type}{v};")
                if v and v.startswith("_opt_var"):
                    self.print(f"UNUSED({v}); // Silence compiler warnings")

            with self.local_variable_context():
                if is_loop:
                    self.handle_alt_loop(node, is_gather, rulename)
                else:
                    self.handle_alt_normal(node, is_gather, rulename)

            self.print("p->mark = _mark;")
            node_str = str(node).replace('"', '\\"')
            self.print(
                f"D(fprintf(stderr, \"%*c%s {rulename}[%d-%d]: %s failed!\\n\", p->level, ' ',\n"
                f' p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "{node_str}"));'
            )
            if "_cut_var" in vars:
                self.print("if (_cut_var) {")
                with self.indent():
                    self.add_return("NULL")
                self.print("}")
        self.print("}")

    def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]:
        types = {}
        with self.local_variable_context():
            for item in node.items:
                name, type = self.add_var(item)
                types[name] = type
        return types

    def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]:
        call = self.callmakervisitor.generate_call(node.item)
        name = node.name if node.name else call.assigned_variable
        if name is not None:
            name = self.dedupe(name)
        return_type = call.return_type if node.type is None else node.type
        return name, return_type