(root)/
Python-3.12.0/
Tools/
peg_generator/
pegen/
python_generator.py
       1  import os.path
       2  import token
       3  from typing import IO, Any, Dict, Optional, Sequence, Set, Text, Tuple
       4  
       5  from pegen import grammar
       6  from pegen.grammar import (
       7      Alt,
       8      Cut,
       9      Forced,
      10      Gather,
      11      GrammarVisitor,
      12      Group,
      13      Lookahead,
      14      NamedItem,
      15      NameLeaf,
      16      NegativeLookahead,
      17      Opt,
      18      PositiveLookahead,
      19      Repeat0,
      20      Repeat1,
      21      Rhs,
      22      Rule,
      23      StringLeaf,
      24  )
      25  from pegen.parser_generator import ParserGenerator
      26  
      27  MODULE_PREFIX = """\
      28  #!/usr/bin/env python3.8
      29  # @generated by pegen from {filename}
      30  
      31  import ast
      32  import sys
      33  import tokenize
      34  
      35  from typing import Any, Optional
      36  
      37  from pegen.parser import memoize, memoize_left_rec, logger, Parser
      38  
      39  """
      40  MODULE_SUFFIX = """
      41  
      42  if __name__ == '__main__':
      43      from pegen.parser import simple_parser_main
      44      simple_parser_main({class_name})
      45  """
      46  
      47  
      48  class ESC[4;38;5;81mInvalidNodeVisitor(ESC[4;38;5;149mGrammarVisitor):
      49      def visit_NameLeaf(self, node: NameLeaf) -> bool:
      50          name = node.value
      51          return name.startswith("invalid")
      52  
      53      def visit_StringLeaf(self, node: StringLeaf) -> bool:
      54          return False
      55  
      56      def visit_NamedItem(self, node: NamedItem) -> bool:
      57          return self.visit(node.item)
      58  
      59      def visit_Rhs(self, node: Rhs) -> bool:
      60          return any(self.visit(alt) for alt in node.alts)
      61  
      62      def visit_Alt(self, node: Alt) -> bool:
      63          return any(self.visit(item) for item in node.items)
      64  
      65      def lookahead_call_helper(self, node: Lookahead) -> bool:
      66          return self.visit(node.node)
      67  
      68      def visit_PositiveLookahead(self, node: PositiveLookahead) -> bool:
      69          return self.lookahead_call_helper(node)
      70  
      71      def visit_NegativeLookahead(self, node: NegativeLookahead) -> bool:
      72          return self.lookahead_call_helper(node)
      73  
      74      def visit_Opt(self, node: Opt) -> bool:
      75          return self.visit(node.node)
      76  
      77      def visit_Repeat(self, node: Repeat0) -> Tuple[str, str]:
      78          return self.visit(node.node)
      79  
      80      def visit_Gather(self, node: Gather) -> Tuple[str, str]:
      81          return self.visit(node.node)
      82  
      83      def visit_Group(self, node: Group) -> bool:
      84          return self.visit(node.rhs)
      85  
      86      def visit_Cut(self, node: Cut) -> bool:
      87          return False
      88  
      89      def visit_Forced(self, node: Forced) -> bool:
      90          return self.visit(node.node)
      91  
      92  
      93  class ESC[4;38;5;81mPythonCallMakerVisitor(ESC[4;38;5;149mGrammarVisitor):
      94      def __init__(self, parser_generator: ParserGenerator):
      95          self.gen = parser_generator
      96          self.cache: Dict[Any, Any] = {}
      97  
      98      def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
      99          name = node.value
     100          if name == "SOFT_KEYWORD":
     101              return "soft_keyword", "self.soft_keyword()"
     102          if name in ("NAME", "NUMBER", "STRING", "OP", "TYPE_COMMENT"):
     103              name = name.lower()
     104              return name, f"self.{name}()"
     105          if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER", "ASYNC", "AWAIT"):
     106              # Avoid using names that can be Python keywords
     107              return "_" + name.lower(), f"self.expect({name!r})"
     108          return name, f"self.{name}()"
     109  
     110      def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
     111          return "literal", f"self.expect({node.value})"
     112  
     113      def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
     114          if node in self.cache:
     115              return self.cache[node]
     116          if len(node.alts) == 1 and len(node.alts[0].items) == 1:
     117              self.cache[node] = self.visit(node.alts[0].items[0])
     118          else:
     119              name = self.gen.artifical_rule_from_rhs(node)
     120              self.cache[node] = name, f"self.{name}()"
     121          return self.cache[node]
     122  
     123      def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
     124          name, call = self.visit(node.item)
     125          if node.name:
     126              name = node.name
     127          return name, call
     128  
     129      def lookahead_call_helper(self, node: Lookahead) -> Tuple[str, str]:
     130          name, call = self.visit(node.node)
     131          head, tail = call.split("(", 1)
     132          assert tail[-1] == ")"
     133          tail = tail[:-1]
     134          return head, tail
     135  
     136      def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]:
     137          head, tail = self.lookahead_call_helper(node)
     138          return None, f"self.positive_lookahead({head}, {tail})"
     139  
     140      def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]:
     141          head, tail = self.lookahead_call_helper(node)
     142          return None, f"self.negative_lookahead({head}, {tail})"
     143  
     144      def visit_Opt(self, node: Opt) -> Tuple[str, str]:
     145          name, call = self.visit(node.node)
     146          # Note trailing comma (the call may already have one comma
     147          # at the end, for example when rules have both repeat0 and optional
     148          # markers, e.g: [rule*])
     149          if call.endswith(","):
     150              return "opt", call
     151          else:
     152              return "opt", f"{call},"
     153  
     154      def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
     155          if node in self.cache:
     156              return self.cache[node]
     157          name = self.gen.artificial_rule_from_repeat(node.node, False)
     158          self.cache[node] = name, f"self.{name}(),"  # Also a trailing comma!
     159          return self.cache[node]
     160  
     161      def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
     162          if node in self.cache:
     163              return self.cache[node]
     164          name = self.gen.artificial_rule_from_repeat(node.node, True)
     165          self.cache[node] = name, f"self.{name}()"  # But no trailing comma here!
     166          return self.cache[node]
     167  
     168      def visit_Gather(self, node: Gather) -> Tuple[str, str]:
     169          if node in self.cache:
     170              return self.cache[node]
     171          name = self.gen.artifical_rule_from_gather(node)
     172          self.cache[node] = name, f"self.{name}()"  # No trailing comma here either!
     173          return self.cache[node]
     174  
     175      def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
     176          return self.visit(node.rhs)
     177  
     178      def visit_Cut(self, node: Cut) -> Tuple[str, str]:
     179          return "cut", "True"
     180  
     181      def visit_Forced(self, node: Forced) -> Tuple[str, str]:
     182          if isinstance(node.node, Group):
     183              _, val = self.visit(node.node.rhs)
     184              return "forced", f"self.expect_forced({val}, '''({node.node.rhs!s})''')"
     185          else:
     186              return (
     187                  "forced",
     188                  f"self.expect_forced(self.expect({node.node.value}), {node.node.value!r})",
     189              )
     190  
     191  
     192  class ESC[4;38;5;81mPythonParserGenerator(ESC[4;38;5;149mParserGenerator, ESC[4;38;5;149mGrammarVisitor):
     193      def __init__(
     194          self,
     195          grammar: grammar.Grammar,
     196          file: Optional[IO[Text]],
     197          tokens: Set[str] = set(token.tok_name.values()),
     198          location_formatting: Optional[str] = None,
     199          unreachable_formatting: Optional[str] = None,
     200      ):
     201          tokens.add("SOFT_KEYWORD")
     202          super().__init__(grammar, tokens, file)
     203          self.callmakervisitor: PythonCallMakerVisitor = PythonCallMakerVisitor(self)
     204          self.invalidvisitor: InvalidNodeVisitor = InvalidNodeVisitor()
     205          self.unreachable_formatting = unreachable_formatting or "None  # pragma: no cover"
     206          self.location_formatting = (
     207              location_formatting
     208              or "lineno=start_lineno, col_offset=start_col_offset, "
     209              "end_lineno=end_lineno, end_col_offset=end_col_offset"
     210          )
     211  
     212      def generate(self, filename: str) -> None:
     213          self.collect_rules()
     214          header = self.grammar.metas.get("header", MODULE_PREFIX)
     215          if header is not None:
     216              basename = os.path.basename(filename)
     217              self.print(header.rstrip("\n").format(filename=basename))
     218          subheader = self.grammar.metas.get("subheader", "")
     219          if subheader:
     220              self.print(subheader)
     221          cls_name = self.grammar.metas.get("class", "GeneratedParser")
     222          self.print("# Keywords and soft keywords are listed at the end of the parser definition.")
     223          self.print(f"class {cls_name}(Parser):")
     224          for rule in self.all_rules.values():
     225              self.print()
     226              with self.indent():
     227                  self.visit(rule)
     228  
     229          self.print()
     230          with self.indent():
     231              self.print(f"KEYWORDS = {tuple(self.keywords)}")
     232              self.print(f"SOFT_KEYWORDS = {tuple(self.soft_keywords)}")
     233  
     234          trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX.format(class_name=cls_name))
     235          if trailer is not None:
     236              self.print(trailer.rstrip("\n"))
     237  
     238      def alts_uses_locations(self, alts: Sequence[Alt]) -> bool:
     239          for alt in alts:
     240              if alt.action and "LOCATIONS" in alt.action:
     241                  return True
     242              for n in alt.items:
     243                  if isinstance(n.item, Group) and self.alts_uses_locations(n.item.rhs.alts):
     244                      return True
     245          return False
     246  
     247      def visit_Rule(self, node: Rule) -> None:
     248          is_loop = node.is_loop()
     249          is_gather = node.is_gather()
     250          rhs = node.flatten()
     251          if node.left_recursive:
     252              if node.leader:
     253                  self.print("@memoize_left_rec")
     254              else:
     255                  # Non-leader rules in a cycle are not memoized,
     256                  # but they must still be logged.
     257                  self.print("@logger")
     258          else:
     259              self.print("@memoize")
     260          node_type = node.type or "Any"
     261          self.print(f"def {node.name}(self) -> Optional[{node_type}]:")
     262          with self.indent():
     263              self.print(f"# {node.name}: {rhs}")
     264              self.print("mark = self._mark()")
     265              if self.alts_uses_locations(node.rhs.alts):
     266                  self.print("tok = self._tokenizer.peek()")
     267                  self.print("start_lineno, start_col_offset = tok.start")
     268              if is_loop:
     269                  self.print("children = []")
     270              self.visit(rhs, is_loop=is_loop, is_gather=is_gather)
     271              if is_loop:
     272                  self.print("return children")
     273              else:
     274                  self.print("return None")
     275  
     276      def visit_NamedItem(self, node: NamedItem) -> None:
     277          name, call = self.callmakervisitor.visit(node.item)
     278          if node.name:
     279              name = node.name
     280          if not name:
     281              self.print(call)
     282          else:
     283              if name != "cut":
     284                  name = self.dedupe(name)
     285              self.print(f"({name} := {call})")
     286  
     287      def visit_Rhs(self, node: Rhs, is_loop: bool = False, is_gather: bool = False) -> None:
     288          if is_loop:
     289              assert len(node.alts) == 1
     290          for alt in node.alts:
     291              self.visit(alt, is_loop=is_loop, is_gather=is_gather)
     292  
     293      def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
     294          has_cut = any(isinstance(item.item, Cut) for item in node.items)
     295          with self.local_variable_context():
     296              if has_cut:
     297                  self.print("cut = False")
     298              if is_loop:
     299                  self.print("while (")
     300              else:
     301                  self.print("if (")
     302              with self.indent():
     303                  first = True
     304                  for item in node.items:
     305                      if first:
     306                          first = False
     307                      else:
     308                          self.print("and")
     309                      self.visit(item)
     310                      if is_gather:
     311                          self.print("is not None")
     312  
     313              self.print("):")
     314              with self.indent():
     315                  action = node.action
     316                  if not action:
     317                      if is_gather:
     318                          assert len(self.local_variable_names) == 2
     319                          action = (
     320                              f"[{self.local_variable_names[0]}] + {self.local_variable_names[1]}"
     321                          )
     322                      else:
     323                          if self.invalidvisitor.visit(node):
     324                              action = "UNREACHABLE"
     325                          elif len(self.local_variable_names) == 1:
     326                              action = f"{self.local_variable_names[0]}"
     327                          else:
     328                              action = f"[{', '.join(self.local_variable_names)}]"
     329                  elif "LOCATIONS" in action:
     330                      self.print("tok = self._tokenizer.get_last_non_whitespace_token()")
     331                      self.print("end_lineno, end_col_offset = tok.end")
     332                      action = action.replace("LOCATIONS", self.location_formatting)
     333  
     334                  if is_loop:
     335                      self.print(f"children.append({action})")
     336                      self.print(f"mark = self._mark()")
     337                  else:
     338                      if "UNREACHABLE" in action:
     339                          action = action.replace("UNREACHABLE", self.unreachable_formatting)
     340                      self.print(f"return {action}")
     341  
     342              self.print("self._reset(mark)")
     343              # Skip remaining alternatives if a cut was reached.
     344              if has_cut:
     345                  self.print("if cut: return None")