(root)/
Python-3.11.7/
Tools/
peg_generator/
pegen/
first_sets.py
       1  #!/usr/bin/env python3.8
       2  
       3  import argparse
       4  import pprint
       5  import sys
       6  from typing import Dict, Set
       7  
       8  from pegen.build import build_parser
       9  from pegen.grammar import (
      10      Alt,
      11      Cut,
      12      Gather,
      13      GrammarVisitor,
      14      Group,
      15      Lookahead,
      16      NamedItem,
      17      NameLeaf,
      18      NegativeLookahead,
      19      Opt,
      20      Repeat0,
      21      Repeat1,
      22      Rhs,
      23      Rule,
      24      StringLeaf,
      25  )
      26  from pegen.parser_generator import compute_nullables
      27  
      28  argparser = argparse.ArgumentParser(
      29      prog="calculate_first_sets",
      30      description="Calculate the first sets of a grammar",
      31  )
      32  argparser.add_argument("grammar_file", help="The grammar file")
      33  
      34  
      35  class ESC[4;38;5;81mFirstSetCalculator(ESC[4;38;5;149mGrammarVisitor):
      36      def __init__(self, rules: Dict[str, Rule]) -> None:
      37          self.rules = rules
      38          self.nullables = compute_nullables(rules)
      39          self.first_sets: Dict[str, Set[str]] = dict()
      40          self.in_process: Set[str] = set()
      41  
      42      def calculate(self) -> Dict[str, Set[str]]:
      43          for name, rule in self.rules.items():
      44              self.visit(rule)
      45          return self.first_sets
      46  
      47      def visit_Alt(self, item: Alt) -> Set[str]:
      48          result: Set[str] = set()
      49          to_remove: Set[str] = set()
      50          for other in item.items:
      51              new_terminals = self.visit(other)
      52              if isinstance(other.item, NegativeLookahead):
      53                  to_remove |= new_terminals
      54              result |= new_terminals
      55              if to_remove:
      56                  result -= to_remove
      57  
      58              # If the set of new terminals can start with the empty string,
      59              # it means that the item is completely nullable and we should
      60              # also considering at least the next item in case the current
      61              # one fails to parse.
      62  
      63              if "" in new_terminals:
      64                  continue
      65  
      66              if not isinstance(other.item, (Opt, NegativeLookahead, Repeat0)):
      67                  break
      68  
      69          # Do not allow the empty string to propagate.
      70          result.discard("")
      71  
      72          return result
      73  
      74      def visit_Cut(self, item: Cut) -> Set[str]:
      75          return set()
      76  
      77      def visit_Group(self, item: Group) -> Set[str]:
      78          return self.visit(item.rhs)
      79  
      80      def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]:
      81          return self.visit(item.node)
      82  
      83      def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]:
      84          return self.visit(item.node)
      85  
      86      def visit_NamedItem(self, item: NamedItem) -> Set[str]:
      87          return self.visit(item.item)
      88  
      89      def visit_Opt(self, item: Opt) -> Set[str]:
      90          return self.visit(item.node)
      91  
      92      def visit_Gather(self, item: Gather) -> Set[str]:
      93          return self.visit(item.node)
      94  
      95      def visit_Repeat0(self, item: Repeat0) -> Set[str]:
      96          return self.visit(item.node)
      97  
      98      def visit_Repeat1(self, item: Repeat1) -> Set[str]:
      99          return self.visit(item.node)
     100  
     101      def visit_NameLeaf(self, item: NameLeaf) -> Set[str]:
     102          if item.value not in self.rules:
     103              return {item.value}
     104  
     105          if item.value not in self.first_sets:
     106              self.first_sets[item.value] = self.visit(self.rules[item.value])
     107              return self.first_sets[item.value]
     108          elif item.value in self.in_process:
     109              return set()
     110  
     111          return self.first_sets[item.value]
     112  
     113      def visit_StringLeaf(self, item: StringLeaf) -> Set[str]:
     114          return {item.value}
     115  
     116      def visit_Rhs(self, item: Rhs) -> Set[str]:
     117          result: Set[str] = set()
     118          for alt in item.alts:
     119              result |= self.visit(alt)
     120          return result
     121  
     122      def visit_Rule(self, item: Rule) -> Set[str]:
     123          if item.name in self.in_process:
     124              return set()
     125          elif item.name not in self.first_sets:
     126              self.in_process.add(item.name)
     127              terminals = self.visit(item.rhs)
     128              if item in self.nullables:
     129                  terminals.add("")
     130              self.first_sets[item.name] = terminals
     131              self.in_process.remove(item.name)
     132          return self.first_sets[item.name]
     133  
     134  
     135  def main() -> None:
     136      args = argparser.parse_args()
     137  
     138      try:
     139          grammar, parser, tokenizer = build_parser(args.grammar_file)
     140      except Exception as err:
     141          print("ERROR: Failed to parse grammar file", file=sys.stderr)
     142          sys.exit(1)
     143  
     144      firs_sets = FirstSetCalculator(grammar.rules).calculate()
     145      pprint.pprint(firs_sets)
     146  
     147  
     148  if __name__ == "__main__":
     149      main()