(root)/
Python-3.11.7/
Tools/
peg_generator/
pegen/
parser_generator.py
       1  import ast
       2  import contextlib
       3  import re
       4  from abc import abstractmethod
       5  from typing import (
       6      IO,
       7      AbstractSet,
       8      Any,
       9      Dict,
      10      Iterable,
      11      Iterator,
      12      List,
      13      Optional,
      14      Set,
      15      Text,
      16      Tuple,
      17      Union,
      18  )
      19  
      20  from pegen import sccutils
      21  from pegen.grammar import (
      22      Alt,
      23      Cut,
      24      Forced,
      25      Gather,
      26      Grammar,
      27      GrammarError,
      28      GrammarVisitor,
      29      Group,
      30      Lookahead,
      31      NamedItem,
      32      NameLeaf,
      33      Opt,
      34      Plain,
      35      Repeat0,
      36      Repeat1,
      37      Rhs,
      38      Rule,
      39      StringLeaf,
      40  )
      41  
      42  
      43  class ESC[4;38;5;81mRuleCollectorVisitor(ESC[4;38;5;149mGrammarVisitor):
      44      """Visitor that invokes a provieded callmaker visitor with just the NamedItem nodes"""
      45  
      46      def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None:
      47          self.rulses = rules
      48          self.callmaker = callmakervisitor
      49  
      50      def visit_Rule(self, rule: Rule) -> None:
      51          self.visit(rule.flatten())
      52  
      53      def visit_NamedItem(self, item: NamedItem) -> None:
      54          self.callmaker.visit(item)
      55  
      56  
      57  class ESC[4;38;5;81mKeywordCollectorVisitor(ESC[4;38;5;149mGrammarVisitor):
      58      """Visitor that collects all the keywods and soft keywords in the Grammar"""
      59  
      60      def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]):
      61          self.generator = gen
      62          self.keywords = keywords
      63          self.soft_keywords = soft_keywords
      64  
      65      def visit_StringLeaf(self, node: StringLeaf) -> None:
      66          val = ast.literal_eval(node.value)
      67          if re.match(r"[a-zA-Z_]\w*\Z", val):  # This is a keyword
      68              if node.value.endswith("'") and node.value not in self.keywords:
      69                  self.keywords[val] = self.generator.keyword_type()
      70              else:
      71                  return self.soft_keywords.add(node.value.replace('"', ""))
      72  
      73  
      74  class ESC[4;38;5;81mRuleCheckingVisitor(ESC[4;38;5;149mGrammarVisitor):
      75      def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
      76          self.rules = rules
      77          self.tokens = tokens
      78  
      79      def visit_NameLeaf(self, node: NameLeaf) -> None:
      80          if node.value not in self.rules and node.value not in self.tokens:
      81              raise GrammarError(f"Dangling reference to rule {node.value!r}")
      82  
      83      def visit_NamedItem(self, node: NamedItem) -> None:
      84          if node.name and node.name.startswith("_"):
      85              raise GrammarError(f"Variable names cannot start with underscore: '{node.name}'")
      86          self.visit(node.item)
      87  
      88  
      89  class ESC[4;38;5;81mParserGenerator:
      90  
      91      callmakervisitor: GrammarVisitor
      92  
      93      def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
      94          self.grammar = grammar
      95          self.tokens = tokens
      96          self.keywords: Dict[str, int] = {}
      97          self.soft_keywords: Set[str] = set()
      98          self.rules = grammar.rules
      99          self.validate_rule_names()
     100          if "trailer" not in grammar.metas and "start" not in self.rules:
     101              raise GrammarError("Grammar without a trailer must have a 'start' rule")
     102          checker = RuleCheckingVisitor(self.rules, self.tokens)
     103          for rule in self.rules.values():
     104              checker.visit(rule)
     105          self.file = file
     106          self.level = 0
     107          self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
     108          self.counter = 0  # For name_rule()/name_loop()
     109          self.keyword_counter = 499  # For keyword_type()
     110          self.all_rules: Dict[str, Rule] = self.rules.copy()  # Rules + temporal rules
     111          self._local_variable_stack: List[List[str]] = []
     112  
     113      def validate_rule_names(self) -> None:
     114          for rule in self.rules:
     115              if rule.startswith("_"):
     116                  raise GrammarError(f"Rule names cannot start with underscore: '{rule}'")
     117  
     118      @contextlib.contextmanager
     119      def local_variable_context(self) -> Iterator[None]:
     120          self._local_variable_stack.append([])
     121          yield
     122          self._local_variable_stack.pop()
     123  
     124      @property
     125      def local_variable_names(self) -> List[str]:
     126          return self._local_variable_stack[-1]
     127  
     128      @abstractmethod
     129      def generate(self, filename: str) -> None:
     130          raise NotImplementedError
     131  
     132      @contextlib.contextmanager
     133      def indent(self) -> Iterator[None]:
     134          self.level += 1
     135          try:
     136              yield
     137          finally:
     138              self.level -= 1
     139  
     140      def print(self, *args: object) -> None:
     141          if not args:
     142              print(file=self.file)
     143          else:
     144              print("    " * self.level, end="", file=self.file)
     145              print(*args, file=self.file)
     146  
     147      def printblock(self, lines: str) -> None:
     148          for line in lines.splitlines():
     149              self.print(line)
     150  
     151      def collect_rules(self) -> None:
     152          keyword_collector = KeywordCollectorVisitor(self, self.keywords, self.soft_keywords)
     153          for rule in self.all_rules.values():
     154              keyword_collector.visit(rule)
     155  
     156          rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor)
     157          done: Set[str] = set()
     158          while True:
     159              computed_rules = list(self.all_rules)
     160              todo = [i for i in computed_rules if i not in done]
     161              if not todo:
     162                  break
     163              done = set(self.all_rules)
     164              for rulename in todo:
     165                  rule_collector.visit(self.all_rules[rulename])
     166  
     167      def keyword_type(self) -> int:
     168          self.keyword_counter += 1
     169          return self.keyword_counter
     170  
     171      def artifical_rule_from_rhs(self, rhs: Rhs) -> str:
     172          self.counter += 1
     173          name = f"_tmp_{self.counter}"  # TODO: Pick a nicer name.
     174          self.all_rules[name] = Rule(name, None, rhs)
     175          return name
     176  
     177      def artificial_rule_from_repeat(self, node: Plain, is_repeat1: bool) -> str:
     178          self.counter += 1
     179          if is_repeat1:
     180              prefix = "_loop1_"
     181          else:
     182              prefix = "_loop0_"
     183          name = f"{prefix}{self.counter}"
     184          self.all_rules[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
     185          return name
     186  
     187      def artifical_rule_from_gather(self, node: Gather) -> str:
     188          self.counter += 1
     189          name = f"_gather_{self.counter}"
     190          self.counter += 1
     191          extra_function_name = f"_loop0_{self.counter}"
     192          extra_function_alt = Alt(
     193              [NamedItem(None, node.separator), NamedItem("elem", node.node)],
     194              action="elem",
     195          )
     196          self.all_rules[extra_function_name] = Rule(
     197              extra_function_name,
     198              None,
     199              Rhs([extra_function_alt]),
     200          )
     201          alt = Alt(
     202              [NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],
     203          )
     204          self.all_rules[name] = Rule(
     205              name,
     206              None,
     207              Rhs([alt]),
     208          )
     209          return name
     210  
     211      def dedupe(self, name: str) -> str:
     212          origname = name
     213          counter = 0
     214          while name in self.local_variable_names:
     215              counter += 1
     216              name = f"{origname}_{counter}"
     217          self.local_variable_names.append(name)
     218          return name
     219  
     220  
     221  class ESC[4;38;5;81mNullableVisitor(ESC[4;38;5;149mGrammarVisitor):
     222      def __init__(self, rules: Dict[str, Rule]) -> None:
     223          self.rules = rules
     224          self.visited: Set[Any] = set()
     225          self.nullables: Set[Union[Rule, NamedItem]] = set()
     226  
     227      def visit_Rule(self, rule: Rule) -> bool:
     228          if rule in self.visited:
     229              return False
     230          self.visited.add(rule)
     231          if self.visit(rule.rhs):
     232              self.nullables.add(rule)
     233          return rule in self.nullables
     234  
     235      def visit_Rhs(self, rhs: Rhs) -> bool:
     236          for alt in rhs.alts:
     237              if self.visit(alt):
     238                  return True
     239          return False
     240  
     241      def visit_Alt(self, alt: Alt) -> bool:
     242          for item in alt.items:
     243              if not self.visit(item):
     244                  return False
     245          return True
     246  
     247      def visit_Forced(self, force: Forced) -> bool:
     248          return True
     249  
     250      def visit_LookAhead(self, lookahead: Lookahead) -> bool:
     251          return True
     252  
     253      def visit_Opt(self, opt: Opt) -> bool:
     254          return True
     255  
     256      def visit_Repeat0(self, repeat: Repeat0) -> bool:
     257          return True
     258  
     259      def visit_Repeat1(self, repeat: Repeat1) -> bool:
     260          return False
     261  
     262      def visit_Gather(self, gather: Gather) -> bool:
     263          return False
     264  
     265      def visit_Cut(self, cut: Cut) -> bool:
     266          return False
     267  
     268      def visit_Group(self, group: Group) -> bool:
     269          return self.visit(group.rhs)
     270  
     271      def visit_NamedItem(self, item: NamedItem) -> bool:
     272          if self.visit(item.item):
     273              self.nullables.add(item)
     274          return item in self.nullables
     275  
     276      def visit_NameLeaf(self, node: NameLeaf) -> bool:
     277          if node.value in self.rules:
     278              return self.visit(self.rules[node.value])
     279          # Token or unknown; never empty.
     280          return False
     281  
     282      def visit_StringLeaf(self, node: StringLeaf) -> bool:
     283          # The string token '' is considered empty.
     284          return not node.value
     285  
     286  
     287  def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]:
     288      """Compute which rules in a grammar are nullable.
     289  
     290      Thanks to TatSu (tatsu/leftrec.py) for inspiration.
     291      """
     292      nullable_visitor = NullableVisitor(rules)
     293      for rule in rules.values():
     294          nullable_visitor.visit(rule)
     295      return nullable_visitor.nullables
     296  
     297  
     298  class ESC[4;38;5;81mInitialNamesVisitor(ESC[4;38;5;149mGrammarVisitor):
     299      def __init__(self, rules: Dict[str, Rule]) -> None:
     300          self.rules = rules
     301          self.nullables = compute_nullables(rules)
     302  
     303      def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]:
     304          names: Set[str] = set()
     305          for value in node:
     306              if isinstance(value, list):
     307                  for item in value:
     308                      names |= self.visit(item, *args, **kwargs)
     309              else:
     310                  names |= self.visit(value, *args, **kwargs)
     311          return names
     312  
     313      def visit_Alt(self, alt: Alt) -> Set[Any]:
     314          names: Set[str] = set()
     315          for item in alt.items:
     316              names |= self.visit(item)
     317              if item not in self.nullables:
     318                  break
     319          return names
     320  
     321      def visit_Forced(self, force: Forced) -> Set[Any]:
     322          return set()
     323  
     324      def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]:
     325          return set()
     326  
     327      def visit_Cut(self, cut: Cut) -> Set[Any]:
     328          return set()
     329  
     330      def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]:
     331          return {node.value}
     332  
     333      def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]:
     334          return set()
     335  
     336  
     337  def compute_left_recursives(
     338      rules: Dict[str, Rule]
     339  ) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]:
     340      graph = make_first_graph(rules)
     341      sccs = list(sccutils.strongly_connected_components(graph.keys(), graph))
     342      for scc in sccs:
     343          if len(scc) > 1:
     344              for name in scc:
     345                  rules[name].left_recursive = True
     346              # Try to find a leader such that all cycles go through it.
     347              leaders = set(scc)
     348              for start in scc:
     349                  for cycle in sccutils.find_cycles_in_scc(graph, scc, start):
     350                      # print("Cycle:", " -> ".join(cycle))
     351                      leaders -= scc - set(cycle)
     352                      if not leaders:
     353                          raise ValueError(
     354                              f"SCC {scc} has no leadership candidate (no element is included in all cycles)"
     355                          )
     356              # print("Leaders:", leaders)
     357              leader = min(leaders)  # Pick an arbitrary leader from the candidates.
     358              rules[leader].leader = True
     359          else:
     360              name = min(scc)  # The only element.
     361              if name in graph[name]:
     362                  rules[name].left_recursive = True
     363                  rules[name].leader = True
     364      return graph, sccs
     365  
     366  
     367  def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
     368      """Compute the graph of left-invocations.
     369  
     370      There's an edge from A to B if A may invoke B at its initial
     371      position.
     372  
     373      Note that this requires the nullable flags to have been computed.
     374      """
     375      initial_name_visitor = InitialNamesVisitor(rules)
     376      graph = {}
     377      vertices: Set[str] = set()
     378      for rulename, rhs in rules.items():
     379          graph[rulename] = names = initial_name_visitor.visit(rhs)
     380          vertices |= names
     381      for vertex in vertices:
     382          graph.setdefault(vertex, set())
     383      return graph