(root)/
Python-3.12.0/
Lib/
lib2to3/
patcomp.py
       1  # Copyright 2006 Google, Inc. All Rights Reserved.
       2  # Licensed to PSF under a Contributor Agreement.
       3  
       4  """Pattern compiler.
       5  
       6  The grammar is taken from PatternGrammar.txt.
       7  
       8  The compiler compiles a pattern to a pytree.*Pattern instance.
       9  """
      10  
      11  __author__ = "Guido van Rossum <guido@python.org>"
      12  
      13  # Python imports
      14  import io
      15  
      16  # Fairly local imports
      17  from .pgen2 import driver, literals, token, tokenize, parse, grammar
      18  
      19  # Really local imports
      20  from . import pytree
      21  from . import pygram
      22  
      23  
      24  class ESC[4;38;5;81mPatternSyntaxError(ESC[4;38;5;149mException):
      25      pass
      26  
      27  
      28  def tokenize_wrapper(input):
      29      """Tokenizes a string suppressing significant whitespace."""
      30      skip = {token.NEWLINE, token.INDENT, token.DEDENT}
      31      tokens = tokenize.generate_tokens(io.StringIO(input).readline)
      32      for quintuple in tokens:
      33          type, value, start, end, line_text = quintuple
      34          if type not in skip:
      35              yield quintuple
      36  
      37  
      38  class ESC[4;38;5;81mPatternCompiler(ESC[4;38;5;149mobject):
      39  
      40      def __init__(self, grammar_file=None):
      41          """Initializer.
      42  
      43          Takes an optional alternative filename for the pattern grammar.
      44          """
      45          if grammar_file is None:
      46              self.grammar = pygram.pattern_grammar
      47              self.syms = pygram.pattern_symbols
      48          else:
      49              self.grammar = driver.load_grammar(grammar_file)
      50              self.syms = pygram.Symbols(self.grammar)
      51          self.pygrammar = pygram.python_grammar
      52          self.pysyms = pygram.python_symbols
      53          self.driver = driver.Driver(self.grammar, convert=pattern_convert)
      54  
      55      def compile_pattern(self, input, debug=False, with_tree=False):
      56          """Compiles a pattern string to a nested pytree.*Pattern object."""
      57          tokens = tokenize_wrapper(input)
      58          try:
      59              root = self.driver.parse_tokens(tokens, debug=debug)
      60          except parse.ParseError as e:
      61              raise PatternSyntaxError(str(e)) from None
      62          if with_tree:
      63              return self.compile_node(root), root
      64          else:
      65              return self.compile_node(root)
      66  
      67      def compile_node(self, node):
      68          """Compiles a node, recursively.
      69  
      70          This is one big switch on the node type.
      71          """
      72          # XXX Optimize certain Wildcard-containing-Wildcard patterns
      73          # that can be merged
      74          if node.type == self.syms.Matcher:
      75              node = node.children[0] # Avoid unneeded recursion
      76  
      77          if node.type == self.syms.Alternatives:
      78              # Skip the odd children since they are just '|' tokens
      79              alts = [self.compile_node(ch) for ch in node.children[::2]]
      80              if len(alts) == 1:
      81                  return alts[0]
      82              p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
      83              return p.optimize()
      84  
      85          if node.type == self.syms.Alternative:
      86              units = [self.compile_node(ch) for ch in node.children]
      87              if len(units) == 1:
      88                  return units[0]
      89              p = pytree.WildcardPattern([units], min=1, max=1)
      90              return p.optimize()
      91  
      92          if node.type == self.syms.NegatedUnit:
      93              pattern = self.compile_basic(node.children[1:])
      94              p = pytree.NegatedPattern(pattern)
      95              return p.optimize()
      96  
      97          assert node.type == self.syms.Unit
      98  
      99          name = None
     100          nodes = node.children
     101          if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
     102              name = nodes[0].value
     103              nodes = nodes[2:]
     104          repeat = None
     105          if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
     106              repeat = nodes[-1]
     107              nodes = nodes[:-1]
     108  
     109          # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
     110          pattern = self.compile_basic(nodes, repeat)
     111  
     112          if repeat is not None:
     113              assert repeat.type == self.syms.Repeater
     114              children = repeat.children
     115              child = children[0]
     116              if child.type == token.STAR:
     117                  min = 0
     118                  max = pytree.HUGE
     119              elif child.type == token.PLUS:
     120                  min = 1
     121                  max = pytree.HUGE
     122              elif child.type == token.LBRACE:
     123                  assert children[-1].type == token.RBRACE
     124                  assert  len(children) in (3, 5)
     125                  min = max = self.get_int(children[1])
     126                  if len(children) == 5:
     127                      max = self.get_int(children[3])
     128              else:
     129                  assert False
     130              if min != 1 or max != 1:
     131                  pattern = pattern.optimize()
     132                  pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
     133  
     134          if name is not None:
     135              pattern.name = name
     136          return pattern.optimize()
     137  
     138      def compile_basic(self, nodes, repeat=None):
     139          # Compile STRING | NAME [Details] | (...) | [...]
     140          assert len(nodes) >= 1
     141          node = nodes[0]
     142          if node.type == token.STRING:
     143              value = str(literals.evalString(node.value))
     144              return pytree.LeafPattern(_type_of_literal(value), value)
     145          elif node.type == token.NAME:
     146              value = node.value
     147              if value.isupper():
     148                  if value not in TOKEN_MAP:
     149                      raise PatternSyntaxError("Invalid token: %r" % value)
     150                  if nodes[1:]:
     151                      raise PatternSyntaxError("Can't have details for token")
     152                  return pytree.LeafPattern(TOKEN_MAP[value])
     153              else:
     154                  if value == "any":
     155                      type = None
     156                  elif not value.startswith("_"):
     157                      type = getattr(self.pysyms, value, None)
     158                      if type is None:
     159                          raise PatternSyntaxError("Invalid symbol: %r" % value)
     160                  if nodes[1:]: # Details present
     161                      content = [self.compile_node(nodes[1].children[1])]
     162                  else:
     163                      content = None
     164                  return pytree.NodePattern(type, content)
     165          elif node.value == "(":
     166              return self.compile_node(nodes[1])
     167          elif node.value == "[":
     168              assert repeat is None
     169              subpattern = self.compile_node(nodes[1])
     170              return pytree.WildcardPattern([[subpattern]], min=0, max=1)
     171          assert False, node
     172  
     173      def get_int(self, node):
     174          assert node.type == token.NUMBER
     175          return int(node.value)
     176  
     177  
     178  # Map named tokens to the type value for a LeafPattern
     179  TOKEN_MAP = {"NAME": token.NAME,
     180               "STRING": token.STRING,
     181               "NUMBER": token.NUMBER,
     182               "TOKEN": None}
     183  
     184  
     185  def _type_of_literal(value):
     186      if value[0].isalpha():
     187          return token.NAME
     188      elif value in grammar.opmap:
     189          return grammar.opmap[value]
     190      else:
     191          return None
     192  
     193  
     194  def pattern_convert(grammar, raw_node_info):
     195      """Converts raw node information to a Node or Leaf instance."""
     196      type, value, context, children = raw_node_info
     197      if children or type in grammar.number2symbol:
     198          return pytree.Node(type, children, context=context)
     199      else:
     200          return pytree.Leaf(type, value, context=context)
     201  
     202  
     203  def compile_pattern(pattern):
     204      return PatternCompiler().compile_pattern(pattern)