(root)/
Python-3.12.0/
Lib/
lib2to3/
btm_matcher.py
       1  """A bottom-up tree matching algorithm implementation meant to speed
       2  up 2to3's matching process. After the tree patterns are reduced to
       3  their rarest linear path, a linear Aho-Corasick automaton is
       4  created. The linear automaton traverses the linear paths from the
       5  leaves to the root of the AST and returns a set of nodes for further
       6  matching. This reduces significantly the number of candidate nodes."""
       7  
       8  __author__ = "George Boutsioukis <gboutsioukis@gmail.com>"
       9  
      10  import logging
      11  import itertools
      12  from collections import defaultdict
      13  
      14  from . import pytree
      15  from .btm_utils import reduce_tree
      16  
      17  class ESC[4;38;5;81mBMNode(ESC[4;38;5;149mobject):
      18      """Class for a node of the Aho-Corasick automaton used in matching"""
      19      count = itertools.count()
      20      def __init__(self):
      21          self.transition_table = {}
      22          self.fixers = []
      23          self.id = next(BMNode.count)
      24          self.content = ''
      25  
      26  class ESC[4;38;5;81mBottomMatcher(ESC[4;38;5;149mobject):
      27      """The main matcher class. After instantiating the patterns should
      28      be added using the add_fixer method"""
      29  
      30      def __init__(self):
      31          self.match = set()
      32          self.root = BMNode()
      33          self.nodes = [self.root]
      34          self.fixers = []
      35          self.logger = logging.getLogger("RefactoringTool")
      36  
      37      def add_fixer(self, fixer):
      38          """Reduces a fixer's pattern tree to a linear path and adds it
      39          to the matcher(a common Aho-Corasick automaton). The fixer is
      40          appended on the matching states and called when they are
      41          reached"""
      42          self.fixers.append(fixer)
      43          tree = reduce_tree(fixer.pattern_tree)
      44          linear = tree.get_linear_subpattern()
      45          match_nodes = self.add(linear, start=self.root)
      46          for match_node in match_nodes:
      47              match_node.fixers.append(fixer)
      48  
      49      def add(self, pattern, start):
      50          "Recursively adds a linear pattern to the AC automaton"
      51          #print("adding pattern", pattern, "to", start)
      52          if not pattern:
      53              #print("empty pattern")
      54              return [start]
      55          if isinstance(pattern[0], tuple):
      56              #alternatives
      57              #print("alternatives")
      58              match_nodes = []
      59              for alternative in pattern[0]:
      60                  #add all alternatives, and add the rest of the pattern
      61                  #to each end node
      62                  end_nodes = self.add(alternative, start=start)
      63                  for end in end_nodes:
      64                      match_nodes.extend(self.add(pattern[1:], end))
      65              return match_nodes
      66          else:
      67              #single token
      68              #not last
      69              if pattern[0] not in start.transition_table:
      70                  #transition did not exist, create new
      71                  next_node = BMNode()
      72                  start.transition_table[pattern[0]] = next_node
      73              else:
      74                  #transition exists already, follow
      75                  next_node = start.transition_table[pattern[0]]
      76  
      77              if pattern[1:]:
      78                  end_nodes = self.add(pattern[1:], start=next_node)
      79              else:
      80                  end_nodes = [next_node]
      81              return end_nodes
      82  
      83      def run(self, leaves):
      84          """The main interface with the bottom matcher. The tree is
      85          traversed from the bottom using the constructed
      86          automaton. Nodes are only checked once as the tree is
      87          retraversed. When the automaton fails, we give it one more
      88          shot(in case the above tree matches as a whole with the
      89          rejected leaf), then we break for the next leaf. There is the
      90          special case of multiple arguments(see code comments) where we
      91          recheck the nodes
      92  
      93          Args:
      94             The leaves of the AST tree to be matched
      95  
      96          Returns:
      97             A dictionary of node matches with fixers as the keys
      98          """
      99          current_ac_node = self.root
     100          results = defaultdict(list)
     101          for leaf in leaves:
     102              current_ast_node = leaf
     103              while current_ast_node:
     104                  current_ast_node.was_checked = True
     105                  for child in current_ast_node.children:
     106                      # multiple statements, recheck
     107                      if isinstance(child, pytree.Leaf) and child.value == ";":
     108                          current_ast_node.was_checked = False
     109                          break
     110                  if current_ast_node.type == 1:
     111                      #name
     112                      node_token = current_ast_node.value
     113                  else:
     114                      node_token = current_ast_node.type
     115  
     116                  if node_token in current_ac_node.transition_table:
     117                      #token matches
     118                      current_ac_node = current_ac_node.transition_table[node_token]
     119                      for fixer in current_ac_node.fixers:
     120                          results[fixer].append(current_ast_node)
     121                  else:
     122                      #matching failed, reset automaton
     123                      current_ac_node = self.root
     124                      if (current_ast_node.parent is not None
     125                          and current_ast_node.parent.was_checked):
     126                          #the rest of the tree upwards has been checked, next leaf
     127                          break
     128  
     129                      #recheck the rejected node once from the root
     130                      if node_token in current_ac_node.transition_table:
     131                          #token matches
     132                          current_ac_node = current_ac_node.transition_table[node_token]
     133                          for fixer in current_ac_node.fixers:
     134                              results[fixer].append(current_ast_node)
     135  
     136                  current_ast_node = current_ast_node.parent
     137          return results
     138  
     139      def print_ac(self):
     140          "Prints a graphviz diagram of the BM automaton(for debugging)"
     141          print("digraph g{")
     142          def print_node(node):
     143              for subnode_key in node.transition_table.keys():
     144                  subnode = node.transition_table[subnode_key]
     145                  print("%d -> %d [label=%s] //%s" %
     146                        (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers)))
     147                  if subnode_key == 1:
     148                      print(subnode.content)
     149                  print_node(subnode)
     150          print_node(self.root)
     151          print("}")
     152  
     153  # taken from pytree.py for debugging; only used by print_ac
     154  _type_reprs = {}
     155  def type_repr(type_num):
     156      global _type_reprs
     157      if not _type_reprs:
     158          from .pygram import python_symbols
     159          # printing tokens is possible but not as useful
     160          # from .pgen2 import token // token.__dict__.items():
     161          for name, val in python_symbols.__dict__.items():
     162              if type(val) == int: _type_reprs[val] = name
     163      return _type_reprs.setdefault(type_num, type_num)