(root)/
Python-3.12.0/
Lib/
lib2to3/
btm_utils.py
       1  "Utility functions used by the btm_matcher module"
       2  
       3  from . import pytree
       4  from .pgen2 import grammar, token
       5  from .pygram import pattern_symbols, python_symbols
       6  
       7  syms = pattern_symbols
       8  pysyms = python_symbols
       9  tokens = grammar.opmap
      10  token_labels = token
      11  
      12  TYPE_ANY = -1
      13  TYPE_ALTERNATIVES = -2
      14  TYPE_GROUP = -3
      15  
      16  class ESC[4;38;5;81mMinNode(ESC[4;38;5;149mobject):
      17      """This class serves as an intermediate representation of the
      18      pattern tree during the conversion to sets of leaf-to-root
      19      subpatterns"""
      20  
      21      def __init__(self, type=None, name=None):
      22          self.type = type
      23          self.name = name
      24          self.children = []
      25          self.leaf = False
      26          self.parent = None
      27          self.alternatives = []
      28          self.group = []
      29  
      30      def __repr__(self):
      31          return str(self.type) + ' ' + str(self.name)
      32  
      33      def leaf_to_root(self):
      34          """Internal method. Returns a characteristic path of the
      35          pattern tree. This method must be run for all leaves until the
      36          linear subpatterns are merged into a single"""
      37          node = self
      38          subp = []
      39          while node:
      40              if node.type == TYPE_ALTERNATIVES:
      41                  node.alternatives.append(subp)
      42                  if len(node.alternatives) == len(node.children):
      43                      #last alternative
      44                      subp = [tuple(node.alternatives)]
      45                      node.alternatives = []
      46                      node = node.parent
      47                      continue
      48                  else:
      49                      node = node.parent
      50                      subp = None
      51                      break
      52  
      53              if node.type == TYPE_GROUP:
      54                  node.group.append(subp)
      55                  #probably should check the number of leaves
      56                  if len(node.group) == len(node.children):
      57                      subp = get_characteristic_subpattern(node.group)
      58                      node.group = []
      59                      node = node.parent
      60                      continue
      61                  else:
      62                      node = node.parent
      63                      subp = None
      64                      break
      65  
      66              if node.type == token_labels.NAME and node.name:
      67                  #in case of type=name, use the name instead
      68                  subp.append(node.name)
      69              else:
      70                  subp.append(node.type)
      71  
      72              node = node.parent
      73          return subp
      74  
      75      def get_linear_subpattern(self):
      76          """Drives the leaf_to_root method. The reason that
      77          leaf_to_root must be run multiple times is because we need to
      78          reject 'group' matches; for example the alternative form
      79          (a | b c) creates a group [b c] that needs to be matched. Since
      80          matching multiple linear patterns overcomes the automaton's
      81          capabilities, leaf_to_root merges each group into a single
      82          choice based on 'characteristic'ity,
      83  
      84          i.e. (a|b c) -> (a|b) if b more characteristic than c
      85  
      86          Returns: The most 'characteristic'(as defined by
      87            get_characteristic_subpattern) path for the compiled pattern
      88            tree.
      89          """
      90  
      91          for l in self.leaves():
      92              subp = l.leaf_to_root()
      93              if subp:
      94                  return subp
      95  
      96      def leaves(self):
      97          "Generator that returns the leaves of the tree"
      98          for child in self.children:
      99              yield from child.leaves()
     100          if not self.children:
     101              yield self
     102  
     103  def reduce_tree(node, parent=None):
     104      """
     105      Internal function. Reduces a compiled pattern tree to an
     106      intermediate representation suitable for feeding the
     107      automaton. This also trims off any optional pattern elements(like
     108      [a], a*).
     109      """
     110  
     111      new_node = None
     112      #switch on the node type
     113      if node.type == syms.Matcher:
     114          #skip
     115          node = node.children[0]
     116  
     117      if node.type == syms.Alternatives  :
     118          #2 cases
     119          if len(node.children) <= 2:
     120              #just a single 'Alternative', skip this node
     121              new_node = reduce_tree(node.children[0], parent)
     122          else:
     123              #real alternatives
     124              new_node = MinNode(type=TYPE_ALTERNATIVES)
     125              #skip odd children('|' tokens)
     126              for child in node.children:
     127                  if node.children.index(child)%2:
     128                      continue
     129                  reduced = reduce_tree(child, new_node)
     130                  if reduced is not None:
     131                      new_node.children.append(reduced)
     132      elif node.type == syms.Alternative:
     133          if len(node.children) > 1:
     134  
     135              new_node = MinNode(type=TYPE_GROUP)
     136              for child in node.children:
     137                  reduced = reduce_tree(child, new_node)
     138                  if reduced:
     139                      new_node.children.append(reduced)
     140              if not new_node.children:
     141                  # delete the group if all of the children were reduced to None
     142                  new_node = None
     143  
     144          else:
     145              new_node = reduce_tree(node.children[0], parent)
     146  
     147      elif node.type == syms.Unit:
     148          if (isinstance(node.children[0], pytree.Leaf) and
     149              node.children[0].value == '('):
     150              #skip parentheses
     151              return reduce_tree(node.children[1], parent)
     152          if ((isinstance(node.children[0], pytree.Leaf) and
     153                 node.children[0].value == '[')
     154                 or
     155                 (len(node.children)>1 and
     156                 hasattr(node.children[1], "value") and
     157                 node.children[1].value == '[')):
     158              #skip whole unit if its optional
     159              return None
     160  
     161          leaf = True
     162          details_node = None
     163          alternatives_node = None
     164          has_repeater = False
     165          repeater_node = None
     166          has_variable_name = False
     167  
     168          for child in node.children:
     169              if child.type == syms.Details:
     170                  leaf = False
     171                  details_node = child
     172              elif child.type == syms.Repeater:
     173                  has_repeater = True
     174                  repeater_node = child
     175              elif child.type == syms.Alternatives:
     176                  alternatives_node = child
     177              if hasattr(child, 'value') and child.value == '=': # variable name
     178                  has_variable_name = True
     179  
     180          #skip variable name
     181          if has_variable_name:
     182              #skip variable name, '='
     183              name_leaf = node.children[2]
     184              if hasattr(name_leaf, 'value') and name_leaf.value == '(':
     185                  # skip parenthesis
     186                  name_leaf = node.children[3]
     187          else:
     188              name_leaf = node.children[0]
     189  
     190          #set node type
     191          if name_leaf.type == token_labels.NAME:
     192              #(python) non-name or wildcard
     193              if name_leaf.value == 'any':
     194                  new_node = MinNode(type=TYPE_ANY)
     195              else:
     196                  if hasattr(token_labels, name_leaf.value):
     197                      new_node = MinNode(type=getattr(token_labels, name_leaf.value))
     198                  else:
     199                      new_node = MinNode(type=getattr(pysyms, name_leaf.value))
     200  
     201          elif name_leaf.type == token_labels.STRING:
     202              #(python) name or character; remove the apostrophes from
     203              #the string value
     204              name = name_leaf.value.strip("'")
     205              if name in tokens:
     206                  new_node = MinNode(type=tokens[name])
     207              else:
     208                  new_node = MinNode(type=token_labels.NAME, name=name)
     209          elif name_leaf.type == syms.Alternatives:
     210              new_node = reduce_tree(alternatives_node, parent)
     211  
     212          #handle repeaters
     213          if has_repeater:
     214              if repeater_node.children[0].value == '*':
     215                  #reduce to None
     216                  new_node = None
     217              elif repeater_node.children[0].value == '+':
     218                  #reduce to a single occurrence i.e. do nothing
     219                  pass
     220              else:
     221                  #TODO: handle {min, max} repeaters
     222                  raise NotImplementedError
     223  
     224          #add children
     225          if details_node and new_node is not None:
     226              for child in details_node.children[1:-1]:
     227                  #skip '<', '>' markers
     228                  reduced = reduce_tree(child, new_node)
     229                  if reduced is not None:
     230                      new_node.children.append(reduced)
     231      if new_node:
     232          new_node.parent = parent
     233      return new_node
     234  
     235  
     236  def get_characteristic_subpattern(subpatterns):
     237      """Picks the most characteristic from a list of linear patterns
     238      Current order used is:
     239      names > common_names > common_chars
     240      """
     241      if not isinstance(subpatterns, list):
     242          return subpatterns
     243      if len(subpatterns)==1:
     244          return subpatterns[0]
     245  
     246      # first pick out the ones containing variable names
     247      subpatterns_with_names = []
     248      subpatterns_with_common_names = []
     249      common_names = ['in', 'for', 'if' , 'not', 'None']
     250      subpatterns_with_common_chars = []
     251      common_chars = "[]().,:"
     252      for subpattern in subpatterns:
     253          if any(rec_test(subpattern, lambda x: type(x) is str)):
     254              if any(rec_test(subpattern,
     255                              lambda x: isinstance(x, str) and x in common_chars)):
     256                  subpatterns_with_common_chars.append(subpattern)
     257              elif any(rec_test(subpattern,
     258                                lambda x: isinstance(x, str) and x in common_names)):
     259                  subpatterns_with_common_names.append(subpattern)
     260  
     261              else:
     262                  subpatterns_with_names.append(subpattern)
     263  
     264      if subpatterns_with_names:
     265          subpatterns = subpatterns_with_names
     266      elif subpatterns_with_common_names:
     267          subpatterns = subpatterns_with_common_names
     268      elif subpatterns_with_common_chars:
     269          subpatterns = subpatterns_with_common_chars
     270      # of the remaining subpatterns pick out the longest one
     271      return max(subpatterns, key=len)
     272  
     273  def rec_test(sequence, test_func):
     274      """Tests test_func on all items of sequence and items of included
     275      sub-iterables"""
     276      for x in sequence:
     277          if isinstance(x, (list, tuple)):
     278              yield from rec_test(x, test_func)
     279          else:
     280              yield test_func(x)