1  # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
       2  # Licensed to PSF under a Contributor Agreement.
       3  
       4  """Parser engine for the grammar tables generated by pgen.
       5  
       6  The grammar table must be loaded first.
       7  
       8  See Parser/parser.c in the Python distribution for additional info on
       9  how this parsing engine works.
      10  
      11  """
      12  
      13  # Local imports
      14  from . import token
      15  
      16  class ESC[4;38;5;81mParseError(ESC[4;38;5;149mException):
      17      """Exception to signal the parser is stuck."""
      18  
      19      def __init__(self, msg, type, value, context):
      20          Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
      21                             (msg, type, value, context))
      22          self.msg = msg
      23          self.type = type
      24          self.value = value
      25          self.context = context
      26  
      27      def __reduce__(self):
      28          return type(self), (self.msg, self.type, self.value, self.context)
      29  
      30  class ESC[4;38;5;81mParser(ESC[4;38;5;149mobject):
      31      """Parser engine.
      32  
      33      The proper usage sequence is:
      34  
      35      p = Parser(grammar, [converter])  # create instance
      36      p.setup([start])                  # prepare for parsing
      37      <for each input token>:
      38          if p.addtoken(...):           # parse a token; may raise ParseError
      39              break
      40      root = p.rootnode                 # root of abstract syntax tree
      41  
      42      A Parser instance may be reused by calling setup() repeatedly.
      43  
      44      A Parser instance contains state pertaining to the current token
      45      sequence, and should not be used concurrently by different threads
      46      to parse separate token sequences.
      47  
      48      See driver.py for how to get input tokens by tokenizing a file or
      49      string.
      50  
      51      Parsing is complete when addtoken() returns True; the root of the
      52      abstract syntax tree can then be retrieved from the rootnode
      53      instance variable.  When a syntax error occurs, addtoken() raises
      54      the ParseError exception.  There is no error recovery; the parser
      55      cannot be used after a syntax error was reported (but it can be
      56      reinitialized by calling setup()).
      57  
      58      """
      59  
      60      def __init__(self, grammar, convert=None):
      61          """Constructor.
      62  
      63          The grammar argument is a grammar.Grammar instance; see the
      64          grammar module for more information.
      65  
      66          The parser is not ready yet for parsing; you must call the
      67          setup() method to get it started.
      68  
      69          The optional convert argument is a function mapping concrete
      70          syntax tree nodes to abstract syntax tree nodes.  If not
      71          given, no conversion is done and the syntax tree produced is
      72          the concrete syntax tree.  If given, it must be a function of
      73          two arguments, the first being the grammar (a grammar.Grammar
      74          instance), and the second being the concrete syntax tree node
      75          to be converted.  The syntax tree is converted from the bottom
      76          up.
      77  
      78          A concrete syntax tree node is a (type, value, context, nodes)
      79          tuple, where type is the node type (a token or symbol number),
      80          value is None for symbols and a string for tokens, context is
      81          None or an opaque value used for error reporting (typically a
      82          (lineno, offset) pair), and nodes is a list of children for
      83          symbols, and None for tokens.
      84  
      85          An abstract syntax tree node may be anything; this is entirely
      86          up to the converter function.
      87  
      88          """
      89          self.grammar = grammar
      90          self.convert = convert or (lambda grammar, node: node)
      91  
      92      def setup(self, start=None):
      93          """Prepare for parsing.
      94  
      95          This *must* be called before starting to parse.
      96  
      97          The optional argument is an alternative start symbol; it
      98          defaults to the grammar's start symbol.
      99  
     100          You can use a Parser instance to parse any number of programs;
     101          each time you call setup() the parser is reset to an initial
     102          state determined by the (implicit or explicit) start symbol.
     103  
     104          """
     105          if start is None:
     106              start = self.grammar.start
     107          # Each stack entry is a tuple: (dfa, state, node).
     108          # A node is a tuple: (type, value, context, children),
     109          # where children is a list of nodes or None, and context may be None.
     110          newnode = (start, None, None, [])
     111          stackentry = (self.grammar.dfas[start], 0, newnode)
     112          self.stack = [stackentry]
     113          self.rootnode = None
     114          self.used_names = set() # Aliased to self.rootnode.used_names in pop()
     115  
     116      def addtoken(self, type, value, context):
     117          """Add a token; return True iff this is the end of the program."""
     118          # Map from token to label
     119          ilabel = self.classify(type, value, context)
     120          # Loop until the token is shifted; may raise exceptions
     121          while True:
     122              dfa, state, node = self.stack[-1]
     123              states, first = dfa
     124              arcs = states[state]
     125              # Look for a state with this label
     126              for i, newstate in arcs:
     127                  t, v = self.grammar.labels[i]
     128                  if ilabel == i:
     129                      # Look it up in the list of labels
     130                      assert t < 256
     131                      # Shift a token; we're done with it
     132                      self.shift(type, value, newstate, context)
     133                      # Pop while we are in an accept-only state
     134                      state = newstate
     135                      while states[state] == [(0, state)]:
     136                          self.pop()
     137                          if not self.stack:
     138                              # Done parsing!
     139                              return True
     140                          dfa, state, node = self.stack[-1]
     141                          states, first = dfa
     142                      # Done with this token
     143                      return False
     144                  elif t >= 256:
     145                      # See if it's a symbol and if we're in its first set
     146                      itsdfa = self.grammar.dfas[t]
     147                      itsstates, itsfirst = itsdfa
     148                      if ilabel in itsfirst:
     149                          # Push a symbol
     150                          self.push(t, self.grammar.dfas[t], newstate, context)
     151                          break # To continue the outer while loop
     152              else:
     153                  if (0, state) in arcs:
     154                      # An accepting state, pop it and try something else
     155                      self.pop()
     156                      if not self.stack:
     157                          # Done parsing, but another token is input
     158                          raise ParseError("too much input",
     159                                           type, value, context)
     160                  else:
     161                      # No success finding a transition
     162                      raise ParseError("bad input", type, value, context)
     163  
     164      def classify(self, type, value, context):
     165          """Turn a token into a label.  (Internal)"""
     166          if type == token.NAME:
     167              # Keep a listing of all used names
     168              self.used_names.add(value)
     169              # Check for reserved words
     170              ilabel = self.grammar.keywords.get(value)
     171              if ilabel is not None:
     172                  return ilabel
     173          ilabel = self.grammar.tokens.get(type)
     174          if ilabel is None:
     175              raise ParseError("bad token", type, value, context)
     176          return ilabel
     177  
     178      def shift(self, type, value, newstate, context):
     179          """Shift a token.  (Internal)"""
     180          dfa, state, node = self.stack[-1]
     181          newnode = (type, value, context, None)
     182          newnode = self.convert(self.grammar, newnode)
     183          if newnode is not None:
     184              node[-1].append(newnode)
     185          self.stack[-1] = (dfa, newstate, node)
     186  
     187      def push(self, type, newdfa, newstate, context):
     188          """Push a nonterminal.  (Internal)"""
     189          dfa, state, node = self.stack[-1]
     190          newnode = (type, None, context, [])
     191          self.stack[-1] = (dfa, newstate, node)
     192          self.stack.append((newdfa, 0, newnode))
     193  
     194      def pop(self):
     195          """Pop a nonterminal.  (Internal)"""
     196          popdfa, popstate, popnode = self.stack.pop()
     197          newnode = self.convert(self.grammar, popnode)
     198          if newnode is not None:
     199              if self.stack:
     200                  dfa, state, node = self.stack[-1]
     201                  node[-1].append(newnode)
     202              else:
     203                  self.rootnode = newnode
     204                  self.rootnode.used_names = self.used_names