1  # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
       2  # Licensed to PSF under a Contributor Agreement.
       3  
       4  """This module defines the data structures used to represent a grammar.
       5  
       6  These are a bit arcane because they are derived from the data
       7  structures used by Python's 'pgen' parser generator.
       8  
       9  There's also a table here mapping operators to their names in the
      10  token module; the Python tokenize module reports all operators as the
      11  fallback token code OP, but the parser needs the actual token code.
      12  
      13  """
      14  
      15  # Python imports
      16  import pickle
      17  
      18  # Local imports
      19  from . import token
      20  
      21  
      22  class ESC[4;38;5;81mGrammar(ESC[4;38;5;149mobject):
      23      """Pgen parsing tables conversion class.
      24  
      25      Once initialized, this class supplies the grammar tables for the
      26      parsing engine implemented by parse.py.  The parsing engine
      27      accesses the instance variables directly.  The class here does not
      28      provide initialization of the tables; several subclasses exist to
      29      do this (see the conv and pgen modules).
      30  
      31      The load() method reads the tables from a pickle file, which is
      32      much faster than the other ways offered by subclasses.  The pickle
      33      file is written by calling dump() (after loading the grammar
      34      tables using a subclass).  The report() method prints a readable
      35      representation of the tables to stdout, for debugging.
      36  
      37      The instance variables are as follows:
      38  
      39      symbol2number -- a dict mapping symbol names to numbers.  Symbol
      40                       numbers are always 256 or higher, to distinguish
      41                       them from token numbers, which are between 0 and
      42                       255 (inclusive).
      43  
      44      number2symbol -- a dict mapping numbers to symbol names;
      45                       these two are each other's inverse.
      46  
      47      states        -- a list of DFAs, where each DFA is a list of
      48                       states, each state is a list of arcs, and each
      49                       arc is a (i, j) pair where i is a label and j is
      50                       a state number.  The DFA number is the index into
      51                       this list.  (This name is slightly confusing.)
      52                       Final states are represented by a special arc of
      53                       the form (0, j) where j is its own state number.
      54  
      55      dfas          -- a dict mapping symbol numbers to (DFA, first)
      56                       pairs, where DFA is an item from the states list
      57                       above, and first is a set of tokens that can
      58                       begin this grammar rule (represented by a dict
      59                       whose values are always 1).
      60  
      61      labels        -- a list of (x, y) pairs where x is either a token
      62                       number or a symbol number, and y is either None
      63                       or a string; the strings are keywords.  The label
      64                       number is the index in this list; label numbers
      65                       are used to mark state transitions (arcs) in the
      66                       DFAs.
      67  
      68      start         -- the number of the grammar's start symbol.
      69  
      70      keywords      -- a dict mapping keyword strings to arc labels.
      71  
      72      tokens        -- a dict mapping token numbers to arc labels.
      73  
      74      """
      75  
      76      def __init__(self):
      77          self.symbol2number = {}
      78          self.number2symbol = {}
      79          self.states = []
      80          self.dfas = {}
      81          self.labels = [(0, "EMPTY")]
      82          self.keywords = {}
      83          self.tokens = {}
      84          self.symbol2label = {}
      85          self.start = 256
      86  
      87      def dump(self, filename):
      88          """Dump the grammar tables to a pickle file."""
      89          with open(filename, "wb") as f:
      90              pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
      91  
      92      def load(self, filename):
      93          """Load the grammar tables from a pickle file."""
      94          with open(filename, "rb") as f:
      95              d = pickle.load(f)
      96          self.__dict__.update(d)
      97  
      98      def loads(self, pkl):
      99          """Load the grammar tables from a pickle bytes object."""
     100          self.__dict__.update(pickle.loads(pkl))
     101  
     102      def copy(self):
     103          """
     104          Copy the grammar.
     105          """
     106          new = self.__class__()
     107          for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
     108                            "tokens", "symbol2label"):
     109              setattr(new, dict_attr, getattr(self, dict_attr).copy())
     110          new.labels = self.labels[:]
     111          new.states = self.states[:]
     112          new.start = self.start
     113          return new
     114  
     115      def report(self):
     116          """Dump the grammar tables to standard output, for debugging."""
     117          from pprint import pprint
     118          print("s2n")
     119          pprint(self.symbol2number)
     120          print("n2s")
     121          pprint(self.number2symbol)
     122          print("states")
     123          pprint(self.states)
     124          print("dfas")
     125          pprint(self.dfas)
     126          print("labels")
     127          pprint(self.labels)
     128          print("start", self.start)
     129  
     130  
     131  # Map from operator to number (since tokenize doesn't do this)
     132  
     133  opmap_raw = """
     134  ( LPAR
     135  ) RPAR
     136  [ LSQB
     137  ] RSQB
     138  : COLON
     139  , COMMA
     140  ; SEMI
     141  + PLUS
     142  - MINUS
     143  * STAR
     144  / SLASH
     145  | VBAR
     146  & AMPER
     147  < LESS
     148  > GREATER
     149  = EQUAL
     150  . DOT
     151  % PERCENT
     152  ` BACKQUOTE
     153  { LBRACE
     154  } RBRACE
     155  @ AT
     156  @= ATEQUAL
     157  == EQEQUAL
     158  != NOTEQUAL
     159  <> NOTEQUAL
     160  <= LESSEQUAL
     161  >= GREATEREQUAL
     162  ~ TILDE
     163  ^ CIRCUMFLEX
     164  << LEFTSHIFT
     165  >> RIGHTSHIFT
     166  ** DOUBLESTAR
     167  += PLUSEQUAL
     168  -= MINEQUAL
     169  *= STAREQUAL
     170  /= SLASHEQUAL
     171  %= PERCENTEQUAL
     172  &= AMPEREQUAL
     173  |= VBAREQUAL
     174  ^= CIRCUMFLEXEQUAL
     175  <<= LEFTSHIFTEQUAL
     176  >>= RIGHTSHIFTEQUAL
     177  **= DOUBLESTAREQUAL
     178  // DOUBLESLASH
     179  //= DOUBLESLASHEQUAL
     180  -> RARROW
     181  := COLONEQUAL
     182  """
     183  
     184  opmap = {}
     185  for line in opmap_raw.splitlines():
     186      if line:
     187          op, name = line.split()
     188          opmap[op] = getattr(token, name)
     189  del line, op, name