(root)/
Python-3.12.0/
Lib/
lib2to3/
pgen2/
conv.py
       1  # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
       2  # Licensed to PSF under a Contributor Agreement.
       3  
       4  """Convert graminit.[ch] spit out by pgen to Python code.
       5  
       6  Pgen is the Python parser generator.  It is useful to quickly create a
       7  parser from a grammar file in Python's grammar notation.  But I don't
       8  want my parsers to be written in C (yet), so I'm translating the
       9  parsing tables to Python data structures and writing a Python parse
      10  engine.
      11  
      12  Note that the token numbers are constants determined by the standard
      13  Python tokenizer.  The standard token module defines these numbers and
      14  their names (the names are not used much).  The token numbers are
      15  hardcoded into the Python tokenizer and into pgen.  A Python
      16  implementation of the Python tokenizer is also available, in the
      17  standard tokenize module.
      18  
      19  On the other hand, symbol numbers (representing the grammar's
      20  non-terminals) are assigned by pgen based on the actual grammar
      21  input.
      22  
      23  Note: this module is pretty much obsolete; the pgen module generates
      24  equivalent grammar tables directly from the Grammar.txt input file
      25  without having to invoke the Python pgen C program.
      26  
      27  """
      28  
      29  # Python imports
      30  import re
      31  
      32  # Local imports
      33  from pgen2 import grammar, token
      34  
      35  
      36  class ESC[4;38;5;81mConverter(ESC[4;38;5;149mgrammarESC[4;38;5;149m.ESC[4;38;5;149mGrammar):
      37      """Grammar subclass that reads classic pgen output files.
      38  
      39      The run() method reads the tables as produced by the pgen parser
      40      generator, typically contained in two C files, graminit.h and
      41      graminit.c.  The other methods are for internal use only.
      42  
      43      See the base class for more documentation.
      44  
      45      """
      46  
      47      def run(self, graminit_h, graminit_c):
      48          """Load the grammar tables from the text files written by pgen."""
      49          self.parse_graminit_h(graminit_h)
      50          self.parse_graminit_c(graminit_c)
      51          self.finish_off()
      52  
      53      def parse_graminit_h(self, filename):
      54          """Parse the .h file written by pgen.  (Internal)
      55  
      56          This file is a sequence of #define statements defining the
      57          nonterminals of the grammar as numbers.  We build two tables
      58          mapping the numbers to names and back.
      59  
      60          """
      61          try:
      62              f = open(filename)
      63          except OSError as err:
      64              print("Can't open %s: %s" % (filename, err))
      65              return False
      66          self.symbol2number = {}
      67          self.number2symbol = {}
      68          lineno = 0
      69          for line in f:
      70              lineno += 1
      71              mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
      72              if not mo and line.strip():
      73                  print("%s(%s): can't parse %s" % (filename, lineno,
      74                                                    line.strip()))
      75              else:
      76                  symbol, number = mo.groups()
      77                  number = int(number)
      78                  assert symbol not in self.symbol2number
      79                  assert number not in self.number2symbol
      80                  self.symbol2number[symbol] = number
      81                  self.number2symbol[number] = symbol
      82          return True
      83  
      84      def parse_graminit_c(self, filename):
      85          """Parse the .c file written by pgen.  (Internal)
      86  
      87          The file looks as follows.  The first two lines are always this:
      88  
      89          #include "pgenheaders.h"
      90          #include "grammar.h"
      91  
      92          After that come four blocks:
      93  
      94          1) one or more state definitions
      95          2) a table defining dfas
      96          3) a table defining labels
      97          4) a struct defining the grammar
      98  
      99          A state definition has the following form:
     100          - one or more arc arrays, each of the form:
     101            static arc arcs_<n>_<m>[<k>] = {
     102                    {<i>, <j>},
     103                    ...
     104            };
     105          - followed by a state array, of the form:
     106            static state states_<s>[<t>] = {
     107                    {<k>, arcs_<n>_<m>},
     108                    ...
     109            };
     110  
     111          """
     112          try:
     113              f = open(filename)
     114          except OSError as err:
     115              print("Can't open %s: %s" % (filename, err))
     116              return False
     117          # The code below essentially uses f's iterator-ness!
     118          lineno = 0
     119  
     120          # Expect the two #include lines
     121          lineno, line = lineno+1, next(f)
     122          assert line == '#include "pgenheaders.h"\n', (lineno, line)
     123          lineno, line = lineno+1, next(f)
     124          assert line == '#include "grammar.h"\n', (lineno, line)
     125  
     126          # Parse the state definitions
     127          lineno, line = lineno+1, next(f)
     128          allarcs = {}
     129          states = []
     130          while line.startswith("static arc "):
     131              while line.startswith("static arc "):
     132                  mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$",
     133                                line)
     134                  assert mo, (lineno, line)
     135                  n, m, k = list(map(int, mo.groups()))
     136                  arcs = []
     137                  for _ in range(k):
     138                      lineno, line = lineno+1, next(f)
     139                      mo = re.match(r"\s+{(\d+), (\d+)},$", line)
     140                      assert mo, (lineno, line)
     141                      i, j = list(map(int, mo.groups()))
     142                      arcs.append((i, j))
     143                  lineno, line = lineno+1, next(f)
     144                  assert line == "};\n", (lineno, line)
     145                  allarcs[(n, m)] = arcs
     146                  lineno, line = lineno+1, next(f)
     147              mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line)
     148              assert mo, (lineno, line)
     149              s, t = list(map(int, mo.groups()))
     150              assert s == len(states), (lineno, line)
     151              state = []
     152              for _ in range(t):
     153                  lineno, line = lineno+1, next(f)
     154                  mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
     155                  assert mo, (lineno, line)
     156                  k, n, m = list(map(int, mo.groups()))
     157                  arcs = allarcs[n, m]
     158                  assert k == len(arcs), (lineno, line)
     159                  state.append(arcs)
     160              states.append(state)
     161              lineno, line = lineno+1, next(f)
     162              assert line == "};\n", (lineno, line)
     163              lineno, line = lineno+1, next(f)
     164          self.states = states
     165  
     166          # Parse the dfas
     167          dfas = {}
     168          mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line)
     169          assert mo, (lineno, line)
     170          ndfas = int(mo.group(1))
     171          for i in range(ndfas):
     172              lineno, line = lineno+1, next(f)
     173              mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
     174                            line)
     175              assert mo, (lineno, line)
     176              symbol = mo.group(2)
     177              number, x, y, z = list(map(int, mo.group(1, 3, 4, 5)))
     178              assert self.symbol2number[symbol] == number, (lineno, line)
     179              assert self.number2symbol[number] == symbol, (lineno, line)
     180              assert x == 0, (lineno, line)
     181              state = states[z]
     182              assert y == len(state), (lineno, line)
     183              lineno, line = lineno+1, next(f)
     184              mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
     185              assert mo, (lineno, line)
     186              first = {}
     187              rawbitset = eval(mo.group(1))
     188              for i, c in enumerate(rawbitset):
     189                  byte = ord(c)
     190                  for j in range(8):
     191                      if byte & (1<<j):
     192                          first[i*8 + j] = 1
     193              dfas[number] = (state, first)
     194          lineno, line = lineno+1, next(f)
     195          assert line == "};\n", (lineno, line)
     196          self.dfas = dfas
     197  
     198          # Parse the labels
     199          labels = []
     200          lineno, line = lineno+1, next(f)
     201          mo = re.match(r"static label labels\[(\d+)\] = {$", line)
     202          assert mo, (lineno, line)
     203          nlabels = int(mo.group(1))
     204          for i in range(nlabels):
     205              lineno, line = lineno+1, next(f)
     206              mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
     207              assert mo, (lineno, line)
     208              x, y = mo.groups()
     209              x = int(x)
     210              if y == "0":
     211                  y = None
     212              else:
     213                  y = eval(y)
     214              labels.append((x, y))
     215          lineno, line = lineno+1, next(f)
     216          assert line == "};\n", (lineno, line)
     217          self.labels = labels
     218  
     219          # Parse the grammar struct
     220          lineno, line = lineno+1, next(f)
     221          assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
     222          lineno, line = lineno+1, next(f)
     223          mo = re.match(r"\s+(\d+),$", line)
     224          assert mo, (lineno, line)
     225          ndfas = int(mo.group(1))
     226          assert ndfas == len(self.dfas)
     227          lineno, line = lineno+1, next(f)
     228          assert line == "\tdfas,\n", (lineno, line)
     229          lineno, line = lineno+1, next(f)
     230          mo = re.match(r"\s+{(\d+), labels},$", line)
     231          assert mo, (lineno, line)
     232          nlabels = int(mo.group(1))
     233          assert nlabels == len(self.labels), (lineno, line)
     234          lineno, line = lineno+1, next(f)
     235          mo = re.match(r"\s+(\d+)$", line)
     236          assert mo, (lineno, line)
     237          start = int(mo.group(1))
     238          assert start in self.number2symbol, (lineno, line)
     239          self.start = start
     240          lineno, line = lineno+1, next(f)
     241          assert line == "};\n", (lineno, line)
     242          try:
     243              lineno, line = lineno+1, next(f)
     244          except StopIteration:
     245              pass
     246          else:
     247              assert 0, (lineno, line)
     248  
     249      def finish_off(self):
     250          """Create additional useful structures.  (Internal)."""
     251          self.keywords = {} # map from keyword strings to arc labels
     252          self.tokens = {}   # map from numeric token values to arc labels
     253          for ilabel, (type, value) in enumerate(self.labels):
     254              if type == token.NAME and value is not None:
     255                  self.keywords[value] = ilabel
     256              elif value is None:
     257                  self.tokens[type] = ilabel