#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
#     Doc/library/token-list.inc
#     Include/token.h
#     Parser/token.c
#     Lib/token.py
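#
# Invocation sketch (the first command-line argument selects one of the
# make_<op>() generators defined below; the remaining arguments override the
# default input/output paths shown here):
#
#     python3 Tools/scripts/generate_token.py h   Grammar/Tokens Include/token.h
#     python3 Tools/scripts/generate_token.py c   Grammar/Tokens Parser/token.c
#     python3 Tools/scripts/generate_token.py rst Grammar/Tokens Doc/library/token-list.inc
#     python3 Tools/scripts/generate_token.py py  Grammar/Tokens Lib/token.py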

NT_OFFSET = 256
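
# load_tokens() below parses the Grammar/Tokens input file.  Each non-blank,
# non-comment line holds a token name and, for "exact" tokens, a quoted
# operator string, e.g. (illustrative):
#
#     ENDMARKER
#     NAME
#     LPAR                    '('
#     NOTEQUAL                '!='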

def load_tokens(path):
    """Parse Grammar/Tokens; return (tok_names, ERRORTOKEN value, string_to_tok)."""
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                string = eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok


def update_file(file, content):
    """Write *content* to *file* only if it changed; return True if rewritten."""
    try:
        with open(file, 'r') as fobj:
            if fobj.read() == content:
                return False
    except (OSError, ValueError):
        pass
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True


token_h_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

/* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS        %d
#define NT_OFFSET       %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x)           ((x) < NT_OFFSET)
#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
#define ISEOF(x)                ((x) == ENDMARKER)
#define ISWHITESPACE(x)         ((x) == ENDMARKER || \\
                                 (x) == NEWLINE   || \\
                                 (x) == INDENT    || \\
                                 (x) == DEDENT)


PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
"""

def make_h(infile, outfile='Include/token.h'):
    """Regenerate Include/token.h from *infile* (Grammar/Tokens)."""
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        defines.append("#define %-15s %d\n" % (name, value))

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

def generate_chars_to_token(mapping, n=1):
    """Emit a nested C switch mapping operator characters to token names."""
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)

def make_c(infile, outfile='Parser/token.c'):
    """Regenerate Parser/token.c from *infile* (Grammar/Tokens)."""
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    # Treat '<>' like '!='; it has no entry of its own in Grammar/Tokens.
    string_to_tok['<>'] = string_to_tok['!=']
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""

def make_rst(infile, outfile='Doc/library/token-list.inc'):
    """Regenerate Doc/library/token-list.inc from *infile* (Grammar/Tokens)."""
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))


token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''
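
# make_py() below fills the constants block and EXACT_TOKEN_TYPES above; the
# latter maps the exact operator strings back to their token constants,
# e.g. (illustrative):
#
#     EXACT_TOKEN_TYPES = {
#         '!=': NOTEQUAL,
#         '(': LPAR,
#     }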

def make_py(infile, outfile='Lib/token.py'):
    """Regenerate Lib/token.py from *infile* (Grammar/Tokens)."""
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    constants.insert(ERRORTOKEN,
        "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
        )):
        print("%s regenerated from %s" % (outfile, infile))


def main(op, infile='Grammar/Tokens', *args):
    """Dispatch to make_<op>(infile, ...) for op in {'h', 'c', 'rst', 'py'}."""
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])