# Python-3.12.0/Tools/peg_generator/pegen/tokenizer.py
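"""Caching tokenizer wrapper used by pegen, Python's PEG parser generator."""
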
import token
import tokenize
from typing import Dict, Iterator, List

Mark = int  # NewType('Mark', int)

exact_token_types = token.EXACT_TOKEN_TYPES


def shorttok(tok: tokenize.TokenInfo) -> str:
    # One-line token summary for verbose tracing, e.g. "1.0: NAME:'x'",
    # padded/truncated to 25 characters.
    return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"


class Tokenizer:
    """Caching wrapper for the tokenize module.

    This is pretty tied to Python's syntax.
    """

    _tokens: List[tokenize.TokenInfo]

    def __init__(
        self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
    ):
        self._tokengen = tokengen
        self._tokens = []  # Cache of tokens consumed from the generator so far.
        self._index = 0  # Index into _tokens of the next token to hand out.
        self._verbose = verbose
        self._lines: Dict[int, str] = {}  # Source lines cached when no path is given.
        self._path = path
        if verbose:
            self.report(False, False)

    def getnext(self) -> tokenize.TokenInfo:
        """Return the next token and update the index."""
        cached = self._index != len(self._tokens)
        tok = self.peek()
        self._index += 1
        if self._verbose:
            self.report(cached, False)
        return tok

    def peek(self) -> tokenize.TokenInfo:
        """Return the next token *without* updating the index."""
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            # Skip non-logical newlines and comments.
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            # Skip error tokens that are pure whitespace.
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            # Collapse runs of consecutive NEWLINE tokens into one.
            if (
                tok.type == token.NEWLINE
                and self._tokens
                and self._tokens[-1].type == token.NEWLINE
            ):
                continue
            self._tokens.append(tok)
            # Without a path to reopen later, cache source lines now.
            if not self._path:
                self._lines[tok.start[0]] = tok.line
        return self._tokens[self._index]

    def diagnose(self) -> tokenize.TokenInfo:
        if not self._tokens:
            self.getnext()
        return self._tokens[-1]

    def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
        for tok in reversed(self._tokens[: self._index]):
            # Skip ENDMARKER and the whitespace-like tokens
            # (NEWLINE, INDENT, DEDENT occupy a contiguous range of type codes).
            if tok.type != tokenize.ENDMARKER and (
                tok.type < tokenize.NEWLINE or tok.type > tokenize.DEDENT
            ):
                break
        return tok

    def get_lines(self, line_numbers: List[int]) -> List[str]:
        """Retrieve source lines corresponding to line numbers."""
        if self._lines:
            lines = self._lines
        else:
            # No cached lines; read just the requested lines from the file.
            n = len(line_numbers)
            lines = {}
            count = 0
            seen = 0
            with open(self._path) as f:
                for line in f:
                    count += 1
                    if count in line_numbers:
                        seen += 1
                        lines[count] = line
                        if seen == n:
                            break

        return [lines[num] for num in line_numbers]

    def mark(self) -> Mark:
        return self._index

    def reset(self, index: Mark) -> None:
        if index == self._index:
            return
        assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
        old_index = self._index
        self._index = index
        if self._verbose:
            self.report(True, index < old_index)

    def report(self, cached: bool, back: bool) -> None:
        # Trace marker: '-' for backtracking, '>' for a cached token,
        # '*' for a token freshly read from the generator.
        if back:
            fill = "-" * self._index + "-"
        elif cached:
            fill = "-" * self._index + ">"
        else:
            fill = "-" * self._index + "*"
        if self._index == 0:
            print(f"{fill} (Bof)")  # Beginning of file.
        else:
            tok = self._tokens[self._index - 1]
            print(f"{fill} {shorttok(tok)}")
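

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example, assuming the caller feeds the wrapper tokens from
# tokenize.generate_tokens and drives it PEG-style with mark()/reset().
if __name__ == "__main__":
    import io

    source = "x = 1 + 2\n"
    tokengen = tokenize.generate_tokens(io.StringIO(source).readline)
    tokenizer = Tokenizer(tokengen, verbose=True)

    pos = tokenizer.mark()       # remember the current position
    first = tokenizer.getnext()  # consume one token (NAME 'x')
    tokenizer.reset(pos)         # backtrack, as a failed alternative would
    assert tokenizer.peek() == first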