1 import token
2 import tokenize
3 from typing import Dict, Iterator, List
4
# A Mark is an index into Tokenizer._tokens, used by mark()/reset() for
# backtracking.  A plain int alias; NewType('Mark', int) was considered
# but rejected as unnecessary overhead.
Mark = int

# Mapping of exact token strings (e.g. "(") to their token types.
exact_token_types = token.EXACT_TOKEN_TYPES
8
9
def shorttok(tok: tokenize.TokenInfo) -> str:
    """Render *tok* as a fixed-width (25 char) one-line summary for tracing."""
    summary = f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"
    return "%-25.25s" % summary
12
13
class Tokenizer:
    """Caching wrapper for the tokenize module.

    Wraps a token generator, buffering every significant token so the
    parser can mark() a position and reset() back to it while
    backtracking.  This is pretty tied to Python's syntax.
    """

    _tokens: List[tokenize.TokenInfo]

    def __init__(
        self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
    ):
        self._tokengen = tokengen
        self._tokens = []
        self._index = 0
        self._verbose = verbose
        # Source lines keyed by line number; only filled in when no path
        # is given (see peek()), since then the source can't be re-read.
        self._lines: Dict[int, str] = {}
        self._path = path
        if verbose:
            self.report(False, False)

    def getnext(self) -> tokenize.TokenInfo:
        """Return the next token and updates the index."""
        # True when the token was already buffered (no generator pull needed).
        cached = self._index != len(self._tokens)
        tok = self.peek()
        self._index += 1
        if self._verbose:
            self.report(cached, False)
        return tok

    def peek(self) -> tokenize.TokenInfo:
        """Return the next token *without* updating the index."""
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            # Drop tokens irrelevant to parsing: non-logical newlines,
            # comments, whitespace-only error tokens, and a NEWLINE that
            # immediately follows another NEWLINE.
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            if (
                tok.type == token.NEWLINE
                and self._tokens
                and self._tokens[-1].type == token.NEWLINE
            ):
                continue
            self._tokens.append(tok)
            if not self._path:
                self._lines[tok.start[0]] = tok.line
        return self._tokens[self._index]

    def diagnose(self) -> tokenize.TokenInfo:
        """Return the last token buffered, fetching the first one if needed."""
        if not self._tokens:
            self.getnext()
        return self._tokens[-1]

    def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
        """Return the most recently consumed token that is not layout-related.

        Skips ENDMARKER and the NEWLINE..DEDENT range (NEWLINE, NL,
        INDENT, DEDENT).  NOTE(review): if no token has been consumed
        yet, ``tok`` is unbound and this raises UnboundLocalError --
        callers appear to guarantee at least one consumed token; confirm
        before relying on it.
        """
        for tok in reversed(self._tokens[: self._index]):
            if tok.type != tokenize.ENDMARKER and (
                tok.type < tokenize.NEWLINE or tok.type > tokenize.DEDENT
            ):
                break
        return tok

    def get_lines(self, line_numbers: List[int]) -> List[str]:
        """Retrieve source lines corresponding to line numbers."""
        if self._lines:
            # Tokenizing from memory (no path): use the cached lines.
            lines = self._lines
        else:
            n = len(line_numbers)
            lines = {}
            seen = 0
            with open(self._path) as f:
                for count, line in enumerate(f, start=1):
                    if count in line_numbers:
                        seen += 1
                        lines[count] = line
                        if seen == n:
                            # All requested lines found; stop reading.
                            break
        return [lines[n] for n in line_numbers]

    def mark(self) -> "Mark":
        """Return the current position, suitable for a later reset()."""
        return self._index

    def reset(self, index: "Mark") -> None:
        """Move the current position to *index* (from a previous mark())."""
        if index == self._index:
            return
        assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
        old_index = self._index
        self._index = index
        if self._verbose:
            self.report(True, index < old_index)

    def report(self, cached: bool, back: bool) -> None:
        """Print a one-line trace of the current position (verbose mode only)."""
        if back:
            fill = "-" * self._index + "-"
        elif cached:
            fill = "-" * self._index + ">"
        else:
            fill = "-" * self._index + "*"
        if self._index == 0:
            print(f"{fill} (Bof)")
        else:
            tok = self._tokens[self._index - 1]
            print(f"{fill} {shorttok(tok)}")