python (3.12.0)
1 #! /usr/bin/env python3
2
3 """The Tab Nanny despises ambiguous indentation. She knows no mercy.
4
5 tabnanny -- Detection of ambiguous indentation
6
7 For the time being this module is intended to be called as a script.
8 However it is possible to import it into an IDE and use the function
9 check() described below.
10
11 Warning: The API provided by this module is likely to change in future
12 releases; such changes may not be backward compatible.
13 """
14
15 # Released to the public domain, by Tim Peters, 15 April 1998.
16
17 # XXX Note: this is now a standard library module.
18 # XXX The API needs to undergo changes however; the current code is too
19 # XXX script-like. This will be addressed later.
20
21 __version__ = "6"
22
23 import os
24 import sys
25 import tokenize
26
27 __all__ = ["check", "NannyNag", "process_tokens"]
28
# Verbosity level: main() adds 1 for every -v option.  check() prints a
# directory-listing notice and detailed diagnostics when it is non-zero, and
# a "checking ..." line for each file when it is greater than 1.
verbose = 0
# Quiet flag: main() adds 1 for every -q option.  When it is non-zero (and
# verbose is zero) check() prints only the name of an offending file instead
# of the file name, line number and line text.
filename_only = 0
31
def errprint(*args):
    """Write the arguments to stderr, space separated, then exit with status 1.

    Each argument is converted with str().  A newline terminates the message,
    after which the process is ended through sys.exit(1).
    """
    stream = sys.stderr
    for position, arg in enumerate(args):
        # Every argument except the first is preceded by a single space.
        stream.write(("" if position == 0 else " ") + str(arg))
    stream.write("\n")
    sys.exit(1)
39
def main():
    """Command-line entry point: parse -q/-v options and check each argument.

    Each -q increments the module-level ``filename_only`` and each -v
    increments ``verbose``.  An option error or a missing file argument is
    reported through errprint(); every remaining argument is passed to
    check().
    """
    import getopt

    global verbose, filename_only
    try:
        options, targets = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as exc:
        errprint(exc)
    # Both options take no value, so only the flag names matter.
    flags = [flag for flag, _value in options]
    filename_only += flags.count('-q')
    verbose += flags.count('-v')
    if not targets:
        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
    for target in targets:
        check(target)
57
class NannyNag(Exception):
    """
    Exception signalling an ambiguous indent.

    Raised by process_tokens() and captured and handled in check().
    Carries the line number, a descriptive message and the offending line.
    """

    def __init__(self, lineno, msg, line):
        self.lineno = lineno
        self.msg = msg
        self.line = line

    def get_lineno(self):
        """Return the line number stored at construction."""
        return self.lineno

    def get_msg(self):
        """Return the message stored at construction."""
        return self.msg

    def get_line(self):
        """Return the source line stored at construction."""
        return self.line
71
def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, then recursively
    descend the directory tree named by file_or_dir, checking all .py files
    along the way. If file_or_dir is an ordinary Python source file, it is
    checked for whitespace related problems. The diagnostic messages are
    written to standard output using print(); I/O, tokenizer, syntax and
    indentation errors are reported through errprint().
    """

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%r: listing directory" % (file,))
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            # Recurse into real (non-symlink) directories and into anything
            # whose name ends in ".py" (compared case-normalized).
            if (os.path.isdir(fullname) and
                not os.path.islink(fullname) or
                os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    try:
        f = tokenize.open(file)
    except OSError as msg:
        errprint("%r: I/O Error: %s" % (file, msg))
        return

    if verbose > 1:
        print("checking %r ..." % file)

    try:
        process_tokens(tokenize.generate_tokens(f.readline))

    except tokenize.TokenError as msg:
        errprint("%r: Token Error: %s" % (file, msg))
        return

    # IndentationError is a subclass of SyntaxError, so it has to be handled
    # first; listed after SyntaxError this handler could never run and
    # indentation problems were reported as "Token Error".
    except IndentationError as msg:
        errprint("%r: Indentation Error: %s" % (file, msg))
        return

    except SyntaxError as msg:
        errprint("%r: Token Error: %s" % (file, msg))
        return

    except NannyNag as nag:
        badline = nag.get_lineno()
        line = nag.get_line()
        if verbose:
            print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
            print("offending line: %r" % (line,))
            print(nag.get_msg())
        else:
            # Quote names containing a space so the terse output stays
            # unambiguous.
            if ' ' in file: file = '"' + file + '"'
            if filename_only: print(file)
            else: print(file, badline, repr(line))
        return

    finally:
        # Runs on every path, including the early returns above.
        f.close()

    if verbose:
        print("%r: Clean bill of health." % (file,))
136
class Whitespace:
    """Model of the leading whitespace of a string.

    Members:
        raw
            the original string
        n
            the number of leading whitespace characters in raw
        nt
            the number of tabs in raw[:n]
        norm
            the normal form as a pair (count, trailing), where:
            count
                a tuple such that raw[:n] contains count[i]
                instances of S * i + T
            trailing
                the number of trailing spaces in raw[:n]
            It's A Theorem that m.indent_level(t) ==
            n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
        is_simple
            true iff raw[:n] is of the form (T*)(S*)
    """

    # the characters used for space and tab
    S, T = ' \t'

    def __init__(self, ws):
        self.raw = ws
        space, tab = Whitespace.S, Whitespace.T
        tabs_after_run = []   # tabs_after_run[i]: tabs preceded by exactly i spaces
        run = 0               # spaces seen since the last tab
        leading = 0           # whitespace characters consumed so far
        tab_total = 0
        for ch in ws:
            if ch == space:
                run += 1
            elif ch == tab:
                tab_total += 1
                # Grow the histogram so that index `run` exists.
                shortfall = run + 1 - len(tabs_after_run)
                if shortfall > 0:
                    tabs_after_run = tabs_after_run + [0] * shortfall
                tabs_after_run[run] += 1
                run = 0
            else:
                # First non-blank character ends the leading whitespace.
                break
            leading += 1
        self.n = leading
        self.nt = tab_total
        self.norm = tuple(tabs_after_run), run
        self.is_simple = len(tabs_after_run) <= 1

    def _witness_limit(self, other):
        # Largest tab size that needs to be examined when comparing self
        # with other: one more than the longest run of spaces in either.
        return max(self.longest_run_of_spaces(),
                   other.longest_run_of_spaces()) + 1

    # return length of longest contiguous run of spaces (whether or not
    # preceding a tab)
    def longest_run_of_spaces(self):
        histogram, trailing = self.norm
        longest_before_tab = len(histogram) - 1
        return max(longest_before_tab, trailing)

    def indent_level(self, tabsize):
        # Straightforward definition:
        #     il = trailing + sum (i//ts + 1)*ts*count[i]
        # which factors into
        #     trailing + ts * [(sum i//ts*count[i]) + num_tabs]
        # and i//ts*count[i] is 0 when i < ts, so the sum may start at ts.
        histogram, trailing = self.norm
        skipped_stops = sum(i // tabsize * histogram[i]
                            for i in range(tabsize, len(histogram)))
        return trailing + tabsize * (skipped_stops + self.nt)

    # return true iff self.indent_level(t) == other.indent_level(t)
    # for all t >= 1
    def equal(self, other):
        return self.norm == other.norm

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
    # Intended to be used after not self.equal(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_equal_witness(self, other):
        witnesses = []
        for ts in range(1, self._witness_limit(other) + 1):
            mine = self.indent_level(ts)
            theirs = other.indent_level(ts)
            if mine != theirs:
                witnesses.append((ts, mine, theirs))
        return witnesses

    # Return True iff self.indent_level(t) < other.indent_level(t)
    # for all t >= 1.
    # The algorithm is due to Vincent Broman.
    # Trivial to prove the bound on tab sizes is sharp (consider T vs ST).
    # For the special (but common!) case where M and N are both of the
    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    # M.num_tabs() <= N.num_tabs().
    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    def less(self, other):
        # At tab size 1 the indent level is just the character count.
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        # the self.n >= other.n test already did it for ts=1
        return all(self.indent_level(ts) < other.indent_level(ts)
                   for ts in range(2, self._witness_limit(other) + 1))

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
    # Intended to be used after not self.less(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_less_witness(self, other):
        witnesses = []
        for ts in range(1, self._witness_limit(other) + 1):
            mine = self.indent_level(ts)
            theirs = other.indent_level(ts)
            if mine >= theirs:
                witnesses.append((ts, mine, theirs))
        return witnesses
270
def format_witnesses(w):
    """Return a phrase listing the tab sizes found in the witness tuples.

    w is a sequence of tuples whose first item is a tab size.  The result is
    "at tab size N" for at most one witness and "at tab sizes N, M, ..." for
    several.
    """
    label = "at tab sizes" if len(w) > 1 else "at tab size"
    sizes = ', '.join(str(witness[0]) for witness in w)
    return label + " " + sizes
277
def process_tokens(tokens):
    """Check a token stream for ambiguous indentation.

    Delegates to _process_tokens().  A TabError raised while doing so is
    converted into a NannyNag built from the error's lineno, msg and text.
    """
    try:
        _process_tokens(tokens)
    except TabError as tab_error:
        raise NannyNag(tab_error.lineno, tab_error.msg, tab_error.text)
283
def _process_tokens(tokens):
    """Walk a token stream and raise NannyNag on an ambiguous indent.

    tokens is an iterable of 5-tuples (type, string, start, end, line), such
    as the ones check() obtains from tokenize.generate_tokens().  A stack of
    Whitespace objects mirrors the INDENT/DEDENT nesting; each new indent
    must be greater than the enclosing one, and the first statement after a
    NEWLINE or DEDENT run must match the indent on top of the stack, for
    every tab size.
    """
    ignorable = (tokenize.COMMENT, tokenize.NL)
    stack = [Whitespace("")]
    must_match = False

    for kind, text, start, _end, line in tokens:
        if kind == tokenize.NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, requesting the equality check is wrong,
            # and is undone when the INDENT is seen.
            must_match = True
            continue

        if kind == tokenize.INDENT:
            must_match = False
            candidate = Whitespace(text)
            enclosing = stack[-1]
            if not enclosing.less(candidate):
                witness = enclosing.not_less_witness(candidate)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            stack.append(candidate)
            continue

        if kind == tokenize.DEDENT:
            # Nothing to check here: what matters is that when the run of
            # DEDENTs ends, the indentation of the statement (or ENDMARKER)
            # that triggered the run equals what is left on top of the stack.
            # No assertion that a check was already pending is made, because
            # DEDENTs appear out of thin air when the last source line is
            # indented *and* lacks a newline.
            must_match = True
            del stack[-1]
            continue

        if not must_match or kind in ignorable:
            continue

        # This is the first "real token" following a NEWLINE, so it must be
        # the first token of the next program statement, or an ENDMARKER.
        # The "line" item exposes the leading whitespace of that statement;
        # for ENDMARKER it is an empty string, which properly matches the
        # empty string the stack was seeded with.
        must_match = False
        candidate = Whitespace(line)
        expected = stack[-1]
        if not expected.equal(candidate):
            witness = expected.not_equal_witness(candidate)
            msg = "indent not equal e.g. " + format_witnesses(witness)
            raise NannyNag(start[0], msg, line)
337
338
# Run the command-line interface when the module is executed as a script.
if __name__ == '__main__':
    main()