1 #! /usr/bin/env python3
2
3 """The Tab Nanny despises ambiguous indentation. She knows no mercy.
4
5 tabnanny -- Detection of ambiguous indentation
6
7 For the time being this module is intended to be called as a script.
8 However it is possible to import it into an IDE and use the function
9 check() described below.
10
11 Warning: The API provided by this module is likely to change in future
12 releases; such changes may not be backward compatible.
13 """
14
15 # Released to the public domain, by Tim Peters, 15 April 1998.
16
17 # XXX Note: this is now a standard library module.
18 # XXX The API needs to undergo changes however; the current code is too
19 # XXX script-like. This will be addressed later.
20
# Version identifier for this module, kept as a string.
__version__ = "6"
22
23 import os
24 import sys
25 import tokenize
26
# Public names exported by a star-import of this module.
__all__ = ["check", "NannyNag", "process_tokens"]

# Output controls: main() adjusts them from the command line, check() reads them.
#   verbose       -- incremented once per -v; any non-zero value selects the
#                    long report format, and a value above 1 also announces
#                    each file before it is checked
#   filename_only -- incremented once per -q; when not verbose, a problem
#                    report then shows only the file name
verbose = 0
filename_only = 0
31
def errprint(*args):
    """Write the arguments to standard error on one line.

    Each argument is converted with str(); the pieces are separated by a
    single space and the line is terminated with a newline.  Calling it
    with no arguments writes just the newline.
    """
    sys.stderr.write(" ".join(str(item) for item in args))
    sys.stderr.write("\n")
38
def main():
    """Command-line driver: parse the flags, then check every argument.

    Each -q increments the module-level ``filename_only`` counter and
    each -v increments ``verbose``.  An option-parsing error or an empty
    argument list is reported through errprint() and ends the run
    without checking anything; otherwise check() is called on every
    remaining argument in order.
    """
    import getopt

    global verbose, filename_only
    try:
        options, targets = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as err:
        errprint(err)
        return

    # Neither flag takes a value, so only the option name matters.
    for flag, _value in options:
        if flag == '-q':
            filename_only += 1
        elif flag == '-v':
            verbose += 1

    if not targets:
        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
        return

    for target in targets:
        check(target)
58
class NannyNag(Exception):
    """
    Raised by process_tokens() if detecting an ambiguous indent.
    Captured and handled in check().
    """

    def __init__(self, lineno, msg, line):
        # lineno: line number of the offending line
        # msg:    description of the problem
        # line:   text of the offending line
        self.lineno = lineno
        self.msg = msg
        self.line = line

    # Accessors for the three values given to the constructor.

    def get_lineno(self):
        return self.lineno

    def get_msg(self):
        return self.msg

    def get_line(self):
        return self.line
72
def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, walk it
    recursively: every entry that is itself a non-symlink directory, or
    whose name ends in ".py", is checked in turn.  Otherwise the argument
    is opened as Python source and tokenized, and its indentation is
    verified with process_tokens().

    Findings are printed to standard output; I/O, tokenizing and
    indentation errors are reported on standard error via errprint().
    The amount of detail depends on the module-level ``verbose`` and
    ``filename_only`` settings.
    """

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%r: listing directory" % (file,))
        for entry in os.listdir(file):
            fullname = os.path.join(file, entry)
            is_real_dir = (os.path.isdir(fullname)
                           and not os.path.islink(fullname))
            if is_real_dir or os.path.normcase(entry[-3:]) == ".py":
                check(fullname)
        return

    try:
        stream = tokenize.open(file)
    except OSError as err:
        errprint("%r: I/O Error: %s" % (file, err))
        return

    if verbose > 1:
        print("checking %r ..." % file)

    # The with-block closes the file on every exit path, including the
    # early returns taken from the handlers below.
    with stream:
        try:
            process_tokens(tokenize.generate_tokens(stream.readline))

        except tokenize.TokenError as err:
            errprint("%r: Token Error: %s" % (file, err))
            return

        except IndentationError as err:
            errprint("%r: Indentation Error: %s" % (file, err))
            return

        except NannyNag as nag:
            lineno = nag.get_lineno()
            text = nag.get_line()
            if verbose:
                print("%r: *** Line %d: trouble in tab city! ***" % (file, lineno))
                print("offending line: %r" % (text,))
                print(nag.get_msg())
            else:
                # Quote names containing a space so the terse report
                # still shows where the file name ends.
                if ' ' in file:
                    file = '"' + file + '"'
                if filename_only:
                    print(file)
                else:
                    print(file, lineno, repr(text))
            return

    if verbose:
        print("%r: Clean bill of health." % (file,))
133
class Whitespace:
    """Normalized description of the leading whitespace of a string.

    Two instances can be compared for every tab size at once, which is
    what allows ambiguous indentation to be detected.
    """

    # the characters used for space and tab
    S, T = ' \t'

    # members:
    #   raw
    #       the original string
    #   n
    #       the number of leading whitespace characters in raw
    #   nt
    #       the number of tabs in raw[:n]
    #   norm
    #       the normal form as a pair (count, trailing), where:
    #       count
    #           a tuple such that raw[:n] contains count[i]
    #           instances of S * i + T
    #       trailing
    #           the number of trailing spaces in raw[:n]
    #       It's A Theorem that m.indent_level(t) ==
    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    #   is_simple
    #       true iff raw[:n] is of the form (T*)(S*)

    def __init__(self, ws):
        self.raw = ws
        space, tab = Whitespace.S, Whitespace.T
        tabs_after_run = []     # index i: tabs preceded by exactly i spaces
        run = width = tabs = 0  # current space run, prefix length, tab total
        for ch in ws:
            if ch == space:
                run += 1
            elif ch == tab:
                tabs += 1
                shortfall = run - len(tabs_after_run) + 1
                if shortfall > 0:
                    # grow the table so that index `run` exists
                    tabs_after_run.extend([0] * shortfall)
                tabs_after_run[run] += 1
                run = 0
            else:
                # first non-whitespace character ends the prefix
                break
            width += 1
        self.n = width
        self.nt = tabs
        self.norm = tuple(tabs_after_run), run
        self.is_simple = len(tabs_after_run) <= 1

    # return length of longest contiguous run of spaces (whether or not
    # preceding a tab)
    def longest_run_of_spaces(self):
        tab_counts, tail = self.norm
        return max(len(tab_counts) - 1, tail)

    # return the column reached by the leading whitespace when tab stops
    # are `tabsize` columns apart
    def indent_level(self, tabsize):
        # Straightforward form:
        #     il = trailing + sum (i//ts + 1)*ts*count[i]
        # which rearranges to
        #     il = trailing + ts * [(sum i//ts*count[i]) + num_tabs]
        # and i//ts*count[i] is 0 when i < ts, so the sum can start at ts.
        tab_counts, tail = self.norm
        skipped_stops = sum(i // tabsize * tab_counts[i]
                            for i in range(tabsize, len(tab_counts)))
        return tail + tabsize * (skipped_stops + self.nt)

    # return true iff self.indent_level(t) == other.indent_level(t)
    # for all t >= 1
    def equal(self, other):
        return self.norm == other.norm

    # yield (ts, self.indent_level(ts), other.indent_level(ts)) for every
    # tab size from 1 up to one more than the longest run of spaces in
    # either operand
    def _levels_by_tabsize(self, other):
        limit = max(self.longest_run_of_spaces(),
                    other.longest_run_of_spaces()) + 1
        for ts in range(1, limit + 1):
            yield ts, self.indent_level(ts), other.indent_level(ts)

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
    # Intended to be used after not self.equal(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_equal_witness(self, other):
        return [(ts, mine, theirs)
                for ts, mine, theirs in self._levels_by_tabsize(other)
                if mine != theirs]

    # Return True iff self.indent_level(t) < other.indent_level(t)
    # for all t >= 1.
    # The algorithm is due to Vincent Broman.
    # Trivial to prove the bound on the tab sizes tried is sharp
    # (consider T vs ST).
    # For the special (but common!) case where M and N are both of the
    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    # M.num_tabs() <= N.num_tabs().
    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    def less(self, other):
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        limit = max(self.longest_run_of_spaces(),
                    other.longest_run_of_spaces()) + 1
        # the self.n >= other.n test already did it for ts=1
        return all(self.indent_level(ts) < other.indent_level(ts)
                   for ts in range(2, limit + 1))

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
    # Intended to be used after not self.less(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_less_witness(self, other):
        return [(ts, mine, theirs)
                for ts, mine, theirs in self._levels_by_tabsize(other)
                if mine >= theirs]
267
def format_witnesses(w):
    """Return a phrase naming the tab sizes found in the witness list *w*.

    Only the first element of each entry (the tab size) is used.  The
    phrase starts with "at tab size", pluralised when there is more than
    one entry, followed by the sizes joined with ", ".
    """
    label = "at tab sizes" if len(w) > 1 else "at tab size"
    return label + " " + ', '.join(str(entry[0]) for entry in w)
274
def process_tokens(tokens):
    """Check a token stream for indentation that depends on the tab size.

    *tokens* is an iterable of 5-tuples (type, string, start, end, line),
    the shape this function unpacks; check() feeds it the output of
    tokenize.generate_tokens().  A stack of Whitespace objects mirrors
    the open indentation levels.

    Raises NannyNag, carrying the line number (start[0]), a message and
    the offending line, when an INDENT is not strictly deeper than the
    enclosing level for every tab size, or when a statement's leading
    whitespace is not equal to the current level for every tab size.
    """
    indent_tok = tokenize.INDENT
    dedent_tok = tokenize.DEDENT
    newline_tok = tokenize.NEWLINE
    skippable = tokenize.COMMENT, tokenize.NL
    stack = [Whitespace("")]
    expect_match = False

    for tok_type, tok_text, start, _end, line in tokens:
        if tok_type == newline_tok:
            # a program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, requesting the equality check is
            # wrong, and is undone when the INDENT is seen.
            expect_match = True

        elif tok_type == indent_tok:
            expect_match = False
            candidate = Whitespace(tok_text)
            enclosing = stack[-1]
            if not enclosing.less(candidate):
                witness = enclosing.not_less_witness(candidate)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            stack.append(candidate)

        elif tok_type == dedent_tok:
            # Nothing to verify on the DEDENT itself: what matters is that
            # once the run of DEDENTs ends, the statement (or ENDMARKER)
            # that triggered it lines up with what is left on top of the
            # stack.
            #
            # No NEWLINE is guaranteed to have come first: if the last
            # source line is indented *and* lacks a newline, DEDENTs pop
            # out of thin air.
            expect_match = True
            stack.pop()

        elif expect_match and tok_type not in skippable:
            # this is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER; "line" exposes the leading whitespace for this
            # statement; in the case of ENDMARKER, line is an empty
            # string, so it properly matches the empty string with which
            # the stack was seeded
            expect_match = False
            candidate = Whitespace(line)
            enclosing = stack[-1]
            if not enclosing.equal(candidate):
                witness = enclosing.not_equal_witness(candidate)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
328
329
# Run the command-line driver only when executed as a script, not on import.
if __name__ == '__main__':
    main()