1  #! /usr/bin/env python3
       2  
       3  """The Tab Nanny despises ambiguous indentation.  She knows no mercy.
       4  
       5  tabnanny -- Detection of ambiguous indentation
       6  
       7  For the time being this module is intended to be called as a script.
       8  However it is possible to import it into an IDE and use the function
       9  check() described below.
      10  
      11  Warning: The API provided by this module is likely to change in future
      12  releases; such changes may not be backward compatible.
      13  """
      14  
      15  # Released to the public domain, by Tim Peters, 15 April 1998.
      16  
      17  # XXX Note: this is now a standard library module.
      18  # XXX The API needs to undergo changes however; the current code is too
      19  # XXX script-like.  This will be addressed later.
      20  
      21  __version__ = "6"
      22  
      23  import os
      24  import sys
      25  import tokenize
      26  
      27  __all__ = ["check", "NannyNag", "process_tokens"]
      28  
      29  verbose = 0
      30  filename_only = 0
      31  
      32  def errprint(*args):
      33      sep = ""
      34      for arg in args:
      35          sys.stderr.write(sep + str(arg))
      36          sep = " "
      37      sys.stderr.write("\n")
      38  
      39  def main():
      40      import getopt
      41  
      42      global verbose, filename_only
      43      try:
      44          opts, args = getopt.getopt(sys.argv[1:], "qv")
      45      except getopt.error as msg:
      46          errprint(msg)
      47          return
      48      for o, a in opts:
      49          if o == '-q':
      50              filename_only = filename_only + 1
      51          if o == '-v':
      52              verbose = verbose + 1
      53      if not args:
      54          errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
      55          return
      56      for arg in args:
      57          check(arg)
      58  
      59  class ESC[4;38;5;81mNannyNag(ESC[4;38;5;149mException):
      60      """
      61      Raised by process_tokens() if detecting an ambiguous indent.
      62      Captured and handled in check().
      63      """
      64      def __init__(self, lineno, msg, line):
      65          self.lineno, self.msg, self.line = lineno, msg, line
      66      def get_lineno(self):
      67          return self.lineno
      68      def get_msg(self):
      69          return self.msg
      70      def get_line(self):
      71          return self.line
      72  
      73  def check(file):
      74      """check(file_or_dir)
      75  
      76      If file_or_dir is a directory and not a symbolic link, then recursively
      77      descend the directory tree named by file_or_dir, checking all .py files
      78      along the way. If file_or_dir is an ordinary Python source file, it is
      79      checked for whitespace related problems. The diagnostic messages are
      80      written to standard output using the print statement.
      81      """
      82  
      83      if os.path.isdir(file) and not os.path.islink(file):
      84          if verbose:
      85              print("%r: listing directory" % (file,))
      86          names = os.listdir(file)
      87          for name in names:
      88              fullname = os.path.join(file, name)
      89              if (os.path.isdir(fullname) and
      90                  not os.path.islink(fullname) or
      91                  os.path.normcase(name[-3:]) == ".py"):
      92                  check(fullname)
      93          return
      94  
      95      try:
      96          f = tokenize.open(file)
      97      except OSError as msg:
      98          errprint("%r: I/O Error: %s" % (file, msg))
      99          return
     100  
     101      if verbose > 1:
     102          print("checking %r ..." % file)
     103  
     104      try:
     105          process_tokens(tokenize.generate_tokens(f.readline))
     106  
     107      except tokenize.TokenError as msg:
     108          errprint("%r: Token Error: %s" % (file, msg))
     109          return
     110  
     111      except IndentationError as msg:
     112          errprint("%r: Indentation Error: %s" % (file, msg))
     113          return
     114  
     115      except NannyNag as nag:
     116          badline = nag.get_lineno()
     117          line = nag.get_line()
     118          if verbose:
     119              print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
     120              print("offending line: %r" % (line,))
     121              print(nag.get_msg())
     122          else:
     123              if ' ' in file: file = '"' + file + '"'
     124              if filename_only: print(file)
     125              else: print(file, badline, repr(line))
     126          return
     127  
     128      finally:
     129          f.close()
     130  
     131      if verbose:
     132          print("%r: Clean bill of health." % (file,))
     133  
     134  class ESC[4;38;5;81mWhitespace:
     135      # the characters used for space and tab
     136      S, T = ' \t'
     137  
     138      # members:
     139      #   raw
     140      #       the original string
     141      #   n
     142      #       the number of leading whitespace characters in raw
     143      #   nt
     144      #       the number of tabs in raw[:n]
     145      #   norm
     146      #       the normal form as a pair (count, trailing), where:
     147      #       count
     148      #           a tuple such that raw[:n] contains count[i]
     149      #           instances of S * i + T
     150      #       trailing
     151      #           the number of trailing spaces in raw[:n]
     152      #       It's A Theorem that m.indent_level(t) ==
     153      #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
     154      #   is_simple
     155      #       true iff raw[:n] is of the form (T*)(S*)
     156  
     157      def __init__(self, ws):
     158          self.raw  = ws
     159          S, T = Whitespace.S, Whitespace.T
     160          count = []
     161          b = n = nt = 0
     162          for ch in self.raw:
     163              if ch == S:
     164                  n = n + 1
     165                  b = b + 1
     166              elif ch == T:
     167                  n = n + 1
     168                  nt = nt + 1
     169                  if b >= len(count):
     170                      count = count + [0] * (b - len(count) + 1)
     171                  count[b] = count[b] + 1
     172                  b = 0
     173              else:
     174                  break
     175          self.n    = n
     176          self.nt   = nt
     177          self.norm = tuple(count), b
     178          self.is_simple = len(count) <= 1
     179  
     180      # return length of longest contiguous run of spaces (whether or not
     181      # preceding a tab)
     182      def longest_run_of_spaces(self):
     183          count, trailing = self.norm
     184          return max(len(count)-1, trailing)
     185  
     186      def indent_level(self, tabsize):
     187          # count, il = self.norm
     188          # for i in range(len(count)):
     189          #    if count[i]:
     190          #        il = il + (i//tabsize + 1)*tabsize * count[i]
     191          # return il
     192  
     193          # quicker:
     194          # il = trailing + sum (i//ts + 1)*ts*count[i] =
     195          # trailing + ts * sum (i//ts + 1)*count[i] =
     196          # trailing + ts * sum i//ts*count[i] + count[i] =
     197          # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
     198          # trailing + ts * [(sum i//ts*count[i]) + num_tabs]
     199          # and note that i//ts*count[i] is 0 when i < ts
     200  
     201          count, trailing = self.norm
     202          il = 0
     203          for i in range(tabsize, len(count)):
     204              il = il + i//tabsize * count[i]
     205          return trailing + tabsize * (il + self.nt)
     206  
     207      # return true iff self.indent_level(t) == other.indent_level(t)
     208      # for all t >= 1
     209      def equal(self, other):
     210          return self.norm == other.norm
     211  
     212      # return a list of tuples (ts, i1, i2) such that
     213      # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
     214      # Intended to be used after not self.equal(other) is known, in which
     215      # case it will return at least one witnessing tab size.
     216      def not_equal_witness(self, other):
     217          n = max(self.longest_run_of_spaces(),
     218                  other.longest_run_of_spaces()) + 1
     219          a = []
     220          for ts in range(1, n+1):
     221              if self.indent_level(ts) != other.indent_level(ts):
     222                  a.append( (ts,
     223                             self.indent_level(ts),
     224                             other.indent_level(ts)) )
     225          return a
     226  
     227      # Return True iff self.indent_level(t) < other.indent_level(t)
     228      # for all t >= 1.
     229      # The algorithm is due to Vincent Broman.
     230      # Easy to prove it's correct.
     231      # XXXpost that.
     232      # Trivial to prove n is sharp (consider T vs ST).
     233      # Unknown whether there's a faster general way.  I suspected so at
     234      # first, but no longer.
     235      # For the special (but common!) case where M and N are both of the
     236      # form (T*)(S*), M.less(N) iff M.len() < N.len() and
     237      # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
     238      # XXXwrite that up.
     239      # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
     240      def less(self, other):
     241          if self.n >= other.n:
     242              return False
     243          if self.is_simple and other.is_simple:
     244              return self.nt <= other.nt
     245          n = max(self.longest_run_of_spaces(),
     246                  other.longest_run_of_spaces()) + 1
     247          # the self.n >= other.n test already did it for ts=1
     248          for ts in range(2, n+1):
     249              if self.indent_level(ts) >= other.indent_level(ts):
     250                  return False
     251          return True
     252  
     253      # return a list of tuples (ts, i1, i2) such that
     254      # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
     255      # Intended to be used after not self.less(other) is known, in which
     256      # case it will return at least one witnessing tab size.
     257      def not_less_witness(self, other):
     258          n = max(self.longest_run_of_spaces(),
     259                  other.longest_run_of_spaces()) + 1
     260          a = []
     261          for ts in range(1, n+1):
     262              if self.indent_level(ts) >= other.indent_level(ts):
     263                  a.append( (ts,
     264                             self.indent_level(ts),
     265                             other.indent_level(ts)) )
     266          return a
     267  
     268  def format_witnesses(w):
     269      firsts = (str(tup[0]) for tup in w)
     270      prefix = "at tab size"
     271      if len(w) > 1:
     272          prefix = prefix + "s"
     273      return prefix + " " + ', '.join(firsts)
     274  
     275  def process_tokens(tokens):
     276      INDENT = tokenize.INDENT
     277      DEDENT = tokenize.DEDENT
     278      NEWLINE = tokenize.NEWLINE
     279      JUNK = tokenize.COMMENT, tokenize.NL
     280      indents = [Whitespace("")]
     281      check_equal = 0
     282  
     283      for (type, token, start, end, line) in tokens:
     284          if type == NEWLINE:
     285              # a program statement, or ENDMARKER, will eventually follow,
     286              # after some (possibly empty) run of tokens of the form
     287              #     (NL | COMMENT)* (INDENT | DEDENT+)?
     288              # If an INDENT appears, setting check_equal is wrong, and will
     289              # be undone when we see the INDENT.
     290              check_equal = 1
     291  
     292          elif type == INDENT:
     293              check_equal = 0
     294              thisguy = Whitespace(token)
     295              if not indents[-1].less(thisguy):
     296                  witness = indents[-1].not_less_witness(thisguy)
     297                  msg = "indent not greater e.g. " + format_witnesses(witness)
     298                  raise NannyNag(start[0], msg, line)
     299              indents.append(thisguy)
     300  
     301          elif type == DEDENT:
     302              # there's nothing we need to check here!  what's important is
     303              # that when the run of DEDENTs ends, the indentation of the
     304              # program statement (or ENDMARKER) that triggered the run is
     305              # equal to what's left at the top of the indents stack
     306  
     307              # Ouch!  This assert triggers if the last line of the source
     308              # is indented *and* lacks a newline -- then DEDENTs pop out
     309              # of thin air.
     310              # assert check_equal  # else no earlier NEWLINE, or an earlier INDENT
     311              check_equal = 1
     312  
     313              del indents[-1]
     314  
     315          elif check_equal and type not in JUNK:
     316              # this is the first "real token" following a NEWLINE, so it
     317              # must be the first token of the next program statement, or an
     318              # ENDMARKER; the "line" argument exposes the leading whitespace
     319              # for this statement; in the case of ENDMARKER, line is an empty
     320              # string, so will properly match the empty string with which the
     321              # "indents" stack was seeded
     322              check_equal = 0
     323              thisguy = Whitespace(line)
     324              if not indents[-1].equal(thisguy):
     325                  witness = indents[-1].not_equal_witness(thisguy)
     326                  msg = "indent not equal e.g. " + format_witnesses(witness)
     327                  raise NannyNag(start[0], msg, line)
     328  
     329  
     330  if __name__ == '__main__':
     331      main()