python (3.12.0)

Browse
Build Log
Usage
       1  #! /usr/bin/env python3
       2  
       3  """The Tab Nanny despises ambiguous indentation.  She knows no mercy.
       4  
       5  tabnanny -- Detection of ambiguous indentation
       6  
       7  For the time being this module is intended to be called as a script.
       8  However it is possible to import it into an IDE and use the function
       9  check() described below.
      10  
      11  Warning: The API provided by this module is likely to change in future
      12  releases; such changes may not be backward compatible.
      13  """
      14  
      15  # Released to the public domain, by Tim Peters, 15 April 1998.
      16  
      17  # XXX Note: this is now a standard library module.
      18  # XXX The API needs to undergo changes however; the current code is too
      19  # XXX script-like.  This will be addressed later.
      20  
      21  __version__ = "6"
      22  
      23  import os
      24  import sys
      25  import tokenize
      26  
      27  __all__ = ["check", "NannyNag", "process_tokens"]
      28  
# Verbosity level; main() raises it by one for every -v option.  check()
# prints directory listings and detailed diagnostics when it is nonzero and
# a "checking ..." line per file when it is greater than one.
verbose = 0
# Raised by one for every -q option in main().  When nonzero (and verbose is
# zero), check() prints only the name of each offending file.
filename_only = 0
      31  
      32  def errprint(*args):
      33      sep = ""
      34      for arg in args:
      35          sys.stderr.write(sep + str(arg))
      36          sep = " "
      37      sys.stderr.write("\n")
      38      sys.exit(1)
      39  
      40  def main():
      41      import getopt
      42  
      43      global verbose, filename_only
      44      try:
      45          opts, args = getopt.getopt(sys.argv[1:], "qv")
      46      except getopt.error as msg:
      47          errprint(msg)
      48      for o, a in opts:
      49          if o == '-q':
      50              filename_only = filename_only + 1
      51          if o == '-v':
      52              verbose = verbose + 1
      53      if not args:
      54          errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
      55      for arg in args:
      56          check(arg)
      57  
      58  class ESC[4;38;5;81mNannyNag(ESC[4;38;5;149mException):
      59      """
      60      Raised by process_tokens() if detecting an ambiguous indent.
      61      Captured and handled in check().
      62      """
      63      def __init__(self, lineno, msg, line):
      64          self.lineno, self.msg, self.line = lineno, msg, line
      65      def get_lineno(self):
      66          return self.lineno
      67      def get_msg(self):
      68          return self.msg
      69      def get_line(self):
      70          return self.line
      71  
      72  def check(file):
      73      """check(file_or_dir)
      74  
      75      If file_or_dir is a directory and not a symbolic link, then recursively
      76      descend the directory tree named by file_or_dir, checking all .py files
      77      along the way. If file_or_dir is an ordinary Python source file, it is
      78      checked for whitespace related problems. The diagnostic messages are
      79      written to standard output using the print statement.
      80      """
      81  
      82      if os.path.isdir(file) and not os.path.islink(file):
      83          if verbose:
      84              print("%r: listing directory" % (file,))
      85          names = os.listdir(file)
      86          for name in names:
      87              fullname = os.path.join(file, name)
      88              if (os.path.isdir(fullname) and
      89                  not os.path.islink(fullname) or
      90                  os.path.normcase(name[-3:]) == ".py"):
      91                  check(fullname)
      92          return
      93  
      94      try:
      95          f = tokenize.open(file)
      96      except OSError as msg:
      97          errprint("%r: I/O Error: %s" % (file, msg))
      98          return
      99  
     100      if verbose > 1:
     101          print("checking %r ..." % file)
     102  
     103      try:
     104          process_tokens(tokenize.generate_tokens(f.readline))
     105  
     106      except tokenize.TokenError as msg:
     107          errprint("%r: Token Error: %s" % (file, msg))
     108          return
     109  
     110      except SyntaxError as msg:
     111          errprint("%r: Token Error: %s" % (file, msg))
     112          return
     113  
     114      except IndentationError as msg:
     115          errprint("%r: Indentation Error: %s" % (file, msg))
     116          return
     117  
     118      except NannyNag as nag:
     119          badline = nag.get_lineno()
     120          line = nag.get_line()
     121          if verbose:
     122              print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
     123              print("offending line: %r" % (line,))
     124              print(nag.get_msg())
     125          else:
     126              if ' ' in file: file = '"' + file + '"'
     127              if filename_only: print(file)
     128              else: print(file, badline, repr(line))
     129          return
     130  
     131      finally:
     132          f.close()
     133  
     134      if verbose:
     135          print("%r: Clean bill of health." % (file,))
     136  
     137  class ESC[4;38;5;81mWhitespace:
     138      # the characters used for space and tab
     139      S, T = ' \t'
     140  
     141      # members:
     142      #   raw
     143      #       the original string
     144      #   n
     145      #       the number of leading whitespace characters in raw
     146      #   nt
     147      #       the number of tabs in raw[:n]
     148      #   norm
     149      #       the normal form as a pair (count, trailing), where:
     150      #       count
     151      #           a tuple such that raw[:n] contains count[i]
     152      #           instances of S * i + T
     153      #       trailing
     154      #           the number of trailing spaces in raw[:n]
     155      #       It's A Theorem that m.indent_level(t) ==
     156      #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
     157      #   is_simple
     158      #       true iff raw[:n] is of the form (T*)(S*)
     159  
     160      def __init__(self, ws):
     161          self.raw  = ws
     162          S, T = Whitespace.S, Whitespace.T
     163          count = []
     164          b = n = nt = 0
     165          for ch in self.raw:
     166              if ch == S:
     167                  n = n + 1
     168                  b = b + 1
     169              elif ch == T:
     170                  n = n + 1
     171                  nt = nt + 1
     172                  if b >= len(count):
     173                      count = count + [0] * (b - len(count) + 1)
     174                  count[b] = count[b] + 1
     175                  b = 0
     176              else:
     177                  break
     178          self.n    = n
     179          self.nt   = nt
     180          self.norm = tuple(count), b
     181          self.is_simple = len(count) <= 1
     182  
     183      # return length of longest contiguous run of spaces (whether or not
     184      # preceding a tab)
     185      def longest_run_of_spaces(self):
     186          count, trailing = self.norm
     187          return max(len(count)-1, trailing)
     188  
     189      def indent_level(self, tabsize):
     190          # count, il = self.norm
     191          # for i in range(len(count)):
     192          #    if count[i]:
     193          #        il = il + (i//tabsize + 1)*tabsize * count[i]
     194          # return il
     195  
     196          # quicker:
     197          # il = trailing + sum (i//ts + 1)*ts*count[i] =
     198          # trailing + ts * sum (i//ts + 1)*count[i] =
     199          # trailing + ts * sum i//ts*count[i] + count[i] =
     200          # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
     201          # trailing + ts * [(sum i//ts*count[i]) + num_tabs]
     202          # and note that i//ts*count[i] is 0 when i < ts
     203  
     204          count, trailing = self.norm
     205          il = 0
     206          for i in range(tabsize, len(count)):
     207              il = il + i//tabsize * count[i]
     208          return trailing + tabsize * (il + self.nt)
     209  
     210      # return true iff self.indent_level(t) == other.indent_level(t)
     211      # for all t >= 1
     212      def equal(self, other):
     213          return self.norm == other.norm
     214  
     215      # return a list of tuples (ts, i1, i2) such that
     216      # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
     217      # Intended to be used after not self.equal(other) is known, in which
     218      # case it will return at least one witnessing tab size.
     219      def not_equal_witness(self, other):
     220          n = max(self.longest_run_of_spaces(),
     221                  other.longest_run_of_spaces()) + 1
     222          a = []
     223          for ts in range(1, n+1):
     224              if self.indent_level(ts) != other.indent_level(ts):
     225                  a.append( (ts,
     226                             self.indent_level(ts),
     227                             other.indent_level(ts)) )
     228          return a
     229  
     230      # Return True iff self.indent_level(t) < other.indent_level(t)
     231      # for all t >= 1.
     232      # The algorithm is due to Vincent Broman.
     233      # Easy to prove it's correct.
     234      # XXXpost that.
     235      # Trivial to prove n is sharp (consider T vs ST).
     236      # Unknown whether there's a faster general way.  I suspected so at
     237      # first, but no longer.
     238      # For the special (but common!) case where M and N are both of the
     239      # form (T*)(S*), M.less(N) iff M.len() < N.len() and
     240      # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
     241      # XXXwrite that up.
     242      # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
     243      def less(self, other):
     244          if self.n >= other.n:
     245              return False
     246          if self.is_simple and other.is_simple:
     247              return self.nt <= other.nt
     248          n = max(self.longest_run_of_spaces(),
     249                  other.longest_run_of_spaces()) + 1
     250          # the self.n >= other.n test already did it for ts=1
     251          for ts in range(2, n+1):
     252              if self.indent_level(ts) >= other.indent_level(ts):
     253                  return False
     254          return True
     255  
     256      # return a list of tuples (ts, i1, i2) such that
     257      # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
     258      # Intended to be used after not self.less(other) is known, in which
     259      # case it will return at least one witnessing tab size.
     260      def not_less_witness(self, other):
     261          n = max(self.longest_run_of_spaces(),
     262                  other.longest_run_of_spaces()) + 1
     263          a = []
     264          for ts in range(1, n+1):
     265              if self.indent_level(ts) >= other.indent_level(ts):
     266                  a.append( (ts,
     267                             self.indent_level(ts),
     268                             other.indent_level(ts)) )
     269          return a
     270  
     271  def format_witnesses(w):
     272      firsts = (str(tup[0]) for tup in w)
     273      prefix = "at tab size"
     274      if len(w) > 1:
     275          prefix = prefix + "s"
     276      return prefix + " " + ', '.join(firsts)
     277  
     278  def process_tokens(tokens):
     279      try:
     280          _process_tokens(tokens)
     281      except TabError as e:
     282          raise NannyNag(e.lineno, e.msg, e.text)
     283  
     284  def _process_tokens(tokens):
     285      INDENT = tokenize.INDENT
     286      DEDENT = tokenize.DEDENT
     287      NEWLINE = tokenize.NEWLINE
     288      JUNK = tokenize.COMMENT, tokenize.NL
     289      indents = [Whitespace("")]
     290      check_equal = 0
     291  
     292      for (type, token, start, end, line) in tokens:
     293          if type == NEWLINE:
     294              # a program statement, or ENDMARKER, will eventually follow,
     295              # after some (possibly empty) run of tokens of the form
     296              #     (NL | COMMENT)* (INDENT | DEDENT+)?
     297              # If an INDENT appears, setting check_equal is wrong, and will
     298              # be undone when we see the INDENT.
     299              check_equal = 1
     300  
     301          elif type == INDENT:
     302              check_equal = 0
     303              thisguy = Whitespace(token)
     304              if not indents[-1].less(thisguy):
     305                  witness = indents[-1].not_less_witness(thisguy)
     306                  msg = "indent not greater e.g. " + format_witnesses(witness)
     307                  raise NannyNag(start[0], msg, line)
     308              indents.append(thisguy)
     309  
     310          elif type == DEDENT:
     311              # there's nothing we need to check here!  what's important is
     312              # that when the run of DEDENTs ends, the indentation of the
     313              # program statement (or ENDMARKER) that triggered the run is
     314              # equal to what's left at the top of the indents stack
     315  
     316              # Ouch!  This assert triggers if the last line of the source
     317              # is indented *and* lacks a newline -- then DEDENTs pop out
     318              # of thin air.
     319              # assert check_equal  # else no earlier NEWLINE, or an earlier INDENT
     320              check_equal = 1
     321  
     322              del indents[-1]
     323  
     324          elif check_equal and type not in JUNK:
     325              # this is the first "real token" following a NEWLINE, so it
     326              # must be the first token of the next program statement, or an
     327              # ENDMARKER; the "line" argument exposes the leading whitespace
     328              # for this statement; in the case of ENDMARKER, line is an empty
     329              # string, so will properly match the empty string with which the
     330              # "indents" stack was seeded
     331              check_equal = 0
     332              thisguy = Whitespace(line)
     333              if not indents[-1].equal(thisguy):
     334                  witness = indents[-1].not_equal_witness(thisguy)
     335                  msg = "indent not equal e.g. " + format_witnesses(witness)
     336                  raise NannyNag(start[0], msg, line)
     337  
     338  
# Run the command-line interface only when executed as a script, not when
# imported.
if __name__ == '__main__':
    main()