1  #! /usr/bin/env python3
       2  
       3  # Released to the public domain, by Tim Peters, 03 October 2000.
       4  
       5  """reindent [-d][-r][-v] [ path ... ]
       6  
       7  -d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
       8  -r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
       9  -n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
      10  -v (--verbose)  Verbose.   Print informative msgs; else no output.
      11     (--newline)  Newline.   Specify the newline character to use (CRLF, LF).
      12                             Default is the same as the original file.
      13  -h (--help)     Help.      Print this usage information and exit.
      14  
      15  Change Python (.py) files to use 4-space indents and no hard tab characters.
      16  Also trim excess spaces and tabs from ends of lines, and remove empty lines
      17  at the end of files.  Also ensure the last line ends with a newline.
      18  
      19  If no paths are given on the command line, reindent operates as a filter,
      20  reading a single source file from standard input and writing the transformed
      21  source to standard output.  In this case, the -d, -r and -v flags are
      22  ignored.
      23  
      24  You can pass one or more file and/or directory paths.  When a directory
      25  path, all .py files within the directory will be examined, and, if the -r
      26  option is given, likewise recursively for subdirectories.
      27  
      28  If output is not to standard output, reindent overwrites files in place,
      29  renaming the originals with a .bak extension.  If it finds nothing to
      30  change, the file is left alone.  If reindent does change a file, the changed
      31  file is a fixed-point for future runs (i.e., running reindent on the
      32  resulting .py file won't change it again).
      33  
      34  The hard part of reindenting is figuring out what to do with comment
      35  lines.  So long as the input files get a clean bill of health from
      36  tabnanny.py, reindent should do a good job.
      37  
      38  The backup file is a copy of the one that is being reindented. The ".bak"
      39  file is generated with shutil.copy(), but some corner cases regarding
      40  user/group and permissions could leave the backup file more readable than
      41  you'd prefer. You can always use the --nobackup option to prevent this.
      42  """
      43  
# Version identifier for this script.
__version__ = "1"
      45  
      46  import tokenize
      47  import os
      48  import shutil
      49  import sys
      50  
# Command-line option state.  main() rebinds these through a `global`
# statement while parsing options; check() reads them for every path.
verbose = False     # -v / --verbose: print progress messages
recurse = False     # -r / --recurse: descend into subdirectories
dryrun = False      # -d / --dryrun: analyze only, never rewrite a file
makebackup = True   # cleared by -n / --nobackup: skip the ".bak" copy
# A specified newline to be used in the output (set by --newline option)
spec_newline = None
      57  
      58  
      59  def usage(msg=None):
      60      if msg is None:
      61          msg = __doc__
      62      print(msg, file=sys.stderr)
      63  
      64  
      65  def errprint(*args):
      66      sys.stderr.write(" ".join(str(arg) for arg in args))
      67      sys.stderr.write("\n")
      68  
      69  def main():
      70      import getopt
      71      global verbose, recurse, dryrun, makebackup, spec_newline
      72      try:
      73          opts, args = getopt.getopt(sys.argv[1:], "drnvh",
      74              ["dryrun", "recurse", "nobackup", "verbose", "newline=", "help"])
      75      except getopt.error as msg:
      76          usage(msg)
      77          return
      78      for o, a in opts:
      79          if o in ('-d', '--dryrun'):
      80              dryrun = True
      81          elif o in ('-r', '--recurse'):
      82              recurse = True
      83          elif o in ('-n', '--nobackup'):
      84              makebackup = False
      85          elif o in ('-v', '--verbose'):
      86              verbose = True
      87          elif o in ('--newline',):
      88              if not a.upper() in ('CRLF', 'LF'):
      89                  usage()
      90                  return
      91              spec_newline = dict(CRLF='\r\n', LF='\n')[a.upper()]
      92          elif o in ('-h', '--help'):
      93              usage()
      94              return
      95      if not args:
      96          r = Reindenter(sys.stdin)
      97          r.run()
      98          r.write(sys.stdout)
      99          return
     100      for arg in args:
     101          check(arg)
     102  
     103  
     104  def check(file):
     105      if os.path.isdir(file) and not os.path.islink(file):
     106          if verbose:
     107              print("listing directory", file)
     108          names = os.listdir(file)
     109          for name in names:
     110              fullname = os.path.join(file, name)
     111              if ((recurse and os.path.isdir(fullname) and
     112                   not os.path.islink(fullname) and
     113                   not os.path.split(fullname)[1].startswith("."))
     114                  or name.lower().endswith(".py")):
     115                  check(fullname)
     116          return
     117  
     118      if verbose:
     119          print("checking", file, "...", end=' ')
     120      with open(file, 'rb') as f:
     121          try:
     122              encoding, _ = tokenize.detect_encoding(f.readline)
     123          except SyntaxError as se:
     124              errprint("%s: SyntaxError: %s" % (file, str(se)))
     125              return
     126      try:
     127          with open(file, encoding=encoding) as f:
     128              r = Reindenter(f)
     129      except IOError as msg:
     130          errprint("%s: I/O Error: %s" % (file, str(msg)))
     131          return
     132  
     133      newline = spec_newline if spec_newline else r.newlines
     134      if isinstance(newline, tuple):
     135          errprint("%s: mixed newlines detected; cannot continue without --newline" % file)
     136          return
     137  
     138      if r.run():
     139          if verbose:
     140              print("changed.")
     141              if dryrun:
     142                  print("But this is a dry run, so leaving it alone.")
     143          if not dryrun:
     144              bak = file + ".bak"
     145              if makebackup:
     146                  shutil.copyfile(file, bak)
     147                  if verbose:
     148                      print("backed up", file, "to", bak)
     149              with open(file, "w", encoding=encoding, newline=newline) as f:
     150                  r.write(f)
     151              if verbose:
     152                  print("wrote new", file)
     153          return True
     154      else:
     155          if verbose:
     156              print("unchanged.")
     157          return False
     158  
     159  
     160  def _rstrip(line, JUNK='\n \t'):
     161      """Return line stripped of trailing spaces, tabs, newlines.
     162  
     163      Note that line.rstrip() instead also strips sundry control characters,
     164      but at least one known Emacs user expects to keep junk like that, not
     165      mentioning Barry by name or anything <wink>.
     166      """
     167  
     168      i = len(line)
     169      while i > 0 and line[i - 1] in JUNK:
     170          i -= 1
     171      return line[:i]
     172  
     173  
     174  class ESC[4;38;5;81mReindenter:
     175  
     176      def __init__(self, f):
     177          self.find_stmt = 1  # next token begins a fresh stmt?
     178          self.level = 0      # current indent level
     179  
     180          # Raw file lines.
     181          self.raw = f.readlines()
     182  
     183          # File lines, rstripped & tab-expanded.  Dummy at start is so
     184          # that we can use tokenize's 1-based line numbering easily.
     185          # Note that a line is all-blank iff it's "\n".
     186          self.lines = [_rstrip(line).expandtabs() + "\n"
     187                        for line in self.raw]
     188          self.lines.insert(0, None)
     189          self.index = 1  # index into self.lines of next line
     190  
     191          # List of (lineno, indentlevel) pairs, one for each stmt and
     192          # comment line.  indentlevel is -1 for comment lines, as a
     193          # signal that tokenize doesn't know what to do about them;
     194          # indeed, they're our headache!
     195          self.stats = []
     196  
     197          # Save the newlines found in the file so they can be used to
     198          #  create output without mutating the newlines.
     199          self.newlines = f.newlines
     200  
     201      def run(self):
     202          tokens = tokenize.generate_tokens(self.getline)
     203          for _token in tokens:
     204              self.tokeneater(*_token)
     205          # Remove trailing empty lines.
     206          lines = self.lines
     207          while lines and lines[-1] == "\n":
     208              lines.pop()
     209          # Sentinel.
     210          stats = self.stats
     211          stats.append((len(lines), 0))
     212          # Map count of leading spaces to # we want.
     213          have2want = {}
     214          # Program after transformation.
     215          after = self.after = []
     216          # Copy over initial empty lines -- there's nothing to do until
     217          # we see a line with *something* on it.
     218          i = stats[0][0]
     219          after.extend(lines[1:i])
     220          for i in range(len(stats) - 1):
     221              thisstmt, thislevel = stats[i]
     222              nextstmt = stats[i + 1][0]
     223              have = getlspace(lines[thisstmt])
     224              want = thislevel * 4
     225              if want < 0:
     226                  # A comment line.
     227                  if have:
     228                      # An indented comment line.  If we saw the same
     229                      # indentation before, reuse what it most recently
     230                      # mapped to.
     231                      want = have2want.get(have, -1)
     232                      if want < 0:
     233                          # Then it probably belongs to the next real stmt.
     234                          for j in range(i + 1, len(stats) - 1):
     235                              jline, jlevel = stats[j]
     236                              if jlevel >= 0:
     237                                  if have == getlspace(lines[jline]):
     238                                      want = jlevel * 4
     239                                  break
     240                      if want < 0:           # Maybe it's a hanging
     241                                             # comment like this one,
     242                          # in which case we should shift it like its base
     243                          # line got shifted.
     244                          for j in range(i - 1, -1, -1):
     245                              jline, jlevel = stats[j]
     246                              if jlevel >= 0:
     247                                  want = have + (getlspace(after[jline - 1]) -
     248                                                 getlspace(lines[jline]))
     249                                  break
     250                      if want < 0:
     251                          # Still no luck -- leave it alone.
     252                          want = have
     253                  else:
     254                      want = 0
     255              assert want >= 0
     256              have2want[have] = want
     257              diff = want - have
     258              if diff == 0 or have == 0:
     259                  after.extend(lines[thisstmt:nextstmt])
     260              else:
     261                  for line in lines[thisstmt:nextstmt]:
     262                      if diff > 0:
     263                          if line == "\n":
     264                              after.append(line)
     265                          else:
     266                              after.append(" " * diff + line)
     267                      else:
     268                          remove = min(getlspace(line), -diff)
     269                          after.append(line[remove:])
     270          return self.raw != self.after
     271  
     272      def write(self, f):
     273          f.writelines(self.after)
     274  
     275      # Line-getter for tokenize.
     276      def getline(self):
     277          if self.index >= len(self.lines):
     278              line = ""
     279          else:
     280              line = self.lines[self.index]
     281              self.index += 1
     282          return line
     283  
     284      # Line-eater for tokenize.
     285      def tokeneater(self, type, token, slinecol, end, line,
     286                     INDENT=tokenize.INDENT,
     287                     DEDENT=tokenize.DEDENT,
     288                     NEWLINE=tokenize.NEWLINE,
     289                     COMMENT=tokenize.COMMENT,
     290                     NL=tokenize.NL):
     291  
     292          if type == NEWLINE:
     293              # A program statement, or ENDMARKER, will eventually follow,
     294              # after some (possibly empty) run of tokens of the form
     295              #     (NL | COMMENT)* (INDENT | DEDENT+)?
     296              self.find_stmt = 1
     297  
     298          elif type == INDENT:
     299              self.find_stmt = 1
     300              self.level += 1
     301  
     302          elif type == DEDENT:
     303              self.find_stmt = 1
     304              self.level -= 1
     305  
     306          elif type == COMMENT:
     307              if self.find_stmt:
     308                  self.stats.append((slinecol[0], -1))
     309                  # but we're still looking for a new stmt, so leave
     310                  # find_stmt alone
     311  
     312          elif type == NL:
     313              pass
     314  
     315          elif self.find_stmt:
     316              # This is the first "real token" following a NEWLINE, so it
     317              # must be the first token of the next program statement, or an
     318              # ENDMARKER.
     319              self.find_stmt = 0
     320              if line:   # not endmarker
     321                  self.stats.append((slinecol[0], self.level))
     322  
     323  
     324  # Count number of leading blanks.
     325  def getlspace(line):
     326      i, n = 0, len(line)
     327      while i < n and line[i] == " ":
     328          i += 1
     329      return i
     330  
     331  
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()