1  #!/usr/bin/env python3
       2  '''Add syntax highlighting to Python source code'''
       3  
       4  __author__ = 'Raymond Hettinger'
       5  
       6  import builtins
       7  import functools
       8  import html as html_module
       9  import keyword
      10  import re
      11  import tokenize
      12  
      13  #### Analyze Python Source #################################
      14  
      15  def is_builtin(s):
      16      'Return True if s is the name of a builtin'
      17      return hasattr(builtins, s)
      18  
      19  def combine_range(lines, start, end):
      20      'Join content from a range of lines between start and end'
      21      (srow, scol), (erow, ecol) = start, end
      22      if srow == erow:
      23          return lines[srow-1][scol:ecol], end
      24      rows = [lines[srow-1][scol:]] + lines[srow: erow-1] + [lines[erow-1][:ecol]]
      25      return ''.join(rows), end
      26  
      27  def analyze_python(source):
      28      '''Generate and classify chunks of Python for syntax highlighting.
      29         Yields tuples in the form: (category, categorized_text).
      30      '''
      31      lines = source.splitlines(True)
      32      lines.append('')
      33      readline = functools.partial(next, iter(lines), '')
      34      kind = tok_str = ''
      35      tok_type = tokenize.COMMENT
      36      written = (1, 0)
      37      for tok in tokenize.generate_tokens(readline):
      38          prev_tok_type, prev_tok_str = tok_type, tok_str
      39          tok_type, tok_str, (srow, scol), (erow, ecol), logical_lineno = tok
      40          kind = ''
      41          if tok_type == tokenize.COMMENT:
      42              kind = 'comment'
      43          elif tok_type == tokenize.OP and tok_str[:1] not in '{}[](),.:;@':
      44              kind = 'operator'
      45          elif tok_type == tokenize.STRING:
      46              kind = 'string'
      47              if prev_tok_type == tokenize.INDENT or scol==0:
      48                  kind = 'docstring'
      49          elif tok_type == tokenize.NAME:
      50              if tok_str in ('def', 'class', 'import', 'from'):
      51                  kind = 'definition'
      52              elif prev_tok_str in ('def', 'class'):
      53                  kind = 'defname'
      54              elif keyword.iskeyword(tok_str):
      55                  kind = 'keyword'
      56              elif is_builtin(tok_str) and prev_tok_str != '.':
      57                  kind = 'builtin'
      58          if kind:
      59              text, written = combine_range(lines, written, (srow, scol))
      60              yield '', text
      61              text, written = tok_str, (erow, ecol)
      62              yield kind, text
      63      line_upto_token, written = combine_range(lines, written, (erow, ecol))
      64      yield '', line_upto_token
      65  
      66  #### Raw Output  ###########################################
      67  
      68  def raw_highlight(classified_text):
      69      'Straight text display of text classifications'
      70      result = []
      71      for kind, text in classified_text:
      72          result.append('%15s:  %r\n' % (kind or 'plain', text))
      73      return ''.join(result)
      74  
      75  #### ANSI Output ###########################################
      76  
      77  default_ansi = {
      78      'comment': ('\033[0;31m', '\033[0m'),
      79      'string': ('\033[0;32m', '\033[0m'),
      80      'docstring': ('\033[0;32m', '\033[0m'),
      81      'keyword': ('\033[0;33m', '\033[0m'),
      82      'builtin': ('\033[0;35m', '\033[0m'),
      83      'definition': ('\033[0;33m', '\033[0m'),
      84      'defname': ('\033[0;34m', '\033[0m'),
      85      'operator': ('\033[0;33m', '\033[0m'),
      86  }
      87  
      88  def ansi_highlight(classified_text, colors=default_ansi):
      89      'Add syntax highlighting to source code using ANSI escape sequences'
      90      # http://en.wikipedia.org/wiki/ANSI_escape_code
      91      result = []
      92      for kind, text in classified_text:
      93          opener, closer = colors.get(kind, ('', ''))
      94          result += [opener, text, closer]
      95      return ''.join(result)
      96  
      97  #### HTML Output ###########################################
      98  
      99  def html_highlight(classified_text,opener='<pre class="python">\n', closer='</pre>\n'):
     100      'Convert classified text to an HTML fragment'
     101      result = [opener]
     102      for kind, text in classified_text:
     103          if kind:
     104              result.append('<span class="%s">' % kind)
     105          result.append(html_module.escape(text))
     106          if kind:
     107              result.append('</span>')
     108      result.append(closer)
     109      return ''.join(result)
     110  
     111  default_css = {
     112      '.comment': '{color: crimson;}',
     113      '.string':  '{color: forestgreen;}',
     114      '.docstring': '{color: forestgreen; font-style:italic;}',
     115      '.keyword': '{color: darkorange;}',
     116      '.builtin': '{color: purple;}',
     117      '.definition': '{color: darkorange; font-weight:bold;}',
     118      '.defname': '{color: blue;}',
     119      '.operator': '{color: brown;}',
     120  }
     121  
     122  default_html = '''\
     123  <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
     124            "http://www.w3.org/TR/html4/strict.dtd">
     125  <html>
     126  <head>
     127  <meta http-equiv="Content-type" content="text/html;charset=UTF-8">
     128  <title> {title} </title>
     129  <style type="text/css">
     130  {css}
     131  </style>
     132  </head>
     133  <body>
     134  {body}
     135  </body>
     136  </html>
     137  '''
     138  
     139  def build_html_page(classified_text, title='python',
     140                      css=default_css, html=default_html):
     141      'Create a complete HTML page with colorized source code'
     142      css_str = '\n'.join(['%s %s' % item for item in css.items()])
     143      result = html_highlight(classified_text)
     144      title = html_module.escape(title)
     145      return html.format(title=title, css=css_str, body=result)
     146  
     147  #### LaTeX Output ##########################################
     148  
     149  default_latex_commands = {
     150      'comment': r'{\color{red}#1}',
     151      'string': r'{\color{ForestGreen}#1}',
     152      'docstring': r'{\emph{\color{ForestGreen}#1}}',
     153      'keyword': r'{\color{orange}#1}',
     154      'builtin': r'{\color{purple}#1}',
     155      'definition': r'{\color{orange}#1}',
     156      'defname': r'{\color{blue}#1}',
     157      'operator': r'{\color{brown}#1}',
     158  }
     159  
     160  default_latex_document = r'''
     161  \documentclass{article}
     162  \usepackage{alltt}
     163  \usepackage{upquote}
     164  \usepackage{color}
     165  \usepackage[usenames,dvipsnames]{xcolor}
     166  \usepackage[cm]{fullpage}
     167  %(macros)s
     168  \begin{document}
     169  \center{\LARGE{%(title)s}}
     170  \begin{alltt}
     171  %(body)s
     172  \end{alltt}
     173  \end{document}
     174  '''
     175  
     176  def alltt_escape(s):
     177      'Replace backslash and braces with their escaped equivalents'
     178      xlat = {'{': r'\{', '}': r'\}', '\\': r'\textbackslash{}'}
     179      return re.sub(r'[\\{}]', lambda mo: xlat[mo.group()], s)
     180  
     181  def latex_highlight(classified_text, title = 'python',
     182                      commands = default_latex_commands,
     183                      document = default_latex_document):
     184      'Create a complete LaTeX document with colorized source code'
     185      macros = '\n'.join(r'\newcommand{\py%s}[1]{%s}' % c for c in commands.items())
     186      result = []
     187      for kind, text in classified_text:
     188          if kind:
     189              result.append(r'\py%s{' % kind)
     190          result.append(alltt_escape(text))
     191          if kind:
     192              result.append('}')
     193      return default_latex_document % dict(title=title, macros=macros, body=''.join(result))
     194  
     195  
     196  if __name__ == '__main__':
     197      import argparse
     198      import os.path
     199      import sys
     200      import textwrap
     201      import webbrowser
     202  
     203      parser = argparse.ArgumentParser(
     204              description = 'Add syntax highlighting to Python source code',
     205              formatter_class=argparse.RawDescriptionHelpFormatter,
     206              epilog = textwrap.dedent('''
     207                  examples:
     208  
     209                    # Show syntax highlighted code in the terminal window
     210                    $ ./highlight.py myfile.py
     211  
     212                    # Colorize myfile.py and display in a browser
     213                    $ ./highlight.py -b myfile.py
     214  
     215                    # Create an HTML section to embed in an existing webpage
     216                    ./highlight.py -s myfile.py
     217  
     218                    # Create a complete HTML file
     219                    $ ./highlight.py -c myfile.py > myfile.html
     220  
     221                    # Create a PDF using LaTeX
     222                    $ ./highlight.py -l myfile.py | pdflatex
     223  
     224              '''))
     225      parser.add_argument('sourcefile', metavar = 'SOURCEFILE',
     226              help = 'file containing Python sourcecode')
     227      parser.add_argument('-b', '--browser', action = 'store_true',
     228              help = 'launch a browser to show results')
     229      parser.add_argument('-c', '--complete', action = 'store_true',
     230              help = 'build a complete html webpage')
     231      parser.add_argument('-l', '--latex', action = 'store_true',
     232              help = 'build a LaTeX document')
     233      parser.add_argument('-r', '--raw', action = 'store_true',
     234              help = 'raw parse of categorized text')
     235      parser.add_argument('-s', '--section', action = 'store_true',
     236              help = 'show an HTML section rather than a complete webpage')
     237      args = parser.parse_args()
     238  
     239      if args.section and (args.browser or args.complete):
     240          parser.error('The -s/--section option is incompatible with '
     241                       'the -b/--browser or -c/--complete options')
     242  
     243      sourcefile = args.sourcefile
     244      with open(sourcefile) as f:
     245          source = f.read()
     246      classified_text = analyze_python(source)
     247  
     248      if args.raw:
     249          encoded = raw_highlight(classified_text)
     250      elif args.complete or args.browser:
     251          encoded = build_html_page(classified_text, title=sourcefile)
     252      elif args.section:
     253          encoded = html_highlight(classified_text)
     254      elif args.latex:
     255          encoded = latex_highlight(classified_text, title=sourcefile)
     256      else:
     257          encoded = ansi_highlight(classified_text)
     258  
     259      if args.browser:
     260          htmlfile = os.path.splitext(os.path.basename(sourcefile))[0] + '.html'
     261          with open(htmlfile, 'w') as f:
     262              f.write(encoded)
     263          webbrowser.open('file://' + os.path.abspath(htmlfile))
     264      else:
     265          sys.stdout.write(encoded)