"""
Colorize - Python source formatter that outputs python code in XHTML.
This script is based on MoinMoin - The Python Source Parser.
Usage:
colorize.py [source file name] [optional author name]
"""
import cgi
import string
import sys
import cStringIO
import keyword
import token
import tokenize
import re
import os
# Command-line arguments, read when the module is loaded: the path of the
# source file is required, the author name is optional.
sourcefile = sys.argv[1]
# Final path component only; used for the page title and metadata.
filename = os.path.basename(sourcefile)
authorname = sys.argv[2] if len(sys.argv) > 2 else "Unknown"
# Extra token types private to this script, numbered just past
# token.NT_OFFSET.
_KEYWORD = 1 + token.NT_OFFSET
_TEXT = 1 + _KEYWORD

# Token type -> CSS class name used for the <span> emitted around a token.
_classes = {
    token.NAME: 'token_name',
    token.NUMBER: 'token_number',
    token.STRING: 'token_string',
    token.OP: 'token_op',
    tokenize.COMMENT: 'token_comment',
    token.ERRORTOKEN: 'token_error',
}
# The script-specific types; _TEXT doubles as the fallback class for any
# token type that has no entry of its own.
_classes.update({_KEYWORD: 'keyword', _TEXT: 'text'})
# Encoding reported when the source file does not declare one.
_DEFAULTENCODING = "utf-8"

# Opening part of the generated document: doctype, <html>, <head> and an
# inline <style> element holding the stylesheet.
docstart = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>"""
docstart += """\n\t<style type="text/css">\n"""
# "colorize.css" is a relative path, so it is resolved against the current
# working directory.  The with-block closes the handle once the stylesheet
# has been read.
with open("colorize.css", "r") as cssfile:
    docstart += cssfile.read()
docstart += """\n\t</style>\n"""

# Closing part of the generated document.
docend = "\n\t</body>\n</html>"

# Charset written into the Content-Type <meta> element; the script entry
# point rebinds it to the encoding detected in the source file.
docencoding = _DEFAULTENCODING
def getEncodingOfFile(sourcefile):
    """Return the encoding declared in the first two lines of a file.

    Each of the first two lines is handed to parseEncoding(); the first
    non-empty result is returned.  When neither line declares an encoding,
    the file object's own ``encoding`` attribute is returned if it is set,
    and _DEFAULTENCODING otherwise.

    sourcefile -- path of the file to inspect.
    """
    # open() rather than the Python-2-only file() builtin; the with-block
    # guarantees the handle is closed on every return path.
    with open(sourcefile) as myfile:
        for _ in range(2):
            encoding = parseEncoding(myfile.readline())
            if encoding != "":
                return encoding
        if myfile.encoding is not None:
            return myfile.encoding
    return _DEFAULTENCODING
# "coding:" or "coding=" followed by optional whitespace and the encoding
# name (word characters, '-' and '.').  Raw string so that \s and \w reach
# the regex engine untouched; compiled once instead of on every call.
_CODING_RE = re.compile(r"coding[=:]\s*([-\w.]+)")


def parseEncoding(textline):
    """Return the encoding named in textline, or "" if there is none.

    textline -- one line of source text.

    The search starts at index 1, so a marker that begins at the very first
    character of the line is not recognised.
    """
    match = _CODING_RE.search(textline, 1)
    if match is None:
        return ""
    return match.group(1)
class Parser:
    """Write Python source text as a colorized XHTML document.

    The source is tokenized with the tokenize module and every token is
    wrapped in a <span> whose CSS class is looked up in _classes.  The
    page frame comes from the module-level names docstart, docend,
    filename, authorname and docencoding.
    """

    def __init__(self, raw, out=sys.stdout):
        """Store the source text and the output stream.

        raw -- Python source text; tabs are expanded and leading/trailing
               whitespace is stripped before it is stored.
        out -- object with a write() method that receives the XHTML
               (defaults to sys.stdout).
        """
        self.raw = raw.expandtabs().strip()
        self.out = out

    def format(self, formatter, form):
        """Parse the stored source and write the complete XHTML page.

        formatter, form -- not used by this method.

        A tokenize.TokenError does not propagate: an error heading and the
        rest of the source from the reported line onward are written
        instead.
        """
        # self.lines[row] is the offset in self.raw at which source line
        # `row` starts.  Token rows are 1-based, so index 0 is a
        # placeholder; the final entry is the end of the text.
        self.lines = [0, 0]
        pos = 0
        while True:
            pos = self.raw.find('\n', pos) + 1
            if not pos:
                break
            self.lines.append(pos)
        self.lines.append(len(self.raw))

        # Document head.  Values that originate outside this script are
        # escaped so that characters such as '<', '&' or '"' cannot break
        # the markup; quote=True is used inside attribute values.
        self.out.write(docstart)
        self.out.write("\n<title>" + cgi.escape(filename) + "</title>\n")
        self.out.write('<link rel="schema.DC" href="http://purl.org/DC/elements/1.0" />\n')
        self.out.write('<meta name="DC.Language" content="en" />\n')
        self.out.write('<meta name="DC.Format" content="text/html" />\n')
        self.out.write('<meta name="DC.Type" content="Software" />\n')
        self.out.write('<meta name="DC.Title" content="Python source of %s" />\n'
                       % cgi.escape(filename, True))
        self.out.write('<meta name="DC.Creator" content="%s" />\n'
                       % cgi.escape(authorname, True))
        self.out.write('<meta http-equiv="Content-Type" content="text/html; charset=%s" />\n'
                       % cgi.escape(docencoding, True))
        self.out.write("\n\t</head>\n<body>")

        # self.pos is the offset in self.raw just past the last token that
        # has been written; __call__ advances it.
        self.pos = 0
        text = cStringIO.StringIO(self.raw)
        self.out.write('<pre><code>')
        try:
            # The tokenizer calls self(...) once per token.
            tokenize.tokenize(text.readline, self)
        except tokenize.TokenError as ex:
            # args are (message, (row, column)).
            msg = ex.args[0]
            line = ex.args[1][0]
            self.out.write("<h3>ERROR: %s</h3>%s\n" % (
                cgi.escape(msg), cgi.escape(self.raw[self.lines[line]:])))
        self.out.write('</code></pre>\n')
        self.out.write(docend)

    def __call__(self, toktype, toktext, start, end, line):
        """Token handler: write one token as a <span> element.

        toktype -- numeric token type.
        toktext -- text of the token.
        start   -- (row, column) at which the token begins.
        end     -- (row, column) at which the token ends (not used).
        line    -- the source line containing the token (not used).
        """
        srow, scol = start

        # Offsets into self.raw: end of the previous token and start of
        # this one.
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)

        # Line ends are written as a bare newline, without a <span>.
        if toktype in (token.NEWLINE, tokenize.NL):
            self.out.write('\n')
            return

        # Copy the text between the previous token and this one verbatim.
        if newpos > oldpos:
            self.out.write(self.raw[oldpos:newpos])

        # INDENT/DEDENT produce no output of their own; self.pos is reset
        # to the token start so their text is not counted as written.
        if toktype in (token.INDENT, token.DEDENT):
            self.pos = newpos
            return

        # Fold every type from LPAR through OP into OP, and split keywords
        # off from ordinary names.
        if token.LPAR <= toktype <= token.OP:
            toktype = token.OP
        elif toktype == token.NAME and keyword.iskeyword(toktext):
            toktype = _KEYWORD
        classval = _classes.get(toktype, _classes[_TEXT])

        # Error tokens additionally get an inline red border.
        style = ''
        if toktype == token.ERRORTOKEN:
            style = ' style="border: solid 1.5pt #FF0000;"'

        self.out.write('<span class="%s"%s>' % (classval, style))
        self.out.write(cgi.escape(toktext))
        self.out.write('</span>')
if __name__ == "__main__":
    # Script entry point: colorize `sourcefile` into `sourcefile`.html.
    print("Formatting " + sourcefile)
    # Rebinds the module-level docencoding that Parser.format() writes into
    # the Content-Type <meta> element.
    docencoding = getEncodingOfFile(sourcefile)
    # Both handles are closed by their with-blocks, so the output is
    # flushed to disk before the completion message is printed.
    with open(sourcefile) as infile:
        source = infile.read()
    with open(sourcefile + '.html', 'wt') as outfile:
        Parser(source, outfile).format(None, None)
    print("Done! Wrote result file " + sourcefile + ".html")