"""
Read list of links, call validator and store result in CSV file.
This script takes two arguments:
1. the name of a text file containing links (one link per line) to check.
2. the name of the output file where validation results are stored.
Usage example:
python massvalidate.py mylinks.txt resultfile.csv
This script was written in haste. Please report errors to
pete@standards-schmandards.com
"""
# Module metadata: author, script version and date.
__author__ = 'Peter Krantz'
__version__ = '0.1'
__date__ = '2005/04/02'
import urllib, time
import xml.dom.minidom
from xml.dom.minidom import Node, parse, parseString
import sys
import string
import csv
def GetNodeFirstChildValue(NodeCollection):
    """Return the data of the first child of the first node in a node list.

    NodeCollection is a DOM node list, such as the result of
    getElementsByTagName(). The empty string is returned when the list is
    empty or when its first node has no children.

    Raises AttributeError if that first child has no ``data`` attribute
    (for example when it is an element rather than a text node).
    """
    if NodeCollection.length <= 0:
        return ""
    firstChild = NodeCollection[0].firstChild
    # Identity test: None is a singleton, so "is" is the reliable check.
    if firstChild is None:
        return ""
    return firstChild.data
# Both file names are mandatory. Print the expected invocation and exit
# instead of failing with an IndexError when one of them is missing.
if len(sys.argv) < 3:
    sys.exit("Usage: python %s linkfile resultfile" % sys.argv[0])

# First argument: text file with one link per line.
linkfilename = sys.argv[1]
# Second argument: CSV file that receives the validation results.
resultfilename = sys.argv[2]

# open() replaces the file() builtin, which exists only on Python 2.
linkFile = open(linkfilename, "r")
class MyDialect(csv.excel):
    """Excel-based CSV dialect: ';' separated, every field quoted.

    Quote characters inside a field are written doubled.
    """

    delimiter = ';'
    doublequote = True
    quotechar = '"'
    quoting = csv.QUOTE_ALL
# urlopen() and quote() live in different modules on Python 2 and Python 3;
# import them from whichever location this interpreter provides.
try:
    from urllib import quote, urlopen
except ImportError:
    from urllib.parse import quote
    from urllib.request import urlopen

# Address of the validator's check service; the link to validate is appended
# as the value of the "uri" query parameter.
validatorUrl = "http://127.0.0.1/validator/htdocs/check?uri="

# Running id written as the first column of every result row.
siteid = 0

try:
    # The with-block guarantees the result file is flushed and closed even
    # if the run is interrupted.
    with open(resultfilename, "w") as resultStream:
        resultFile = csv.writer(resultStream, dialect=MyDialect)

        # Iterate the file lazily instead of loading every line up front.
        for line in linkFile:
            link = line.strip()
            if not link:
                # A blank line holds no link; do not send it to the validator.
                continue

            print("Trying " + line)

            # Percent-encode the link so characters such as "&", "#" or "%"
            # inside it reach the validator as part of the uri value instead
            # of being read as separate query parameters.
            urlstring = validatorUrl + quote(link, safe="") + "&output=xml"

            try:
                # Fetching happens inside the try so that a network failure
                # is recorded for this link instead of aborting the run.
                f = urlopen(urlstring)
                try:
                    valresultXml = f.read()
                finally:
                    f.close()

                DomDoc = parseString(valresultXml)
                try:
                    root = DomDoc.documentElement

                    validatorError = GetNodeFirstChildValue(root.getElementsByTagName("error"))
                    # No str() here: on Python 2 it raises for non-ASCII
                    # validator messages.
                    print("validatorError: " + validatorError)

                    if validatorError:
                        # The validator itself reported a problem.
                        row = (str(siteid), link, "validatorerror", "", "", "")
                    else:
                        # One "msg" element is counted per reported error.
                        errorCount = root.getElementsByTagName("msg").length
                        print("errorCount: " + str(errorCount))

                        server = GetNodeFirstChildValue(root.getElementsByTagName("server"))
                        print("server: " + server)

                        hasDc = GetNodeFirstChildValue(root.getElementsByTagName("hasdc"))
                        print("hasDc: " + hasDc)

                        docType = GetNodeFirstChildValue(root.getElementsByTagName("doctype"))
                        print("docType: " + docType)

                        row = (str(siteid), link, errorCount, server, hasDc, docType)
                finally:
                    # Release the DOM tree on every path, including errors.
                    DomDoc.unlink()

                resultFile.writerow(row)
            except Exception as exc:
                # Per-link boundary: record the failure and move on to the
                # next link, but report why it failed.
                resultFile.writerow((str(siteid), link, "error", "", "", ""))
                print("Could not check.")
                sys.stderr.write("Reason: %r\n" % (exc,))

            siteid = siteid + 1
finally:
    linkFile.close()

print("Done!")