import re
import csv
import cgi
import urllib
from html5lib.constants import spaceCharacters

# Collapse the collection of whitespace characters imported from
# html5lib.constants into one unicode string, so it can be used both inside a
# regex character class and as the argument to strip().
spaceCharacters = u"".join(spaceCharacters)
# Matches a run of one or more of those whitespace characters.
spacesRegex = re.compile(u"[%s]+" % spaceCharacters)


def _normalize_referrer(raw):
    """Return the cleaned-up form of one raw referrer string as UTF-8 bytes.

    The value is URL-unquoted (``+`` becomes a space), decoded to unicode,
    has every run of whitespace collapsed to a single space, and is stripped
    of leading/trailing whitespace before being re-encoded as UTF-8.

    raw -- the byte string taken from the first column of the input file.
    """
    text = urllib.unquote_plus(raw)
    try:
        text = text.decode("utf-8")
    except UnicodeError:
        # Not valid UTF-8: fall back to windows-1252.  A handful of bytes
        # (0x81, 0x8D, 0x8F, 0x90, 0x9D) are undefined in that code page, so
        # substitute U+FFFD for them instead of aborting the whole run.
        text = text.decode("windows-1252", "replace")
    text = spacesRegex.sub(u" ", text)
    return text.strip(spaceCharacters).encode("utf-8")


# Read tab-separated "<referrer>\t<hits>" lines from referrers.txt and write
# one normalized [referrer, hits] row per line to referrers.csv.  Both files
# are opened in binary mode (the csv module expects that on Python 2) and are
# closed deterministically so the output is always flushed to disk.
with open("referrers.txt", "rb") as infile:
    with open("referrers.csv", "wb") as outfile:
        writer = csv.writer(outfile)
        # Iterate the file lazily rather than loading every line up front.
        for line in infile:
            if not line.strip():
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            # The hit count is always the last field; splitting from the
            # right keeps any literal tab inside the referrer string intact.
            string, hits = line.rsplit("\t", 1)
            writer.writerow([_normalize_referrer(string), int(hits)])
