#!/usr/bin/python3
"""Print interest-group vote positions for one year from a text listing.

Usage: <script> FILE YEAR

FILE is read line by line.  A line such as ``HRC:`` starts a new group, and
each following line of the form

    - <issue name> (yes|no) (<year>-<roll call>) [<url>[ (for all)]]

describes one issue for that group.  For every issue whose year number
equals YEAR, one pipe-separated record is written to standard output:

    group|issue name|yes-or-no|roll call|url
"""
import os
import re
import sys
import xml.sax.saxutils

# Abbreviated group headings and the full names they are expanded to.
abbrevToName = {
    'HRC': 'Human Rights Campaign',
    'NRA': 'National Rifle Association',
    'ACLU': 'American Civil Liberties Union',
}

# Group heading: letters, commas, apostrophes and spaces, then a colon,
# anchored at the start of the line.
startGroupRE = re.compile(r"^([a-zA-Z,' ]+):")

# Issue line.  Groups: 1 = issue name, 2 = "no"/"yes", 3 = year number,
# 4 = roll-call number, 5 = optional trailing text (the URL).
issueRE = re.compile(
    r'^\s*-\s+([^(]*?)\s*\((no|yes)\)\s*\((\d+)-(\d+)\)\s*(.*?)\s*$')

# Marker after a URL meaning "reuse this URL for later issues without one".
_FOR_ALL_SUFFIX = '(for all)'


def processGroups(fileName: str, year: int) -> None:
    """Print one ``group|name|yes/no|rollCall|url`` line per matching issue.

    Args:
        fileName: Path of the text file to read.
        year: Only issues whose first parenthesised number (the part before
            the dash in ``(<year>-<roll call>)``) equals this value are
            printed.

    The group, issue name and URL fields are passed through
    ``xml.sax.saxutils.escape`` before printing.  A URL followed by
    ``(for all)`` is remembered and substituted for later issue lines that
    carry no URL of their own.  If no group heading (or no URL) has been
    seen yet, the corresponding field is printed empty.

    Raises:
        OSError: If ``fileName`` cannot be opened or read.
    """
    escape = xml.sax.saxutils.escape
    curGroup = None
    lastURL = None

    # ``with`` guarantees the file is closed even if processing fails midway.
    with open(fileName, 'r') as inFile:
        for line in inFile:
            startGroupMatch = startGroupRE.search(line)
            if startGroupMatch:
                heading = startGroupMatch.group(1)
                # Expand known abbreviations; keep other headings verbatim.
                curGroup = abbrevToName.get(heading, heading)
                continue

            issueMatch = issueRE.search(line)
            if not issueMatch:
                # Neither a heading nor an issue line: ignore it.
                continue

            name = issueMatch.group(1)
            yesOrNo = issueMatch.group(2)
            yearIndex = int(issueMatch.group(3))
            rollCall = int(issueMatch.group(4))
            url = issueMatch.group(5)

            if url == '':
                # No URL on this line: fall back to the remembered one.
                url = lastURL
            elif url.endswith(_FOR_ALL_SUFFIX):
                # Drop the marker and the whitespace that separated it from
                # the URL, then remember the URL for later issue lines.
                url = url[:-len(_FOR_ALL_SUFFIX)].rstrip()
                lastURL = url

            if yearIndex != year:
                continue

            # ``curGroup`` / ``url`` are still None when no heading or URL
            # has been seen; escape() cannot handle None, so print them as
            # empty fields instead.
            print('|'.join((
                escape(curGroup or ''),
                escape(name),
                yesOrNo,
                str(rollCall),
                escape(url or ''),
            )))


if __name__ == '__main__':
    processGroups(sys.argv[1], int(sys.argv[2]))