#!/usr/bin/python3
"""Dump the yearly rows of an HTML ``cpidata`` table as CSV on stdout.

Usage: pass the path of the HTML file as the first command-line argument.
Each output line has the form ``year,value,value,...``.
"""
import csv
import os
import re
import sys

from bs4 import BeautifulSoup


def _parse_row(tr):
    """Return ``(year, values)`` for one ``<tr>``, or ``None`` to skip it.

    A row is skipped when it has no ``<td>`` cells (e.g. a header row made
    of ``<th>``), when its first cell does not hold exactly one child node,
    or when the year / any value cell is not numeric.
    """
    tds = tr.find_all('td')
    if not tds:
        return None
    year_nodes = tds[0].contents
    if len(year_nodes) != 1:
        return None
    try:
        year = int(year_nodes[0].string)
        # Up to 13 cells after the year column are read; presumably twelve
        # monthly figures plus one more column -- confirm against the page.
        values = [float(td.string) for td in tds[1:14]]
    except (TypeError, ValueError):
        # ``.string`` is None for cells with mixed content (TypeError);
        # non-numeric text raises ValueError.  Either way: not a data row.
        return None
    return year, values


def process_cpi_data(in_filename, first_year=1913, last_year=2010):
    """Print one CSV line per year found in the ``cpidata`` table.

    Args:
        in_filename: Path of the HTML file to read.
        first_year: First year that must be present (inclusive).
        last_year: Last year that must be present (inclusive).

    Exits with a message on stderr and a non-zero status when the file has
    no element with ``id="cpidata"`` or when any year in the requested range
    is missing.  Nothing is written to stdout in that case, so the output
    is either a complete CSV or empty.
    """
    # Hand bs4 the raw bytes so it can detect the document encoding itself
    # instead of relying on the locale default; ``with`` closes the file.
    # NOTE(review): no parser is named, so bs4 picks the best one installed
    # (and warns about it).  Pin one once the intended parser is known.
    with open(in_filename, 'rb') as html_file:
        soup = BeautifulSoup(html_file)

    table = soup.find(id='cpidata')
    if table is None:
        sys.exit("No element with id 'cpidata' found in %s" % in_filename)

    data = {}
    for tr in table.find_all('tr'):
        row = _parse_row(tr)
        if row is not None:
            year, values = row
            # The first parsable occurrence of a year wins.
            data.setdefault(year, values)

    years = range(first_year, last_year + 1)
    # Validate the whole range before printing anything.
    for y in years:
        if y not in data:
            sys.exit('Missing year %d' % y)

    for y in years:
        print('%d,%s' % (y, ','.join(str(x) for x in data[y])))


def main():
    """Run :func:`process_cpi_data` on the file named by ``sys.argv[1]``."""
    if len(sys.argv) < 2:
        sys.exit('usage: %s INPUT_HTML' % sys.argv[0])
    process_cpi_data(sys.argv[1])


if __name__ == '__main__':
    main()