From ff540217e457b9afd208793bfaeaa27fb2dbdbb4 Mon Sep 17 00:00:00 2001 From: lbvienna Date: Wed, 12 Jun 2019 17:33:27 -0400 Subject: [PATCH] support parsing .fec files too --- process_filing.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/process_filing.py b/process_filing.py index ed65647..9f19fe5 100644 --- a/process_filing.py +++ b/process_filing.py @@ -16,13 +16,22 @@ PROJECT_ROOT = os.path.dirname(os.path.realpath(__file__)) CSV_FILE_DIRECTORY = '{}/fec-csv-sources'.format(PROJECT_ROOT) +def get_delimiter(line): + if len(line.split('\034')) > 1: + return '\034' + elif len(line.split(',')) > 1: + return ',' + raise Exception("Cannot parse first line") + def process_electronic_filing(path, filing_id=None, dump_full=True): #if dump_full is true, you'll get the whole filing, and "itemizations" #will include all itemizations grouped by category #otherwise "itemizations" will be in iterator filing_dict = {} with open(path, 'r', errors='replace') as f: - reader = csv.reader(f) + delimiter = get_delimiter(next(f)) + f.seek(0) + reader = csv.reader(f, delimiter=delimiter) fec_header = next(reader) fec_version_number = fec_header[2].strip() @@ -67,7 +76,9 @@ def process_electronic_filing(path, filing_id=None, dump_full=True): def itemization_iterator(path, filing_id, fec_version_number): with open(path, 'r', errors='replace') as f: - reader = csv.reader(f) + delimiter = get_delimiter(next(f)) + f.seek(0) + reader = csv.reader(f, delimiter=delimiter) fec_header = next(reader) summary_row = next(reader) for line in reader: @@ -209,12 +220,10 @@ def write_file(outpath, content): def main(): #do some argparse stuff parser = argparse.ArgumentParser() - parser.add_argument('--path', help='path to the fec file we want to load') - parser.add_argument('--fecfile', action='store_true', default=False, help='indicates we\'re using a .fec file instead of the fec\'s .csv file. .csv is default and recommended for messy whitespace reasons') + parser.add_argument('path', help='path to the fec file we want to load') parser.add_argument('--filing_id', help='if not available, assume that filing id is the filename minus the extension.') args = parser.parse_args() - assert not args.fecfile, "parsing for .fec file not yet implemented, use .csv file" content = process_electronic_filing(args.path, args.filing_id) sys.stdout.write(json.dumps(content))