Exercise 3.2 - Solution
(a) Reading CSV Files
# fileparse.py
import csv
def parse_csv(filename):
'''
Parse a CSV file into a list of records
'''
f = open(filename)
f_csv = csv.reader(f)
# Read the file headers
headers = next(f_csv)
records = []
for row in f_csv:
if not row: # Skip rows with no data
continue
record = dict(zip(headers, row))
records.append(record)
f.close()
return records
(b) Building a Column Selector
# fileparse.py
import csv
def parse_csv(filename, select=None):
'''
Parse a CSV file into a list of records
'''
f = open(filename)
f_csv = csv.reader(f)
# Read the file headers
headers = next(f_csv)
if select:
indices = [headers.index(colname) for colname in select]
output_columns = select
else:
indices = []
output_columns = headers
records = []
for row in f_csv:
if not row: # Skip rows with no data
continue
# Filter the row if specific columns selected
if indices:
row = [ row[index] for index in indices ]
# Make a dictionary
record = dict(zip(output_columns, row))
records.append(record)
f.close()
return records
(c) Performing Type Conversion
# fileparse.py
import csv
def parse_csv(filename, select=None, types=None):
'''
Parse a CSV file into a list of records
'''
f = open(filename)
f_csv = csv.reader(f)
# Read the file headers
headers = next(f_csv)
if select:
indices = [headers.index(colname) for colname in select]
output_columns = select
else:
indices = []
output_columns = headers
records = []
for row in f_csv:
if not row: # Skip rows with no data
continue
# Filter the row if specific columns selected
if indices:
row = [ row[index] for index in indices ]
# Apply type conversion
if types:
row = [func(val) for func, val in zip(types, row) ]
# Make a dictionary
record = dict(zip(output_columns, row))
records.append(record)
f.close()
return records
(d) Working without headers
# fileparse.py
import csv
def parse_csv(filename, select=None, types=None, has_headers=True):
'''
Parse a CSV file into a list of records
'''
f = open(filename)
f_csv = csv.reader(f)
# Read the file headers
headers = next(f_csv) if has_headers else []
if select:
indices = [headers.index(colname) for colname in select]
output_columns = select
else:
indices = []
output_columns = headers
records = []
for row in f_csv:
if not row: # Skip rows with no data
continue
# Filter the row if specific columns selected
if indices:
row = [ row[index] for index in indices ]
# Apply type conversion
if types:
row = [func(val) for func, val in zip(types, row) ]
# Make a dictionary or tuple
if output_columns:
record = dict(zip(output_columns, row))
else:
record = tuple(row)
records.append(record)
f.close()
return records
(e) Picking a different column delimiter
# fileparse.py
import csv
def parse_csv(filename, select=None, types=None, has_headers=True, delimiter=','):
'''
Parse a CSV file into a list of records with type conversion.
'''
f = open(filename)
f_csv = csv.reader(f, delimiter=delimiter)
# Read the file headers
headers = next(f_csv) if has_headers else []
# If specific columns have been selected, make indices for filtering and set output columns
if select:
indices = [ headers.index(colname) for colname in select ]
output_columns = select
else:
indices = []
output_columns = headers
records = []
for row in f_csv:
if not row: # Skip rows with no data
continue
# If specific column indices are selected, pick them out
if indices:
row = [ row[index] for index in indices]
# Apply type conversion to the row
if types:
row = [func(val) for func, val in zip(types, row)]
# Make a dictionary or a tuple
if output_columns:
record = dict(zip(output_columns, row))
else:
record = tuple(row)
records.append(record)
f.close()
return records
[ Back ]