Exercise 3.2 - Solution

(a) Reading CSV Files

# fileparse.py
import csv

def parse_csv(filename):
    '''
    Parse a CSV file into a list of records
    '''
    f = open(filename)
    f_csv = csv.reader(f)

    # Read the file headers
    headers = next(f_csv)
    records = []
    for row in f_csv:
        if not row:     # Skip rows with no data
            continue
        record = dict(zip(headers, row))
        records.append(record)

    f.close()
    return records

(b) Building a Column Selector

# fileparse.py
import csv

def parse_csv(filename, select=None):
    '''
    Parse a CSV file into a list of records
    '''
    f = open(filename)
    f_csv = csv.reader(f)

    # Read the file headers
    headers = next(f_csv)

    if select:
        indices = [headers.index(colname) for colname in select]
        output_columns = select
    else:
        indices = []
        output_columns = headers

    records = []
    for row in f_csv:
        if not row:     # Skip rows with no data
            continue

        # Filter the row if specific columns selected
        if indices:
            row = [ row[index] for index in indices ]

        # Make a dictionary
        record = dict(zip(output_columns, row))
        records.append(record)

    f.close()
    return records

(c) Performing Type Conversion

# fileparse.py
import csv

def parse_csv(filename, select=None, types=None):
    '''
    Parse a CSV file into a list of records
    '''
    f = open(filename)
    f_csv = csv.reader(f)

    # Read the file headers
    headers = next(f_csv)

    if select:
        indices = [headers.index(colname) for colname in select]
        output_columns = select
    else:
        indices = []
        output_columns = headers

    records = []
    for row in f_csv:
        if not row:     # Skip rows with no data
            continue

        # Filter the row if specific columns selected
        if indices:
            row = [ row[index] for index in indices ]

        # Apply type conversion
        if types:
            row = [func(val) for func, val in zip(types, row) ]

        # Make a dictionary
        record = dict(zip(output_columns, row))
        records.append(record)

    f.close()
    return records

(d) Working without headers

# fileparse.py
import csv

def parse_csv(filename, select=None, types=None, has_headers=True):
    '''
    Parse a CSV file into a list of records
    '''
    f = open(filename)
    f_csv = csv.reader(f)

    # Read the file headers
    headers = next(f_csv) if has_headers else []

    if select:
        indices = [headers.index(colname) for colname in select]
        output_columns = select
    else:
        indices = []
        output_columns = headers

    records = []
    for row in f_csv:
        if not row:     # Skip rows with no data
            continue

        # Filter the row if specific columns selected
        if indices:
            row = [ row[index] for index in indices ]

        # Apply type conversion
        if types:
            row = [func(val) for func, val in zip(types, row) ]

        # Make a dictionary or tuple
        if output_columns:
            record = dict(zip(output_columns, row))
        else:
            record = tuple(row)
        records.append(record)

    f.close()
    return records
Discussion

The following statement is an example of an "conditional assignment."

headers = next(f_csv) if has_headers else []

The expression after the if is evaluated. If true, the expression on the left is the result. Otherwise, the result is the expression after the else. This is the Python equivalent of the ?: operator from other languages such as C.

A subtle implementation detail concerns the output_columns variable. If there are no headers, then there are also no output columns. Thus, the code turns the row into a tuple when this happens (as there are no column names from which to make a dictionary).

(e) Picking a different column delimiter

# fileparse.py
import csv

def parse_csv(filename, select=None, types=None, has_headers=True, delimiter=','):
    '''
    Parse a CSV file into a list of records with type conversion.
    '''
    f = open(filename)
    f_csv = csv.reader(f, delimiter=delimiter)

    # Read the file headers
    headers = next(f_csv) if has_headers else []

    # If specific columns have been selected, make indices for filtering and set output columns
    if select:
        indices = [ headers.index(colname) for colname in select ]
        output_columns = select
    else:
        indices = []
        output_columns = headers

    records = []
    for row in f_csv:
        if not row:     # Skip rows with no data
            continue

        # If specific column indices are selected, pick them out
        if indices:
            row = [ row[index] for index in indices]

        # Apply type conversion to the row
        if types:
            row = [func(val) for func, val in zip(types, row)]

        # Make a dictionary or a tuple
        if output_columns:
            record = dict(zip(output_columns, row))
        else:
            record = tuple(row)
        records.append(record)

    f.close()
    return records

[ Back ]