# Mike Stringer (@mstringer). Do whatever you want with this.
import bisect
import csv
import random
import sys
# get the filename of the census results
filename = sys.argv[1]
# rough guesses
n_tables = 20
n_seatings = 2
nights_per_year = 250
years_open = 7.5
n_total = int(n_tables * n_seatings * nights_per_year * years_open)
# US population -- probably not a particularly fantastic population to
# draw from since Alinea guests likely more international
population = []
with open(filename) as stream:
    reader = csv.reader(stream)
    print >> sys.stderr, reader.next()
    for row in reader:
        surname = row[0]
        count = int(row[2])
        population.extend([surname] * count)
# just do this a few times to get a feel for the numbers
n_trials = 10
for trial_index in range(n_trials):
    # there are at least a few issues here: (i) alinea guests are almost
    # certainly not a random subpopulation of the US, (ii) there will
    # probably be a handful of fanatical repeat diners that
    alinea_guests = random.sample(population, n_total)
    # count them up and print it out
    unique_surnames = set(alinea_guests)
    surnames_start_with_z = set(surname for surname in unique_surnames
                                if surname.startswith('Z'))
    print len(unique_surnames), len(surnames_start_with_z)