# Mike Stringer (@mstringer). Do whatever you want with this.
#
# Monte Carlo estimate of how many distinct surnames (and how many distinct
# surnames starting with 'Z') show up among a restaurant's guests, drawing
# guests at random from a surname-count table.
#
# Usage: python <this script> CENSUS_SURNAMES_CSV
#
# The CSV must start with a header row; in every following row, column 0 is
# read as the surname and column 2 as an integer count.

import sys
import csv
import random

# rough guesses
n_tables = 20
n_seatings = 2
nights_per_year = 250
years_open = 7.5
# one surname is drawn per table seating over the whole period
n_total = int(n_tables * n_seatings * nights_per_year * years_open)

# just do this a few times to get a feel for the numbers
n_trials = 10


def read_population(stream):
    """Build the sampling population from an open CSV stream.

    The first row is treated as a header and echoed to stderr. Every other
    non-blank row contributes ``int(row[2])`` copies of ``row[0]``.

    Returns a list with one entry per counted person. Memory use grows with
    the sum of the counts, not with the number of rows. An empty stream
    yields an empty list.

    Raises ValueError if a count is not an integer and IndexError if a
    non-blank row has fewer than three columns.
    """
    reader = csv.reader(stream)
    header = next(reader, None)
    if header is None:
        return []
    # Wrap in a tuple so the header list is formatted as a single value.
    sys.stderr.write("%s\n" % (header,))

    population = []
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing newline)
            continue
        surname = row[0]
        count = int(row[2])
        population.extend([surname] * count)
    return population


def load_population(filename):
    """Open ``filename`` and return the population list it describes."""
    # US population -- probably not a particularly fantastic population to
    # draw from since Alinea guests likely more international
    with open(filename) as stream:
        return read_population(stream)


def count_surnames(guests):
    """Return ``(n_unique, n_unique_starting_with_Z)`` for ``guests``.

    ``guests`` is any iterable of surname strings; duplicates are counted
    once. The 'Z' test is case-sensitive.
    """
    unique_surnames = set(guests)
    n_start_with_z = sum(
        1 for surname in unique_surnames if surname.startswith('Z'))
    return len(unique_surnames), n_start_with_z


def run_trial(population, n_guests):
    """Sample ``n_guests`` entries without replacement and count surnames.

    Raises ValueError (from ``random.sample``) when ``population`` holds
    fewer than ``n_guests`` entries.
    """
    # there are at least a few issues here: (i) alinea guests are almost
    # certainly not a random subpopulation of the US, (ii) there will
    # probably be a handful of fanatical repeat diners, whereas sampling
    # without replacement treats every seating as a different person.
    alinea_guests = random.sample(population, n_guests)
    return count_surnames(alinea_guests)


def main(argv=None):
    """Run ``n_trials`` trials and print one "unique z_unique" line each.

    ``argv`` defaults to ``sys.argv``; ``argv[1]`` is the CSV filename.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: %s CENSUS_SURNAMES_CSV" % argv[0])

    population = load_population(argv[1])
    for _ in range(n_trials):
        # count them up and print it out
        print("%d %d" % run_trial(population, n_total))


if __name__ == "__main__":
    main()