# This program calculates, for each letter of the alphabet, the
# probability that a person's surname starts with that letter (each
# name is weighted by how many people have it).  It uses data retrieved from
#
# http://www.census.gov/topics/population/genealogy/data/2000_surnames.html
#
# converted to a CSV file with the header row deleted.

#
# Step 1.  Set up the tallies: a dictionary mapping every lowercase
#          letter to a counter (all starting at zero), plus a single
#          counter for all the names put together.
#
total_names = dict.fromkeys('abcdefghijklmnopqrstuvwxyz', 0)
count_all_names = 0

#
# Step 2. Loop through the file, updating the counter for the
#         corresponding letter, as well as the counter for all
#         the names in the file.
#
#         Each line is split on commas; the first field is taken as
#         the name and the third field as the number of people who
#         have that name.
#
# The `with` block closes the file even if a malformed line raises an
# error part-way through the loop.
with open('app_c.csv', 'r') as source:
    for entry in source:
        if not entry.strip():         # skip blank lines (e.g. at end of file)
            continue
        field  = entry.split(',')     # split the line
        name   = field[0].lower()     # pull out lowercase name
        letter = name[0]              # grab its first character
        count  = int( field[2] )      # pull out number of people
        total_names[letter] += count  # update letter counter
        count_all_names     += count  # update global counter

#
# Step 3. Print out each letter with the fraction of all counted
#         people whose name starts with that letter.
#         (On Python 3.7 and later a dictionary remembers the order in
#         which keys were inserted, so the letters come out in the
#         order Step 1 added them.  On older versions they may be out
#         of order.  Can you find out why?)
#
if count_all_names == 0:
    # Nothing was counted (e.g. an empty input file); dividing by the
    # total would raise ZeroDivisionError, so report it instead.
    print('No names were counted, so no probabilities can be computed.')
else:
    for (letter, count_for_letter) in total_names.items():
        print(letter, '->', count_for_letter/count_all_names)