# This program calculates the probability that a name starts with
# a particular letter of the alphabet.  It uses data retrieved from
#
#   http://www.census.gov/topics/population/genealogy/data/2000_surnames.html
#
# converted to a CSV file with the header row deleted.
#
# Each row is read as comma-separated fields: field 0 is the name and
# field 2 is the number of people with that name.
#

ALPHABET = 'abcdefghijklmnopqrstuvwxyz'
DEFAULT_SOURCE = 'app_c.csv'


def count_first_letters(lines):
    """Tally people by the first letter of their name.

    Args:
        lines: An iterable of comma-separated text rows (an open file
            works).  Field 0 is the name, field 2 is the number of
            people with that name.

    Returns:
        A ``(total_names, count_all_names)`` tuple.  ``total_names`` maps
        each lowercase letter a-z to the number of people counted under
        it; ``count_all_names`` is the sum over all counted rows.

    Raises:
        IndexError: A row with a usable name has fewer than three fields.
        ValueError: The count field of such a row is not an integer.
    """
    #
    # Step 1.  Initialize a dictionary of letter -> counter pairs.
    #
    total_names = {letter: 0 for letter in ALPHABET}
    count_all_names = 0

    #
    # Step 2.  Loop through the rows, updating the counter for the
    #          corresponding letter, as well as the counter for all
    #          the names.
    #
    for entry in lines:
        field = entry.split(',')                # split the line
        name = field[0].strip().lower()         # pull out lowercase name
        letter = name[:1]                       # '' for a blank line/name

        # Rows whose name does not start with a-z (blank lines, empty
        # names, digits, punctuation) have no counter; skip them rather
        # than crash with a KeyError.
        if letter not in total_names:
            continue

        count = int(field[2])                   # pull out number of people
        total_names[letter] += count            # update letter counter
        count_all_names += count                # update global counter

    return total_names, count_all_names


def letter_probabilities(total_names, count_all_names):
    """Convert per-letter counts into probabilities.

    Args:
        total_names: Mapping of letter -> number of people.
        count_all_names: Total number of people across all letters.

    Returns:
        A dict, in alphabetical order, of letter -> probability.  When
        ``count_all_names`` is 0 (no usable rows) every probability is
        0.0 instead of raising ZeroDivisionError.
    """
    if not count_all_names:
        return {letter: 0.0 for letter in ALPHABET}
    return {
        letter: total_names[letter] / count_all_names
        for letter in ALPHABET
    }


def main(path=DEFAULT_SOURCE):
    """Read the CSV file at *path* and print one line per letter."""
    # The with-block closes the file even if a row fails to parse.
    with open(path, 'r') as source:
        total_names, count_all_names = count_first_letters(source)

    #
    # Step 3.  Print out the letter -> probability pairs.  Walking the
    #          alphabet explicitly keeps the output in a-z order without
    #          relying on dictionary ordering.
    #
    probabilities = letter_probabilities(total_names, count_all_names)
    for letter, probability in probabilities.items():
        print(letter, '->', probability)


if __name__ == '__main__':
    main()