Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

Use Python 3: #!/usr/bin/python3 import sys def build_dict(filepath): '''Given a ...

ID: 3758825 • Letter: U

Question

Use Python 3.

#!/usr/bin/python3

import sys


def build_dict(filepath):
    '''Given a path to a text file, return a dictionary of the word (key)
    and its frequency (value), or how often it appears. All words are stored
    in lowercase (e.g., 'The' and 'the' are counted as the same word).

    Note: Punctuation is not stripped, so 'end' and 'end.' are counted as
    different words.

    Words are obtained with str.split() (no argument), which splits on all
    whitespace.

    Keyword arguments:
    filepath -- the path to a text file

    Returns: dict
    '''
    counts = {}
    # Read line by line so the whole file never has to sit in memory.
    with open(filepath) as text_file:
        for line in text_file:
            for word in line.lower().split():
                counts[word] = counts.get(word, 0) + 1
    return counts


def print_words(filepath):
    '''Given a path to a text file, print the word and its frequency in the
    following format:
    word1 count1
    word2 count2
    ...

    The list is printed sorted in ascending order by word. This function is
    called when we use --count flag in the command line. It reuses the
    build_dict function.

    Keyword arguments:
    filepath -- the path to a text file

    Returns: None
    '''
    word_counts = build_dict(filepath)
    for word in sorted(word_counts):
        print(word, word_counts[word])


def print_top(filepath):
    '''Given a path to a text file, print the top 20 most common words in the
    order that the most common word is first, then the next most common, and
    so on. This function is called when we use --topcount flag in the command
    line. It reuses the build_dict function.

    Each line is printed as "word count". Words with the same count are
    listed alphabetically so the output is deterministic.

    Keyword arguments:
    filepath -- the path to a text file

    Returns: None
    '''
    word_counts = build_dict(filepath)
    # Negating the count sorts by descending frequency while the word itself
    # breaks ties in ascending order.
    ranked = sorted(word_counts.items(), key=lambda item: (-item[1], item[0]))
    for word, count in ranked[:20]:
        print(word, count)


def main():
    '''Provide basic command line argument parsing code and call the
    appropriate print_words() or print_top() function.

    Reads sys.argv and expects exactly two arguments after the script name:
    an option (--count or --topcount) followed by a file path. If the
    argument count is wrong, a usage message is printed and the process exits
    with status 1. An unrecognised option is reported and also exits with
    status 1.

    Returns: None
    '''
    if len(sys.argv) != 3:
        print('Usage: ./wordcount.py [OPTION] [FILE]')
        print('Options:')
        print(' --count Print the word and its frequency')
        print(' --topcount Print the top 20 most common words')
        sys.exit(1)

    option, filepath = sys.argv[1:3]
    if option == '--count':
        print_words(filepath)
    elif option == '--topcount':
        print_top(filepath)
    else:
        print('Unknown option: ' + option)
        sys.exit(1)

# Run the command line interface only when executed as a script, not when
# this module is imported.
if __name__ == '__main__':
    main()

Explanation / Answer

1)

2) def print_words()

3)

counts = defaultdict(lambda : defaultdict(int))
    cmu = pickle.load(file(cmu_filename))
    # Get the most beautiful cases:
    most_beautiful = set()
    reader = csv.reader(file(src_filename))
    header = reader.next()
    for row in reader:
        word, category = row  
        if category == 'most beautiful' and word in cmu:
            most_beautiful.add(word)
    # Get the sample from words not in keepers already:
    # Sample-size == number of most-beautiful words we found in cmu:
    samplesize = len(most_beautiful)
    candidates = set(cmu.keys()) - most_beautiful
    regs = set(random.sample(candidates, samplesize))
    # Now get the counts:
    vocab = {'regular': regs, 'most beautiful': most_beautiful}  
    for category, words in vocab.items():
        for word in words:
            for phones in cmu[word]:
                for ph in phones:      
                    counts[ph][category] += 1
    return counts

4)