patternpythonMinor
Top 5 most common words in Python
Viewed 0 times
top words python common most
Problem
I would like to hear feedback from an interviewer's perspective on how to improve the following code:
from collections import Counter
def word_map(string):
    """Count the non-stop-words found in ``string``.

    Each whitespace-separated token is reduced to its alphanumeric
    characters and lower-cased. Tokens that end up empty, or that appear in
    ``stopwords.txt`` (opened relative to the current working directory),
    are skipped.

    Args:
        string: The text to analyse.

    Returns:
        A dict mapping each remaining word to its number of occurrences.

    Raises:
        IOError: If ``stopwords.txt`` cannot be opened (``OSError`` on
            Python 3).
    """
    # ``with`` guarantees the handle is closed; a set makes every
    # membership test O(1) instead of scanning a list.
    with open("stopwords.txt", "r") as stop_word_file:
        stop_words = set(stop_word_file.read().split())

    word_dict = {}
    for token in string.split():
        # On Python 3 ``filter`` returns an iterator rather than a str, so
        # join the surviving characters back together before lower-casing.
        # On Python 2 the join is a harmless no-op.
        word = ''.join(filter(str.isalnum, token)).lower()
        if word and word not in stop_words:
            word_dict[word] = word_dict.get(word, 0) + 1
    return word_dict
# Read the story through a context manager so the handle is closed as soon
# as the text has been consumed, and avoid shadowing Python 2's ``file``
# builtin with a variable of the same name.
with open("story.txt", "r") as story_file:
    five_top_words = Counter(word_map(story_file.read())).most_common(5)
for letter, count in five_top_words:
    print '%s : %10d' %(letter, count)

Solution
I second @Dannnno's arguments, except for the claim that you don't need to split the work into tinier functions.
In fact, I'd say quite the opposite: using more functions lets you turn them into generators, for more efficient memory usage.
You should also make yourself familiar with the `str.format` syntax, which is the newer way to go over the `%` formatting syntax.

My proposal is:
from collections import Counter
from itertools import chain
def word_map(filename, stop_words_file):
    """Lazily yield the significant words of ``filename``, one line at a time.

    Args:
        filename: Path of the text file to analyse.
        stop_words_file: Path of a file containing whitespace-separated
            words to ignore.

    Yields:
        One generator per line of ``filename``. Each produces that line's
        whitespace-separated tokens reduced to their alphanumeric characters
        and lower-cased, skipping empty results and stop words.

    Raises:
        IOError: If either file cannot be opened (``OSError`` on Python 3).
            Because this is a generator, the error surfaces on first
            iteration rather than at call time.
    """
    with open(stop_words_file) as f:
        stop_words = set(f.read().split())

    with open(filename) as f:
        for line in f:
            # On Python 3 ``filter`` returns an iterator rather than a str,
            # so join the kept characters before lower-casing. On Python 2
            # the join is a harmless no-op.
            words = (
                ''.join(filter(str.isalnum, w)).lower() for w in line.split()
            )
            yield (w for w in words if w and w not in stop_words)
def top_words(filename, stopfile, count):
    """Return the ``count`` most frequent significant words of ``filename``.

    Args:
        filename: Path of the text file to analyse.
        stopfile: Path of the stop-words file handed to ``word_map``.
        count: How many of the most common words to return.

    Returns:
        A list of ``(word, occurrences)`` tuples as produced by
        ``Counter.most_common``.
    """
    tally = Counter()
    # ``word_map`` yields one iterable of words per line; feed each into
    # the running tally instead of flattening them first.
    for line_words in word_map(filename, stopfile):
        tally.update(line_words)
    return tally.most_common(count)
if __name__ == '__main__':
    for word, count in top_words('story.txt', 'stopwords.txt', 5):
        print '{} : {:>10}'.format(word, count)

Code Snippets
from collections import Counter
from itertools import chain
def word_map(filename, stop_words_file):
    """Lazily yield the significant words of ``filename``, one line at a time.

    Args:
        filename: Path of the text file to analyse.
        stop_words_file: Path of a file containing whitespace-separated
            words to ignore.

    Yields:
        One generator per line of ``filename``. Each produces that line's
        whitespace-separated tokens reduced to their alphanumeric characters
        and lower-cased, skipping empty results and stop words.

    Raises:
        IOError: If either file cannot be opened (``OSError`` on Python 3).
            Because this is a generator, the error surfaces on first
            iteration rather than at call time.
    """
    with open(stop_words_file) as f:
        stop_words = set(f.read().split())

    with open(filename) as f:
        for line in f:
            # On Python 3 ``filter`` returns an iterator rather than a str,
            # so join the kept characters before lower-casing. On Python 2
            # the join is a harmless no-op.
            words = (
                ''.join(filter(str.isalnum, w)).lower() for w in line.split()
            )
            yield (w for w in words if w and w not in stop_words)
def top_words(filename, stopfile, count):
    """Return the ``count`` most frequent significant words of ``filename``.

    Args:
        filename: Path of the text file to analyse.
        stopfile: Path of the stop-words file handed to ``word_map``.
        count: How many of the most common words to return.

    Returns:
        A list of ``(word, occurrences)`` tuples as produced by
        ``Counter.most_common``.
    """
    tally = Counter()
    # ``word_map`` yields one iterable of words per line; feed each into
    # the running tally instead of flattening them first.
    for line_words in word_map(filename, stopfile):
        tally.update(line_words)
    return tally.most_common(count)
if __name__ == '__main__':
    for word, count in top_words('story.txt', 'stopwords.txt', 5):
        print '{} : {:>10}'.format(word, count)

Context
StackExchange Code Review Q#129793, answer score: 4
Revisions (0)
No revisions yet.