# Python: count how often each word occurs in an English text file

# -*- coding: utf-8 -*-

"""Count how many times each word occurs in an English article."""

import string
from collections import Counter

def get_dict_word_times(file):
    """Build a dictionary mapping each word to its number of occurrences.

    The text is split on whitespace. Leading and trailing ASCII punctuation
    is stripped from every token and the token is lower-cased, so counting
    is case-insensitive. Tokens that consist only of punctuation (for
    example a stand-alone "--") are skipped instead of being counted as an
    empty word.

    Args:
        file: An open text file, or any object whose ``read()`` returns a
            ``str``.

    Returns:
        A plain ``dict`` of ``{word: count}``.
    """
    tokens = file.read().split()

    # Strip surrounding punctuation and ignore case.
    words = (token.strip(string.punctuation).lower() for token in tokens)

    # Counter tallies everything in a single pass; calling list.count() for
    # each distinct word would be quadratic in the length of the text.
    # The `if word` filter drops tokens that were nothing but punctuation.
    counts = Counter(word for word in words if word)

    return dict(counts)

def main():
    """Print every word found in test.txt with its count, most frequent first."""
    # test.txt is opened by relative path; put the English article to be
    # analysed in that file.
    with open('test.txt', 'r') as source:
        frequencies = get_dict_word_times(source)

    # Order the (word, count) pairs from the highest count to the lowest.
    ranked = sorted(frequencies.items(), key=lambda pair: pair[1], reverse=True)

    line_template = "{} -- {} times"
    for word, count in ranked:
        print(line_template.format(word, count))

# Run the report only when this file is executed as a script, so that
# importing the module does not read test.txt or print anything.
if __name__ == "__main__":
    main()

# Create this script and save it with UTF-8 encoding.

# Then run it from a terminal: python xx.py > res.txt

# The final results are stored in the text file res.txt.