jieba

import jieba
# Location of the novel's text (UTF-8). A raw string keeps the Windows path
# separators from being parsed as escape sequences.
TEXT_PATH = r'D:\桌面\西游记.txt'

# Frequent two-character tokens that are not character names; they are removed
# from the tally before ranking.
excludes = {
    "什么", "一个", "那里", "怎么", "我们", "不知", "两个", "甚么",
    "不是", "只见", "原来", "不敢", "如何", "这个", "不曾", "闻言",
    "正是", "只是", "出来", "一声", "真个", "不得", "这里",
    "今日", "那个", "不见",
}

# Alternative names that are tallied under a single canonical name.
ALIASES = {
    "行者": "孙悟空",
    "大圣": "孙悟空",
    "老孙": "孙悟空",
    "悟空": "孙悟空",
    "孙行者": "孙悟空",
    "师父": "唐僧",
    "三藏": "唐僧",
    "长老": "唐僧",
    "呆子": "八戒",
    "那怪": "妖精",
    "小妖": "妖精",
}


def count_words(words, alias_map=None, excluded=None):
    """Tally token frequencies, merging aliases and dropping excluded words.

    Args:
        words: Iterable of string tokens (e.g. the list from ``jieba.lcut``).
        alias_map: Mapping from an alias to the name it is counted under.
            Defaults to ``ALIASES``.
        excluded: Iterable of words removed from the result after counting.
            Defaults to ``excludes``.

    Returns:
        A dict mapping each remaining word to its number of occurrences, in
        order of first appearance.
    """
    alias_map = ALIASES if alias_map is None else alias_map
    excluded = excludes if excluded is None else excluded

    counts = {}
    for word in words:
        # Single-character tokens are skipped.
        if len(word) == 1:
            continue
        name = alias_map.get(word, word)
        counts[name] = counts.get(name, 0) + 1

    # pop() with a default: an excluded word that never occurred in the text
    # must not raise KeyError.
    for word in excluded:
        counts.pop(word, None)
    return counts


def top_words(counts, limit=20):
    """Return up to ``limit`` ``(word, count)`` pairs, highest count first.

    Words with equal counts keep the order they have in ``counts``. Fewer than
    ``limit`` pairs are returned when ``counts`` is smaller than that.
    """
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return ranked[:limit]


def main(path=TEXT_PATH, limit=20):
    """Read the text at ``path``, segment it with jieba and print the top words.

    Raises:
        OSError: If the file at ``path`` cannot be opened or read.
    """
    # The context manager closes the file even if reading fails.
    with open(path, "r", encoding='utf-8') as text_file:
        txt = text_file.read()

    counts = count_words(jieba.lcut(txt))
    for word, count in top_words(counts, limit):
        print("{0:<10}{1:>5}".format(word, count))


if __name__ == "__main__":
    main()

上一篇:判断语句和循环语句-2.8-应用:猜拳游戏


下一篇:用jieba统计《红楼梦》词频