jieba分词

import jieba
 
 
def takeSecond(elem):
    """Return the item stored at index 1 of *elem*.

    Intended as a ``key=`` function for sorting ``(word, count)`` pairs by
    their count.
    """
    second = elem[1]
    return second
 
 
def main(path=r"E:\聊斋\31883\all.txt", top_n=40):
    """Print the most frequent jieba tokens found in a UTF-8 text file.

    The file is read in full, segmented with ``jieba.lcut``, and every token
    except a small set of punctuation marks is counted. The ``top_n`` most
    frequent tokens are then printed one per line, with the token
    left-aligned in 10 columns and its count right-aligned in 5.

    Args:
        path: Location of the text file to analyse. Defaults to the path
            this script originally hard-coded.
        top_n: Maximum number of tokens to print. Fewer lines are printed
            when the text contains fewer distinct tokens.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Local import keeps this function self-contained; the file-level
    # imports only provide jieba.
    from collections import Counter

    # Tokens that are never counted. A frozenset gives O(1) membership tests
    # inside the counting loop.
    # NOTE(review): only these exact tokens are skipped; full-width marks
    # such as ',' or '?' and newline tokens are still counted -- confirm
    # that this is intended.
    skipped = frozenset([',', '。', ':', '“', '”', ' ', '?', '!', ';', '.'])

    # The context manager closes the file even if reading or decoding fails.
    with open(path, "r", encoding="utf-8") as source:
        text = source.read()

    counts = Counter(
        token for token in jieba.lcut(text) if token not in skipped
    )

    # most_common() sorts by descending count, keeps first-seen order for
    # ties, and simply returns fewer entries when there are fewer than top_n
    # distinct tokens (no IndexError on short texts).
    for token, count in counts.most_common(top_n):
        print("{0:<10}{1:>5}".format(token, count))
 
 
# Script entry point: runs the word-frequency report. There is no
# ``__main__`` guard, so this call also executes when the file is imported.
main()

  jieba分词

上一篇:jieba 分词


下一篇:jieba 分词. 西游记相关的分词,出现次数最高的20个。