# -*- coding: utf-8 -*-
"""
Created on Tue Feb 25 17:37:55 2020
@author: weisssun
"""
import jieba
import re
import csv
from collections import Counter
# Load the stopword dictionary, one word per line.
# A set gives O(1) membership tests during filtering (was a list: O(n) per token).
with open(r'D:\Python\dict\dict\stopwords.txt', encoding='utf-8') as f:
    stopw = {line.strip() for line in f}

# Read the text to be segmented and cut each line with jieba (full mode).
# Accumulate per-line results in a list and join once — avoids quadratic
# string concatenation with `+=` in a loop.
segments = []
with open(r'D:\Python\family.txt', encoding='utf-8') as f:
    for line in f:
        # Drop ASCII letters/digits and common Chinese punctuation before
        # cutting. Raw string: the original pattern used invalid escape
        # sequences (\:, \·, ...) that raise warnings on modern CPython;
        # the matched character set is unchanged.
        line = re.sub(r'[A-Za-z0-9:·—,。“ ”]', '', line)
        segments.append(' '.join(jieba.cut(line, cut_all=True)))
cut_words = ' '.join(segments)

all_words = cut_words.split()
# Filter stopwords out of the segmented tokens.
new_words = [w for w in all_words if w not in stopw]
print(new_words)

word_dict = Counter(new_words)
print(word_dict)

# Write word/frequency pairs to CSV. GBK keeps the file readable in
# Chinese-locale Excel, but NOTE(review): characters outside GBK would
# raise UnicodeEncodeError — confirm the corpus is GBK-safe.
with open(r'D:\Python\family_words.csv', 'w', newline='', encoding='gbk') as f:
    writer = csv.writer(f)
    for word, count in word_dict.items():
        writer.writerow([word, count])