# -*- coding: utf-8 -*-
"""Extract the Chinese characters from one column of a CSV file.

Run as a script, this reads the ``comment`` column of ``weibo.csv`` and
prints, in order: the raw concatenated column text, the list of
Chinese-character runs found in it, and those runs joined into one string.
"""
import csv
import re

# Input file read when the module is executed as a script.
csvfile = 'weibo.csv'

# Matches runs of characters in the range U+4E00..U+9FA5; everything else
# (Latin letters, digits, punctuation, emoji, whitespace) is dropped.
pattern = re.compile(r'[\u4e00-\u9fa5]+')


def columns_data(path, column):
    """Return all values of *column* in the CSV file at *path*, concatenated.

    The first row of the file is used as the header (``csv.DictReader``).
    Values are joined in file order with no separator.

    Args:
        path: Path of a UTF-8 encoded CSV file.
        column: Header name of the column to collect.

    Returns:
        A single ``str`` with every cell of the column appended in order.
        Rows that lack the column (short rows, or a header without that
        name) contribute nothing instead of raising ``TypeError``.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' is what the csv module requires so that line breaks embedded
    # in quoted fields are parsed correctly; ``with`` guarantees the handle
    # is closed even if parsing fails.
    with open(path, 'r', encoding='utf-8', newline='') as handle:
        reader = csv.DictReader(handle)
        # DictReader yields None for cells missing from a short row, hence
        # the ``or ''``; join avoids quadratic repeated concatenation.
        return ''.join(row.get(column) or '' for row in reader)


def main():
    """Print the raw comments, the Chinese runs, and the cleaned text."""
    comments = columns_data(csvfile, 'comment')
    print(comments)

    filterdata = pattern.findall(comments)
    print(filterdata)

    cleaned_comments = ''.join(filterdata)
    print(cleaned_comments)


if __name__ == '__main__':
    main()
数据来源:Python爬取新浪微博评论数据,写入csv文件中
本文等同:筛选出一段文字中的中文