jieba+wordcloud 按词频生成词云

# -*- coding: utf-8 -*-
from random import randint
import jieba.analyse
from os import path
from PIL import Image,ImageSequence
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

# Directory that contains this script.
d = path.dirname(__file__)
# Placeholder: directory prefix (including the trailing separator) under which
# the input text, the mask image, the font and the output image are located.
mypath = '你的文件路径'
# Placeholder: the text file to analyse. The original code passed the
# `os.path` module itself to open(), which raises TypeError.
text_path = mypath + '你的文本文件.txt'

# Read the whole text; `with` guarantees the handle is closed even if
# reading fails.
with open(text_path, 'r') as file_in:
    content = file_in.read()

# Optional: register a custom stop-word list before extracting keywords, e.g.
#   jieba.analyse.set_stop_words('<path to your stop-word list>')
tags = jieba.analyse.extract_tags(content, topK=100, withWeight=True)
# The weights are fractional; scale by 10000 and truncate so that every
# keyword maps to an integer frequency.
keywords = {word: int(weight * 10000) for word, weight in tags}
print(keywords)

# Load the picture that is later handed to WordCloud as its mask.
pic_address = path.abspath(mypath + "武汉区划.jpg")
pic = plt.imread(pic_address)


def random_color_func(word=None, font_size=None, position=None, orientation=None, font_path=None, random_state=None):
    """Return a random CSS-style ``hsl(...)`` colour string for one word.

    All parameters are accepted only so the function matches the callback
    signature used for ``color_func``; none of them is read. Randomness
    comes from the module-level ``randint``, not from ``random_state``.

    Returns:
        A string of the form ``"hsl(H, 100%, L%)"`` where ``H`` is drawn
        from 120..250 and ``L`` is a value drawn from 60..120 rescaled from
        the 0..255 range to a truncated percentage.
    """
    hue = randint(120, 250)
    # Full saturation: 255 on a 0..255 scale is exactly 100 percent.
    saturation = 100
    # Draw lightness on a 0..255 scale, then convert to a whole percentage.
    raw_lightness = randint(60, 120)
    lightness = int(100.0 * float(raw_lightness) / 255.0)
    return f"hsl({hue}, {saturation}%, {lightness}%)"
# Build the WordCloud renderer.
wc = WordCloud(
    background_color='white',
    mask=pic,                        # picture loaded earlier in this script
    max_font_size=50,
    mode="RGB",
    scale=10,
    font_path=mypath+"msyhbd.ttf",   # font file expected under `mypath`
    max_words=100,
    min_font_size=10,
    color_func=random_color_func,
    # The original passed the string "none". WordCloud stores the value
    # as-is and later calls .random()/.randint() on it, so a string fails
    # with AttributeError. None lets WordCloud create its own generator.
    random_state=None,
)



# Note on WordCloud's random_state parameter: if a random object is given,
# it is used as the source of random numbers.
# Build the cloud from the keyword -> frequency mapping, display it with
# matplotlib, then write it to an image file under `mypath`.
wc.generate_from_frequencies(keywords)
plt.figure()    # create the figure
plt.imshow(wc)
plt.axis("off")
plt.show()
wc.to_file(mypath+"1.png")   # save the image
上一篇:python基础篇-jieba库的使用


下一篇:开始导入第一个第三方库jieba