Gensim库生成与导入W2V模型
语料目录
完整代码
import gensim
import pandas as pd
# --- Train a Word2Vec model on the MSR corpus and save it to disk ---
# NOTE(review): Text8Corpus cuts the file into fixed-size word chunks and
# ignores line breaks; if this corpus is one sentence per line,
# gensim.models.word2vec.LineSentence may be the better reader -- confirm.
corpus_msr = gensim.models.word2vec.Text8Corpus("./training/msr_training.utf8")  # load the corpus
# gensim 4.0 renamed the embedding-dimension keyword from `size` to
# `vector_size`; pick the spelling the installed version accepts so this
# script runs on both gensim 3.x and 4.x.
_dim_kwarg = "vector_size" if int(gensim.__version__.split(".")[0]) >= 4 else "size"
model = gensim.models.word2vec.Word2Vec(
    corpus_msr,
    window=5,
    min_count=5,
    workers=4,
    **{_dim_kwarg: 128},
)
model.save('W2V_Danmu_msr')
print('W2V_Danmu_msr 生成完毕')
# --- Train a Word2Vec model on the PKU corpus and save it to disk ---
# NOTE(review): Text8Corpus cuts the file into fixed-size word chunks and
# ignores line breaks; if this corpus is one sentence per line,
# gensim.models.word2vec.LineSentence may be the better reader -- confirm.
corpus_pku = gensim.models.word2vec.Text8Corpus("./training/pku_training.utf8")  # load the corpus
# gensim 4.0 renamed the embedding-dimension keyword from `size` to
# `vector_size`; pick the spelling the installed version accepts so this
# script runs on both gensim 3.x and 4.x.
_dim_kwarg = "vector_size" if int(gensim.__version__.split(".")[0]) >= 4 else "size"
model = gensim.models.word2vec.Word2Vec(
    corpus_pku,
    window=5,
    min_count=5,
    workers=4,
    **{_dim_kwarg: 128},
)
model.save('W2V_Danmu_pku')
print('W2V_Danmu_pku 生成完毕')
# --- Load the saved MSR model and print the 10 words most similar to '爸爸' ---
model_msr = gensim.models.Word2Vec.load('W2V_Danmu_msr')
print()
print('---使用 W2V_Danmu_msr 模型---')
# Query through the KeyedVectors held in `.wv`: calling most_similar directly
# on the Word2Vec model was deprecated in gensim 1.0 and removed in 4.0.
# most_similar raises KeyError when the word is not in the model's vocabulary.
print(pd.Series(model_msr.wv.most_similar('爸爸', topn=10)))
# --- Load the saved PKU model and print the 10 words most similar to '爸爸' ---
model_pku = gensim.models.Word2Vec.load('W2V_Danmu_pku')
print()
print('---使用 W2V_Danmu_pku 模型---')
# Query through the KeyedVectors held in `.wv`: calling most_similar directly
# on the Word2Vec model was deprecated in gensim 1.0 and removed in 4.0.
# most_similar raises KeyError when the word is not in the model's vocabulary.
print(pd.Series(model_pku.wv.most_similar('爸爸', topn=10)))