# Code that uses Baidu AI text similarity to compare exam questions with textbook knowledge.

import codecs
import os

import chardet
import pandas as pd
from aip import AipNlp

# Baidu AI NLP credentials.
# SECURITY: these literals are committed to source control and should be
# rotated. They are kept only as fallbacks so existing runs keep working;
# prefer supplying the values through the environment variables below.
APP_ID = os.environ.get('BAIDU_AIP_APP_ID', '18141823')
API_KEY = os.environ.get('BAIDU_AIP_API_KEY', 'eEmvBrXfCdexVmjAyoPNBoxE')
SECRET_KEY = os.environ.get(
    'BAIDU_AIP_SECRET_KEY', 'p10xZogTbVDe7PphkB9zIjyZ8QkRBAqu'
)

# Shared NLP client used for every simnet (text similarity) request below.
client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

# Load the two workbooks that are compared against each other:
# df1 comes from 'Article_guanli.xlsx', df2 from 'Topic_guanli.xlsx'.
df1 = pd.read_excel('Article_guanli.xlsx')
df2 = pd.read_excel('Topic_guanli.xlsx')

# Row/column counts of each sheet; height1 bounds the article loop below.
(height1, width1), (height2, width2) = df1.shape, df2.shape

# Not populated anywhere in the visible part of the file.
test_data = list()

def _cell_text(frame, row):
    """Return the first-column cell of ``frame`` at ``row`` as native ``str``.

    On Python 3 the cell is already ``str`` and is returned unchanged
    (calling ``str()`` on encoded bytes would produce a ``"b'...'"`` repr).
    On Python 2 pandas yields ``unicode``, which is encoded to UTF-8 bytes
    exactly as the original script did.
    """
    value = frame.iloc[row, 0]
    if isinstance(value, str):
        return value
    return value.encode('utf-8')


# Score topic rows of df2 against every article row of df1 with the client's
# simnet call and append one "text_1#text_2#score" line per pair to
# xiangsidu.txt.
# NOTE(review): only topic row 0 is processed (range(0, 1)); height2 is
# computed above but unused here -- confirm whether all topics were intended.
# The output file is opened once and closed by the context manager instead
# of being reopened (and never closed) on every iteration.
with codecs.open('xiangsidu.txt', 'a', encoding="utf-8") as f:
    for i in range(0, 1):
        # The topic text does not depend on j, so read it once per topic.
        txt2 = _cell_text(df2, i)
        for j in range(0, height1):
            txt1 = _cell_text(df1, j)
            ret = client.simnet(txt1, txt2)
            print(ret)
            k = "#".join([
                ret['texts']['text_1'],
                ret['texts']['text_2'],
                str(ret['score']),
            ])
            f.write(k + "\n")

# Previous post: 11个Python Pandas小技巧让你的工作更高效(附代码实例)


# Next post: pyspark join 出现重复列的问题