import requests_html
# Collect thumbnail URLs from Baidu image search result pages and report how
# many of the collected addresses are unique.
sess = requests_html.HTMLSession()
word = '李小龙'
# Value of the `pn` query parameter for the first request.
# NOTE(review): presumably `pn` is the offset of the first result — confirm.
pn = 30
# The result numbering below assumes 30 thumbnails per page.
page_size = 30
# Template for search_all: `{}` captures the value of each "thumbURL" field.
# Built once because it does not change between pages.
reg = '"thumbURL":"{}"'
addr_list = []
for page in range(1, 3):
    # Advance the offset for every page; with a constant `pn` each iteration
    # would fetch the same URL and only produce duplicates.
    offset = pn + (page - 1) * page_size
    url = f'http://image.baidu.com/search/index?tn=baiduimage&ie=utf-8&word={word}&pn={offset}'
    resp = sess.get(url)
    addr_res = resp.html.search_all(reg)
    # A separate name for the match keeps the page index from being shadowed.
    for num, match in enumerate(addr_res, 1 + (page - 1) * page_size):
        print(num, match[0])
        addr_list.append(match[0])
# Total collected vs. distinct addresses, to expose duplicates.
print(len(addr_list), len(set(addr_list)))
KingoKing
发布了14 篇原创文章 · 获赞 0 · 访问量 107
私信
关注