最近比较无聊,翻翻网页,看到了之前浏览过的网站,发现堆糖里的图片比较好玩,想下载下来,但图片比较多,而我本人又比较懒,所以呢,我就发挥自己的能力,写了个爬虫自己玩儿玩儿,下面是我自己写的内容。虽然有很多不足,还望各位大佬多多指点,多多交流、分享心得……
import requests
import jsonpath
import json
# Duitang search endpoint queried by this script. `kw` is the URL-encoded
# search keyword ("微信背景", i.e. "WeChat background").
# NOTE(review): `start=72` looks like a result offset and `_=...` like a
# millisecond timestamp used as a cache-buster -- confirm against the API.
url = 'https://www.duitang.com/napi/blog/list/by_search/?kw=%E5%BE%AE%E4%BF%A1%E8%83%8C%E6%99%AF&start=72&_=1627228899032'
# Browser-like User-Agent header sent with the search request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'
}
def get_page(url, headers):
    """Fetch ``url`` and return the response body decoded as UTF-8 text.

    Args:
        url: Address to request.
        headers: Mapping of HTTP headers sent with the request.

    Returns:
        The response body as a ``str``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response status.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # Without a timeout a stalled server would block the script forever.
    resp = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on HTTP errors instead of handing an error page to the
    # JSON parser downstream.
    resp.raise_for_status()
    page = resp.content.decode('utf-8')
    # Echo the raw payload to stdout (behaviour kept from the original).
    print(page)
    return page
# Extract the links
def findall_url(page):
    """Return every value stored under a ``path`` key in the JSON text.

    Args:
        page: JSON document as a string.

    Returns:
        A list of all values found under any ``path`` key, at any depth
        (the script treats them as image URLs). The list is empty when
        the document contains no such key.

    Raises:
        json.JSONDecodeError: If ``page`` is not valid JSON.
    """
    data = json.loads(page)
    matches = jsonpath.jsonpath(data, '$..path')
    # jsonpath.jsonpath() returns False, not an empty list, when nothing
    # matches; normalise so callers can always iterate the result.
    return matches or []
# Save an image
def pic_download(url, n, save_dir=r'C:\Users\Administrator\Desktop\duitang'):
    """Download ``url`` and store it as ``<n>b.jpg`` inside ``save_dir``.

    Args:
        url: Address of the image to download.
        n: Value interpolated into the file name (the script passes a
            running index).
        save_dir: Directory that receives the file. Defaults to the
            folder the original script hard-coded; it is created when
            missing.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response status.
        OSError: If the directory or file cannot be created.
    """
    # Function-scope import: the module header does not import ``os``.
    import os

    # Bound the wait so one stalled download cannot hang the whole run.
    r = requests.get(url, timeout=10)
    # Do not save an HTTP error body under a .jpg name.
    r.raise_for_status()
    # open() does not create missing parent directories.
    os.makedirs(save_dir, exist_ok=True)
    target = os.path.join(save_dir, '%sb.jpg' % n)
    with open(target, 'wb') as f:
        f.write(r.content)
# Fetch the search result, pull out the image links and download each one,
# numbering the files from 0 in the order the links appear.
page = get_page(url, headers)
photos = findall_url(page)
# `photos or []` guards against the False that jsonpath yields when the
# response contains no "path" entries; iterating False raises TypeError.
for n, i in enumerate(photos or []):
    pic_download(i, n)
    print(i)