import requests
import re
import os
# Scraping dynamically loaded images from Baidu image search.
# (1) Compare the request URLs captured while the result page loads more
#     images (img_1 .. img_3 below): `pn` is the part that advances in a
#     regular way (30, 60, 90); `gsm` and the trailing timestamp also differ.
# (2) Substitute a `pn` value into page_url to get one result page; per the
#     original author's note each page lists 30 images.
# NOTE: the captured URLs originally read "latest=©right=", which is
# "&copyright=" after an HTML-entity mangling; the parameter is restored here.
img_1 = "https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord=%E7%BE%8E%E5%A5%B3&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&hd=&latest=&copyright=&word=%E7%BE%8E%E5%A5%B3&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode=&force=&cg=girl&pn=30&rn=30&gsm=1e&1586609758663="
img_2 = "https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord=%E7%BE%8E%E5%A5%B3&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&hd=&latest=&copyright=&word=%E7%BE%8E%E5%A5%B3&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode=&force=&cg=girl&pn=60&rn=30&gsm=3c&1586609758847="
img_3 = "https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord=%E7%BE%8E%E5%A5%B3&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&hd=&latest=&copyright=&word=%E7%BE%8E%E5%A5%B3&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode=&force=&cg=girl&pn=90&rn=30&gsm=5a&1586609775289="
# Template for one result page: the single "{}" placeholder receives the pn value.
page_url = "https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord=%E7%BE%8E%E5%A5%B3&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&hd=&latest=&copyright=&word=%E7%BE%8E%E5%A5%B3&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode=&force=&cg=girl&pn={}&rn=30&gsm=1e&1586609758663="

# Seconds to wait on a single HTTP request; without a timeout a stalled
# connection would hang the script forever.
REQUEST_TIMEOUT = 10

# Captures the value of every "thumbURL" field in the response text.
THUMB_URL_RE = re.compile(r'thumbURL":"(.*?)"')


def find_thumb_urls(html):
    """Return all thumbURL values found in *html*, in order and unmodified."""
    return THUMB_URL_RE.findall(html)


def unescape_url(img_url):
    """Return *img_url* with every backslash removed.

    Presumably the response escapes "/" with a backslash; the original
    script stripped all backslashes, and so does this helper.
    """
    return img_url.replace("\\", "")


def save_image(img_url, filename):
    """Download *img_url* and write the response body to *filename*.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status (so an error page is never saved as an image).
    """
    res = requests.get(img_url, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    with open(filename, "wb") as f:
        f.write(res.content)


def download_page(page_index):
    """Fetch result page *page_index* and save its thumbnails.

    Images are written to the directory "img<page_index>" as "0.jpg",
    "1.jpg", ... in the order the URLs appear in the response. An image
    that fails to download is reported and skipped.

    Raises:
        requests.RequestException: if the result page itself cannot be fetched.
    """
    # Part 1: fetch the page that lists the images.
    out_dir = "img" + str(page_index)
    os.makedirs(out_dir, exist_ok=True)
    res = requests.get(page_url.format(page_index * 30), timeout=REQUEST_TIMEOUT)
    res.raise_for_status()

    # Part 2: use the regex to pick out the image URLs we want.
    img_urls = find_thumb_urls(res.text)
    print(img_urls)

    # Part 3: request every image URL and store the bytes on disk.
    for index, img_url in enumerate(img_urls):
        filename = os.path.join(out_dir, str(index) + ".jpg")
        try:
            save_image(unescape_url(img_url), filename)
        except requests.RequestException as exc:
            print("skipping {}: {}".format(img_url, exc))


def main():
    """Download result pages 1 to 3 (pn = 30, 60, 90)."""
    for page_index in range(1, 4):
        try:
            download_page(page_index)
        except requests.RequestException as exc:
            # One unreachable page should not stop the remaining pages.
            print("skipping page {}: {}".format(page_index, exc))


if __name__ == "__main__":
    main()
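# Using the requests library to scrape photos of cute cats from Baidu (note: do not scrape too many!)
# Goal:
# The analysis is the same as in the previous article.
# The code is as follows (simple, easy to understand, and convenient):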