import os
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Workflow:
# 1. Parse the main listing page and extract each sub-page URL.
# 2. Fetch every sub-page and locate the image's download address.
# 3. Download the image into the local "img/" directory.
url = 'https://www.umei.cc/bizhitupian/weimeibizhi/'

# The original crashed with FileNotFoundError when img/ did not exist.
os.makedirs('img', exist_ok=True)

resp = requests.get(url, timeout=10)
resp.encoding = 'utf-8'  # site serves UTF-8; without this the text is mojibake
main_page = BeautifulSoup(resp.text, 'html.parser')  # hand the HTML to bs4
resp.close()  # release the connection back to the pool

# The TypeList <div> holds the thumbnail links; each <a> points at a detail page.
type_list = main_page.find('div', class_='TypeList')
alist = type_list.find_all('a') if type_list is not None else []

for a in alist:
    # Resolve the (possibly relative) href against the listing URL instead of
    # hand-slicing it with split('/')[3], which breaks on absolute hrefs.
    href = a.get('href')
    if not href:
        continue  # anchor without a target -- nothing to fetch
    child_url = urljoin(url, href)

    child_resp = requests.get(child_url, timeout=10)  # visit the sub-page
    child_resp.encoding = 'utf-8'
    child_page = BeautifulSoup(child_resp.text, 'html.parser')
    child_resp.close()

    # The full-size image sits inside <p align="center"><img src=...>.
    holder = child_page.find('p', align='center')
    img_tag = holder.find('img') if holder is not None else None
    if img_tag is None or not img_tag.get('src'):
        continue  # layout changed or the entry has no image -- skip it
    src = img_tag.get('src')

    img_resp = requests.get(src, timeout=10)  # fetch the image bytes
    img_name = src.split('/')[-1]  # last path segment is the file name
    with open('img/' + img_name, mode='wb') as f:
        f.write(img_resp.content)  # .content is the raw response body
    img_resp.close()

    time.sleep(1)  # be polite: at most one download per second
    print('over!!', img_name)

print('结束')
# find() returns the first match only; find_all() returns every match.