# 2021-11-08: scrape images from umei.cc (优美图库), parsed with bs4.

import os
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# 1. Parse the main page source and extract the child-page URLs.
# 2. Fetch each child page and locate the image download address.
# 3. Download the image.
# Listing page for the "weimeibizhi" (beautiful wallpaper) category.
url = 'https://www.umei.cc/bizhitupian/weimeibizhi/'

resp = requests.get(url)
resp.encoding = 'utf-8'  # site serves UTF-8; set explicitly to avoid mojibake

# Hand the listing-page HTML to bs4.
main_page = BeautifulSoup(resp.text, 'html.parser')
resp.close()  # release the connection once the HTML is parsed

# Each <a> inside div.TypeList links to a child (detail) page.
# find() returns the first match; find_all() returns every match.
alist = main_page.find('div', class_='TypeList').find_all('a')

# Bug fix: open('img/...') raises FileNotFoundError if the directory
# does not exist yet — create it up front.
os.makedirs('img', exist_ok=True)

for a in alist:
    href = a.get('href')
    if not href:
        continue  # anchor without a target — nothing to fetch

    # urljoin handles both relative and absolute hrefs; the original
    # split('/')[3] slicing broke whenever the path shape changed.
    child_url = urljoin(url, href)

    child_resp = requests.get(child_url)  # fetch the child page
    child_resp.encoding = 'utf-8'
    child_page = BeautifulSoup(child_resp.text, 'html.parser')
    child_resp.close()

    # The full-size image sits in <p align="center"><img src="...">.
    # find('tag', attr='value') matches on that attribute; guard each
    # step so an unexpected page layout is skipped instead of crashing.
    holder = child_page.find('p', align='center')
    img_tag = holder.find('img') if holder else None
    src = img_tag.get('src') if img_tag else None
    if not src:
        continue

    img_resp = requests.get(urljoin(child_url, src))  # download the image bytes
    img_name = src.split('/')[-1]  # last path segment is the file name
    with open(os.path.join('img', img_name), mode='wb') as f:
        f.write(img_resp.content)  # .content is the raw bytes
    img_resp.close()

    print('over!!', img_name)
    time.sleep(1)  # moved out of the with-block; be polite between downloads

print('结束')

# find returns only the first match; find_all returns all of them.

# Previous post: 爆破专栏丨Spring Security series — OAuth2.0 protocol explained
# Next post: scraping web images with bs4