Python 爬取某网站图片

程序运行前需要先安装依赖:代码中用到的 requests 是第三方库(可通过 pip install requests 安装),re、time、os 均为 Python 标准库,无需额外安装。

import requests
import re
import time
import os
# First listing page of the gallery category that is scraped.
url = "https://pic.netbian.com/4kmeinv/index.html"

# Request headers sent with every HTTP call. Values must not start with
# whitespace: recent `requests` releases reject such values as invalid.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:92.0) Gecko/20100101 Firefox/92.0",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Host": "pic.netbian.com",
    "Accept-Encoding": "gzip, deflate",
}

# Directory the downloaded images are saved to; change this to suit your machine.
path = 'C:\\Users\\lyd\\Desktop\\彼岸图库\\'

# Site root that relative links found in the HTML are resolved against.
BASE_URL = "https://pic.netbian.com"

# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 10


def page_url(page_number):
    """Return the listing URL for the 1-based *page_number*.

    Page 1 is the plain ``index.html``; later pages use ``index_<n>.html``.
    NOTE(review): the site numbers its second listing page ``index_2.html``
    (there is no ``index_1.html``) — re-verify if the site layout changes.
    """
    if page_number <= 1:
        return url
    return f"{BASE_URL}/4kmeinv/index_{page_number}.html"


def safe_filename(title):
    """Return *title* with whitespace and filesystem-reserved characters replaced by ``_``."""
    return re.sub(r'[\\/:*?"<>|\s]', "_", title)


def detail_links(listing_html):
    """Return the absolute detail-page URLs found in a listing page.

    The first five ``<li>`` entries are skipped, as in the original script
    (presumably navigation items rather than pictures — verify against the
    live page). Entries without an ``href`` are ignored.
    """
    links = []
    for item in re.findall("<li>(.*?)</li>", listing_html)[5:]:
        match = re.search(r'href="(.*?)"', item)
        if match:
            links.append(BASE_URL + match.group(1))
    return links


def parse_detail(detail_html):
    """Extract ``(file-safe title, absolute image URL)`` from a detail page.

    Returns ``None`` when either the title or the image tag cannot be found,
    so that one unexpected page does not abort the whole run.
    """
    titles = re.findall('<div class="photo-hd"><h1>(.*?)</h1></div>', detail_html)
    images = re.findall('id="img"><img src="(.*?)" data', detail_html)
    if not titles or not images:
        return None
    return safe_filename(titles[0]), BASE_URL + images[0]


def fetch_html(page):
    """Download *page* with the shared headers and return its decoded text.

    Raises ``requests.RequestException`` on network errors, timeouts and
    non-2xx responses.
    """
    # `headers=` must be passed by keyword: the second positional argument of
    # requests.get() is `params`, which would turn the dict into a query string.
    res = requests.get(page, headers=header, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    res.encoding = res.apparent_encoding
    return res.text


def main():
    """Ask how many listing pages to scrape and download every image on them."""
    os.makedirs(path, exist_ok=True)
    page = int(input("请输入要下载的页数(即从第一页开始):"))
    for page_number in range(1, page + 1):
        try:
            listing_html = fetch_html(page_url(page_number))
        except requests.RequestException:
            # Without the listing page nothing further can be done.
            print("程序出错!正在退出。。。。。。")
            break
        for detail_url in detail_links(listing_html):
            try:
                parsed = parse_detail(fetch_html(detail_url))
                if parsed is None:
                    print(f"跳过(页面解析失败): {detail_url}")
                    continue
                title, image_url = parsed
                print(title, image_url)
                image = requests.get(image_url, headers=header, timeout=REQUEST_TIMEOUT)
                image.raise_for_status()
            except requests.RequestException as exc:
                # A single failed picture should not stop the remaining downloads.
                print(f"跳过(下载失败): {detail_url} {exc}")
                continue
            with open(os.path.join(path, title + ".jpg"), "wb") as f:
                f.write(image.content)
        print(f"第{page_number}页下载完成!")
        # Pause between listing pages to avoid hammering the server.
        time.sleep(1)


if __name__ == "__main__":
    main()

程序运行截图(是你的冰冰吗?)
上一篇:MySQL中,21个写SQL的好习惯


下一篇:字符串的替换(replace())与合并(join())