Python使用requests模块下载图片

2023-08-08 20:36:52
MySQL中事先保存好爬取到的图片链接地址。
然后使用多线程把图片下载到本地。
# coding: utf-8

import MySQLdb

import requests

import os

import re

from threading import Thread

import datetime

# HTTP headers sent with every image request; the User-Agent imitates a
# desktop Chrome browser.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/63.0.3239.132 Safari/537.36'
    ),
}

# Directory the downloaded images are written into.
file_path = 'F:\\mlu2'

# Create the output directory on first run (os.mkdir creates a single level only).
if not os.path.exists(file_path):
    os.mkdir(file_path)

class Spider(object):
    """Download images whose URLs are stored in MySQL, one thread per URL.

    Instance attributes (set by ``__init__``):
        file_path: directory the downloaded files are written into.
        header: dict of HTTP headers sent with every image request.
    """

    def __init__(self, file_path, header):
        """Store the output directory and the HTTP headers to use."""
        self.file_path = file_path
        self.header = header

    def timer(func):
        """Decorator that runs ``func`` and then prints the elapsed time.

        The wrapper forwards positional and keyword arguments and returns
        whatever ``func`` returns. The summary is printed only when ``func``
        finishes without raising.

        This is deliberately a plain function while the class body executes:
        a ``staticmethod`` object cannot be called as a decorator inside the
        class body on Python 2 or Python 3 before 3.10. It is converted to a
        static method at the end of the class body.
        """
        # Local import: the file's import block does not provide functools.
        import functools

        @functools.wraps(func)
        def time_count(*args, **kwargs):
            start_time = datetime.datetime.now()
            result = func(*args, **kwargs)
            elapsed = datetime.datetime.now() - start_time
            # divmod yields integers on both Python 2 and Python 3, unlike "/".
            hour, remainder = divmod(elapsed.seconds, 3600)
            minute, second = divmod(remainder, 60)
            print("爬取完成")
            print("一共用时%s天%s时%s分%s秒" % (elapsed.days, hour, minute, second))
            return result

        return time_count

    def get_link(self):
        """Return the rows of image URLs read from the ``msg`` table.

        Returns:
            The ``fetchall()`` result of the query: a sequence of rows, each
            holding the ``image`` column as its only item (at most 100 rows).
        """
        conn = MySQLdb.connect(host='localhost',
                               port=3306,
                               user='root',
                               passwd='',
                               db='mlu',
                               charset='utf8')
        try:
            cur = conn.cursor()
            try:
                # The `image` column holds image URLs that were crawled and
                # stored in MySQL beforehand.
                sql = 'select image from msg limit 100'
                cur.execute(sql)
                return cur.fetchall()
            finally:
                cur.close()
        finally:
            # Always release the connection, even if the query fails.
            conn.close()

    def download(self, link):
        """Fetch ``link`` and save the response body under ``self.file_path``.

        The file name is the last path segment of ``link``. Errors are printed
        rather than raised, because this runs as a thread target and one
        failed image should not affect the others.
        """
        try:
            # Last non-empty path segment, e.g. ".../a/b.jpg" -> "b.jpg".
            filename = link.rstrip('/').rsplit('/', 1)[-1]
            if not filename:
                print("下载失败: 无法从链接解析文件名 %s" % link)
                return
            # A timeout stops a stalled server from blocking join() forever.
            pic = requests.get(link, headers=self.header, timeout=30)
            if pic.status_code != 200:
                print("下载失败: %s (HTTP %s)" % (link, pic.status_code))
                return
            with open(os.path.join(self.file_path, filename), 'wb') as fp:
                fp.write(pic.content)
            print("下载完成")
        except Exception as e:
            # Thread boundary: report the failure and let other downloads go on.
            print(e)

    @timer
    def run_main(self):
        """Start one download thread per stored URL and wait for all of them."""
        threads = []
        for row in self.get_link():
            # Skip NULL or empty URLs instead of downloading the string "None".
            if not row or not row[0]:
                continue
            t = Thread(target=self.download, args=(str(row[0]),))
            t.start()
            threads.append(t)
        for t in threads:
            t.join()

    # Converted only after its last use as an in-class decorator, so that
    # Spider.timer remains callable as a static method from outside the class.
    timer = staticmethod(timer)

# Script entry: build the downloader from the module-level settings and run it.
spider = Spider(file_path=file_path, header=header)
spider.run_main()
码农公寓

相关文章