da24-银行账户多线程和通过开发者工具进行爬虫

网络图片爬取

  1. 页面上可能有动态内容(通过JavaScript代码动态生成的,显示网页源代码时看不到的内容)

    1. 方法一:JavaScript逆向 —> 找真正提供数据的URL

      1. 通过浏览器开发者工具,查找提供数据的URL(数据接口)
      2. 通过专业的抓包工具,直接获取到数据接口
      • Fiddler / Charles / Wireshark(Ethereal)
    2. 方法二:通过Python代码操控浏览器,直接拿到带动态内容的页面,然后再提取数据

      —> selenium.webdriver

    3. 了解: https://python-selenium-zh.readthedocs.io/zh_CN/latest/

"""
通过浏览器开发者工具,查找提供数据的URL,爬取图片
"""
from concurrent.futures.thread import ThreadPoolExecutor

import requests

from spider_open_API02 import download_picture

# Channels of image.so.com to crawl; defined once, outside the pool context.
channels = ['beauty', 'car', 'wallpaper']

# Downloads are handed to a pool of 8 worker threads; leaving the ``with``
# block waits for every submitted download to finish.
with ThreadPoolExecutor(max_workers=8) as pool:
    for ch in channels:
        # NOTE(review): ``sn`` takes the values 30 and 60, so ``sn=0`` is never
        # requested -- confirm that skipping it is intended.
        for num in range(1, 3):
            # A timeout keeps a stalled connection from hanging the script.
            resp = requests.get(
                f'https://image.so.com/zjl?ch={ch}&sn={num * 30}',
                timeout=10,
            )
            # Fail with a clear HTTP error instead of an obscure JSON/KeyError
            # when the data interface answers with a 4xx/5xx status.
            resp.raise_for_status()
            # The JSON payload carries its items under the 'list' key.
            wallpaper_list = resp.json()['list']
            for wallpaper in wallpaper_list:
                picture_url = wallpaper['qhimg_url']
                pool.submit(download_picture, picture_url)

定义银行类,实现多线程存取钱

"""
多个线程竞争一个资源 ---> 临界资源
想要保护资源(关键操作上只有一个线程能够访问到这个资源)需要使用锁
condition ---> 线程调度:暂停wait()、唤醒 notify_all()
threading.current_thread() ---> 可以得到当前线程的名字
"""
import random
import threading
import time
from concurrent.futures.thread import ThreadPoolExecutor


class Account:
    """Bank account whose balance can be updated from several threads.

    Every balance update runs while holding ``self.condition``, so
    concurrent deposits and withdrawals cannot overwrite each other's result.
    """

    def __init__(self):
        # The account starts out empty.
        self.balance = 0
        # Condition built on a re-entrant lock: it guards ``balance`` and lets
        # withdrawing threads sleep until a deposit notifies them.
        self.condition = threading.Condition(threading.RLock())

    def deposit(self, money):
        """Add money to the account and wake up any waiting withdrawals.

        :param money: amount to deposit
        """
        with self.condition:
            new_balance = self.balance + money
            # Short pause between reading and writing the balance, taken while
            # the lock is held (presumably simulates processing time).
            time.sleep(0.01)
            self.balance = new_balance
            # Give paused withdrawing threads a chance to re-check the balance.
            self.condition.notify_all()

    def withdraw(self, money, timeout=None):
        """Take money out of the account, waiting until the balance suffices.

        :param money: amount to withdraw
        :param timeout: maximum number of seconds to wait for sufficient
            funds; ``None`` (the default) waits indefinitely
        :return: True if the money was withdrawn, False if ``timeout``
            elapsed while the balance was still insufficient
        """
        with self.condition:
            # ``wait_for`` releases the lock while waiting and re-checks the
            # predicate after every notification, exactly like a
            # ``while ...: wait()`` loop, but also supports a timeout.
            has_funds = self.condition.wait_for(
                lambda: self.balance >= money, timeout
            )
            if not has_funds:
                return False
            new_balance = self.balance - money
            # Same pause-under-lock as in ``deposit``.
            time.sleep(0.01)
            self.balance = new_balance
            return True


def put_money(account):
    """Keep depositing random amounts into ``account``; never returns.

    Each round deposits a random integer between 5 and 10 (inclusive),
    prints the current thread's name with the amount and the balance read
    afterwards, then sleeps for one second.

    :param account: object providing ``deposit(money)`` and ``balance``
    """
    while True:
        amount = random.randint(5, 10)
        account.deposit(amount)
        worker_name = threading.current_thread().name
        # The balance is read after the deposit has returned, so other threads
        # may already have changed it again.
        message = f'{worker_name}存入{amount}元,当前余额{account.balance}元'
        print(message)
        time.sleep(1)


def get_money(account):
    """Keep withdrawing random amounts from ``account``; never returns.

    Each round withdraws a random integer between 10 and 20 (inclusive),
    prints the current thread's name with the amount and the balance read
    afterwards, then sleeps for half a second.

    :param account: object providing ``withdraw(money)`` and ``balance``
    """
    while True:
        amount = random.randint(10, 20)
        account.withdraw(amount)
        worker_name = threading.current_thread().name
        # The balance is read after the withdrawal has returned, so other
        # threads may already have changed it again.
        message = f'{worker_name}取出{amount}元,当前余额{account.balance}元'
        print(message)
        time.sleep(0.5)


def main():
    """Run five depositing and five withdrawing tasks on one shared account.

    The tasks are submitted alternately (deposit, withdraw, ...) to a pool of
    ten threads whose names start with 'Joker'.
    """
    shared_account = Account()
    with ThreadPoolExecutor(max_workers=10, thread_name_prefix='Joker') as executor:
        for _ in range(5):
            # Same submission order as before: one depositor, then one withdrawer.
            for task in (put_money, get_money):
                executor.submit(task, shared_account)


if __name__ == '__main__':
    main()
上一篇:leetcode解题思路分析(五十六)476 - 482 题


下一篇:1221. 分割平衡字符串