爬虫之协程异步 asyncio和aiohttp

基本用法:

爬虫之协程异步    asyncio和aiohttp
# High-performance crawling with coroutines
import asyncio
# async def fun(url):
#     print(f"正在请求{url}")
#     print(f"{url}请求完毕")
#     return f"{url}你爸爸已经搞定了"
# f = fun("http://www.baidu.com")


# How to drive the coroutine with an event loop directly:
# loop = asyncio.get_event_loop()
# loop.run_until_complete(f)

# How to wrap the coroutine in a Task first:
# loop = asyncio.get_event_loop()
# task = loop.create_task(f)
# loop.run_until_complete(task)

# How to wrap the coroutine in a Future (ensure_future):
# loop = asyncio.get_event_loop()
# task = asyncio.ensure_future(f)
# loop.run_until_complete(task)

# Completion callback: receives the finished Task, reads its result
# def callback(task):
#     print(task.result())
# Bind the callback to the task before running the loop
# loop = asyncio.get_event_loop()
# task = loop.create_task(f)
# task.add_done_callback(callback)
# loop.run_until_complete(task)
View Code

对多个任务爬取的例子

爬虫之协程异步    asyncio和aiohttp
# =============正题来了==============
# 对多个任务进行爬取
import requests
import aiohttp
import asyncio
import random,time
import time
# HTTP request headers impersonating a desktop Chrome browser.
# FIX: in the original these keys and values were bare words (a syntax
# error); dictionary keys and values must be string literals.
headers = {
    "Accept-Encoding": "gzip, deflate, sdch",
    "Accept-Language": "en-US,en;q=0.8",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Referer": "http://www.baidu.com/",
    "Connection": "keep-alive",
}
# Wall-clock start time; the end of the script prints (end - start)
# to show the total elapsed time of the concurrent downloads.
start = time.time()
# Eleven copies of the same mp3 URL, used to demonstrate that the
# downloads overlap instead of running one after another.
arr = ["http://music.163.com/song/media/outer/url?id=1820550501.mp3"] * 11
async def downsong(url):
    """Fetch *url* with aiohttp, printing start/end markers.

    The response body is intentionally discarded; the coroutine only
    demonstrates that the requests run concurrently.
    """
    print(f"{url}开始")
    # Remember: any blocking (synchronous) call inside a coroutine —
    # e.g. time.sleep(3) or requests.get(url) — stalls the whole event
    # loop and kills the concurrency; use the async equivalents instead
    # (await asyncio.sleep(3), aiohttp).

    # FIX: session.get() already returns an async context manager, so it
    # is used directly with `async with`; the original
    # `async with await session.get(...)` was redundant.
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as response:
            pass
            # page_text = await response.text()
            # print(page_text)
    print(f"{url}结束")

# Schedule one download task per URL on the event loop, then block
# until every task has finished and report the elapsed wall time.
stasks = []
loop = asyncio.get_event_loop()
# FIX: iterate the list directly instead of `for i in range(len(arr))`.
for url in arr:
    stasks.append(loop.create_task(downsong(url)))

# asyncio.wait() waits for all tasks; run_until_complete drives the loop.
loop.run_until_complete(asyncio.wait(stasks))
end = time.time()
print(end - start)
View Code

aiohttp基本用法

爬虫之协程异步    asyncio和aiohttp
# NOTE: async crawling requires the third-party aiohttp package:
# pip install aiohttp

# Basic aiohttp usage pattern:
# async def get_page(url):
#     async with aiohttp.ClientSession() as session:
#         # get()/post() accept: headers, params/data, proxy='http://ip:port'
#         async with await session.get(url) as response:
#             # text() returns the response body as a string
#             # read() returns the response body as bytes
#             # json() returns the parsed JSON object
#             # NOTE: always `await` before reading the response body
#             page_text = await response.text()
#             print(page_text)
View Code

 

 

有一点还没有解决,我不知道写入本地怎么写

爬虫之协程异步 asyncio和aiohttp

上一篇:node.js 中的package.json文件怎么创建?


下一篇:微信小程序实现滚动加载更多