基于Flask的示例
Server端
# Demo Flask server: three routes that all render the same template after an
# artificial delay, giving a crawler slow endpoints to fetch concurrently.
import time

from flask import Flask, render_template

app = Flask(__name__)

# Seconds each handler blocks before answering (simulates a slow page).
_RESPONSE_DELAY_SECONDS = 2


def _render_after_delay():
    """Sleep for the configured delay, then render ``test.html``."""
    time.sleep(_RESPONSE_DELAY_SECONDS)
    return render_template("test.html")


@app.route("/bobo")
def index_bobo():
    """Handle ``/bobo``: delayed render of the shared test template."""
    return _render_after_delay()


@app.route("/jay")
def index_jay():
    """Handle ``/jay``: delayed render of the shared test template."""
    return _render_after_delay()


@app.route("/tom")
def index_tom():
    """Handle ``/tom``: delayed render of the shared test template."""
    return _render_after_delay()


if __name__ == "__main__":
    # threaded=True asks the development server to handle requests in
    # separate threads, so one sleeping handler does not block the others.
    app.run(threaded=True)
爬虫端(异步爬取)
# Asynchronous crawler: requests every URL in ``urls`` concurrently with
# aiohttp, parses each response in a done-callback, and prints the total
# elapsed time.
import asyncio
import time

import aiohttp
from lxml import etree

# The three demo endpoints, each requested four times (12 requests total).
urls = [
    'http://127.0.0.1:5000/bobo',
    'http://127.0.0.1:5000/jay',
    'http://127.0.0.1:5000/tom',
] * 4


async def get_request(url, session=None):
    """Send a GET request to *url* and return the response body as text.

    Rule of thumb for this style of code: every ``with`` becomes
    ``async with`` and every blocking operation is preceded by ``await``.

    Args:
        url: Address to fetch.
        session: Optional ``aiohttp.ClientSession`` to reuse. When omitted,
            a temporary session is opened for this single request and
            closed afterwards.

    Returns:
        The decoded response body (``str``). ``response.read()`` would
        return the raw bytes instead.
    """
    if session is None:
        # No shared session supplied: open a throw-away one for this call.
        async with aiohttp.ClientSession() as own_session:
            return await get_request(url, own_session)

    # session.get also accepts headers=, params= and proxy="http://ip:port".
    async with session.get(url) as response:
        return await response.text()


def parse(task):
    """Done-callback: parse a finished request task and print its ``<li>`` texts.

    Cancelled or failed tasks are reported and skipped instead of raising
    from inside the event loop's callback machinery.

    Args:
        task: The completed task wrapping ``get_request``; its result is the
            page text.
    """
    if task.cancelled():
        print('request cancelled')
        return
    error = task.exception()
    if error is not None:
        print('request failed:', repr(error))
        return

    tree = etree.HTML(task.result())
    print(tree.xpath('//li/text()'))


async def main():
    """Fetch all ``urls`` concurrently and print the elapsed seconds."""
    # perf_counter is monotonic, so it is the right clock for durations.
    start = time.perf_counter()

    # One session is shared by all requests instead of one per request.
    async with aiohttp.ClientSession() as session:
        tasks = []
        for url in urls:
            # Tasks are created inside the running loop; creating them
            # before a loop exists relies on deprecated implicit loop
            # creation.
            task = asyncio.create_task(get_request(url, session))
            task.add_done_callback(parse)
            tasks.append(task)

        # asyncio.wait raises ValueError when given an empty collection.
        if tasks:
            await asyncio.wait(tasks)

    print(time.perf_counter() - start)


if __name__ == '__main__':
    # asyncio.run creates and closes the event loop, replacing the
    # deprecated get_event_loop()/run_until_complete pattern.
    asyncio.run(main())