# Load http://quotes.toscrape.com/ in a pyppeteer-driven browser and print
# the span texts found under each div matched by the XPath in parse().
import asyncio

from pyppeteer import launch
from lxml import etree


async def main():
    """Open the quotes page in a non-headless browser and return its HTML.

    Returns:
        The value of ``page.content()`` after navigating to the site.
    """
    bro = await launch(headless=False)
    try:
        page = await bro.newPage()
        await page.goto('http://quotes.toscrape.com/')
        page_text = await page.content()
        return page_text
    finally:
        # Close the browser on success and on failure alike so the process
        # started by launch() is not left running.
        await bro.close()


def parse(tasks):
    """Done-callback: parse the finished task's HTML and print span texts.

    Args:
        tasks: The completed future/task whose result is the page HTML.
            ``tasks.result()`` re-raises if the task failed.
    """
    page_text = tasks.result()
    tree = etree.HTML(page_text)
    div_list = tree.xpath('/html/body/div/div[2]/div[1]/div')
    for element in div_list:
        # Relative XPath: text nodes of every <span> below this div.
        content = element.xpath('.//span/text()')
        print(content)


# Create and install the event loop explicitly: calling
# asyncio.get_event_loop()/ensure_future() with no running loop is
# deprecated and fails on recent Python versions.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
c = main()
tasks = loop.create_task(c)
tasks.add_done_callback(parse)
loop.run_until_complete(tasks)
pyppeteer用于抓取网页中动态加载的内容,也可以用于模拟登录