- 安装lxml
  pip install lxml
- 导入lxml
  from lxml import etree
- 读取
# Fetch the Baidu home page and print the attributes of every <a> element.
# `etree` comes from the `from lxml import etree` step above.
import requests  # required by requests.get below; install with `pip install requests`

url = "https://www.baidu.com/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36",
    "Content-Type": "application/json; charset=UTF-8",
}

# Pass the headers by keyword: the second positional parameter of
# requests.get is `params`, so a positional dict would be sent as
# query-string values instead of HTTP headers.
# The timeout keeps the script from hanging forever on a stalled connection.
html_str = requests.get(url, headers=headers, timeout=10).text

# Parse the HTML text into an element tree and select every anchor element.
tree = etree.HTML(html_str)
anchors = tree.xpath("//a")

for anchor in anchors:
    # `attrib` is the element's attribute mapping (href, class, ...).
    print(anchor.attrib)