# Parse page data with lxml combined with XPath
import requests
from lxml import etree
# Request headers: present a desktop Chrome User-Agent to the site.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'
}
url = 'https://www.1905.com/vod/top/lst/'

# Send the request. The timeout keeps the script from hanging indefinitely
# when the server never answers (requests waits forever by default).
html = requests.get(url, headers=header, timeout=10)

# Process the data: dump the raw response body, then parse it into an
# element tree that can be queried with XPath.
print(html.text)
lxml = etree.HTML(html.text)

# Select every <a target="_blank"> that is a direct child of a <dt>
# which is itself a direct child of a <dl>.
titles = lxml.xpath('//dl/dt/a[@target="_blank"]')
for title in titles:
    print(title.tag)     # tag name
    print(title.attrib)  # attributes
    print(title.text)    # text