12 lxml&XPath结合使用

实现:

# 1、获取所有tr标签
# 2、获取第2个tr标签
# 3、获取所有class等于even的tr标签
# 4、获取所有a标签及其属性值
# 5、获取所有的职位信息(纯文本)
 1 """lxml&XPath结合使用"""
 2 
 3 
 4 from lxml import etree
 5 
 6 parser = etree.HTMLParser(encoding='utf-8')
 7 html = etree.parse('test.html', parser=parser)
 8 
 9 # 1、获取所有tr标签
10 trs = html.xpath("//tr")        # xpath函数返回的是一个列表
11 for tr in trs:
12     print(tr)
13     #print(etree.tostring(tr, encoding='utf-8').decode('GBK'))
# 2. Get the second <tr> element.
# NOTE: "//tr[2]" matches every <tr> that is the second <tr> child of its own
# parent, so it can match several elements; [0] keeps the first one in
# document order. Use "(//tr)[2]" for the second <tr> of the whole document.
# Indexing with [0] raises IndexError when nothing matches.
tr = html.xpath("//tr[2]")[0]
# Serialize straight to str: encoding to UTF-8 bytes and then decoding them as
# GBK corrupts (or fails on) any non-ASCII text.
print(etree.tostring(tr, encoding='unicode'))
# 3. Get all <tr> elements whose class attribute equals "even".
trs = html.xpath("//tr[@class='even']")
for tr in trs:
    # Serialize straight to str: encoding to UTF-8 bytes and then decoding
    # them as GBK corrupts (or fails on) any non-ASCII text.
    print(etree.tostring(tr, encoding='unicode'))
# 4_1. Get the value of the href attribute of every <a> element.
ah = html.xpath("//a/@href")
for a in ah:
    print(a)
# 4_2. Get every <a> element that has an href attribute.
al = html.xpath("//a[@href]")
for a in al:
    # Serialize straight to str: encoding to UTF-8 bytes and then decoding
    # them as GBK corrupts (or fails on) any non-ASCII text.
    print(etree.tostring(a, encoding='unicode'))

 

 



上一篇:python中通过selenium简单操作及xpath元素定位&轴定位


下一篇:SQL Server 数据库定时自动备份(转)