# A brief implementation of basic XPath features (lxml + tkinter demo)
# -*- coding: utf-8 -*-
"""
Created on Sat Jan 1 15:38:11 2022
@author: shiyu
"""
import tkinter
import requests
from lxml import etree
# Page downloaded once at start-up; its HTML is the input for the XPath demos.
url = 'https://www.bilibili.com/v/popular/all?spm_id_from=333.1007.0.0'

# Desktop-browser User-Agent sent with the request
# (presumably so the site serves its normal HTML page -- TODO confirm).
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/'
        '537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.'
        '0.1054.62'
    ),
}

# Without a timeout requests waits indefinitely on an unresponsive server,
# which would block the GUI from ever appearing. With it, a stalled
# connection raises requests.exceptions.Timeout instead.
html_ = requests.get(url, headers=headers, timeout=10)

# Keep a copy on disk: the etree.parse() based demos read 'data.txt'.
with open('data.txt', 'w', encoding='utf-8') as f:
    f.write(html_.text)
def create_button1(s, pos):
    """Add a button that runs ``etree_HTML`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_HTML())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_HTML():
    """Demonstrate ``etree.HTML``: parse the downloaded page from a string.

    Prints the type of the parsed tree, the type of its serialized form
    (``etree.tostring``) and the type of that form decoded as UTF-8,
    followed by blank lines as a separator.
    """
    root = etree.HTML(html_.text)
    print(type(root))
    serialized = etree.tostring(root)
    print(type(serialized))
    decoded = serialized.decode('utf-8')
    print(type(decoded))
    print('\n')
def create_button2(s, pos):
    """Add a button that runs ``etree_parse`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_parse())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_parse():
    """Demonstrate ``etree.parse``: parse 'data.txt' with an HTML parser.

    Prints the type of the parsed tree, the type of its serialized form
    (``etree.tostring``) and the type of that form decoded as UTF-8,
    followed by blank lines as a separator.
    """
    tree = etree.parse('data.txt', etree.HTMLParser())
    print(type(tree))
    serialized = etree.tostring(tree)
    print(type(serialized))
    decoded = serialized.decode('utf-8')
    print(type(decoded))
    print('\n')
def create_button3(s, pos):
    """Add a button that runs ``etree_1`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_1())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_1():
    """Demonstrate ``//``: select every element in 'data.txt' with ``//*``.

    Prints the parsed tree's type, then the result's type and the result.
    """
    tree = etree.parse('data.txt', etree.HTMLParser())
    print(type(tree))
    matches = tree.xpath('//*')
    print(type(matches), matches, sep='\n')
    print('\n')
def create_button4(s, pos):
    """Add a button that runs ``etree_2`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_2())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_2():
    """Demonstrate ``/``: select ``p`` elements that are direct children of a ``div``.

    Runs ``//div/p`` against 'data.txt' and prints the parsed tree's type,
    then the result's type and the result.
    """
    tree = etree.parse('data.txt', etree.HTMLParser())
    print(type(tree))
    matches = tree.xpath('//div/p')
    print(type(matches), matches, sep='\n')
    print('\n')
def create_button5(s, pos):
    """Add a button that runs ``etree_3`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_3())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_3():
    """Demonstrate ``..``: select the parent of every ``p`` element.

    Runs ``//p/..`` against 'data.txt' and prints the parsed tree's type,
    then the result's type and the result.
    """
    tree = etree.parse('data.txt', etree.HTMLParser())
    print(type(tree))
    matches = tree.xpath('//p/..')
    print(type(matches), matches, sep='\n')
    print('\n')
def create_button6(s, pos):
    """Add a button that runs ``etree_4`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_4())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_4():
    """Select the parent of the ``p`` whose class attribute is "popular-tips".

    Runs ``//p[@class="popular-tips"]/..`` against 'data.txt' and prints the
    parsed tree's type, then the result's type and the result.
    """
    tree = etree.parse('data.txt', etree.HTMLParser())
    print(type(tree))
    matches = tree.xpath('//p[@class="popular-tips"]/..')
    print(type(matches), matches, sep='\n')
    print('\n')
def create_button7(s, pos):
    """Add a button that runs ``etree_5`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_5())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_5():
    """Select the ``class`` attribute of the parent of a specific ``p``.

    Runs ``//p[@class="popular-tips"]/../@class`` against 'data.txt' and
    prints the parsed tree's type, then the result's type and the result.
    """
    tree = etree.parse('data.txt', etree.HTMLParser())
    print(type(tree))
    matches = tree.xpath('//p[@class="popular-tips"]/../@class')
    print(type(matches), matches, sep='\n')
    print('\n')
def create_button8(s, pos):
    """Add a button that runs ``etree_6`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_6())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_6():
    """Demonstrate the ``parent::`` axis, the explicit form of ``..``.

    Runs ``//p[@class="popular-tips"]/parent::*`` against 'data.txt' and
    prints the parsed tree's type, then the result's type and the result.
    """
    tree = etree.parse('data.txt', etree.HTMLParser())
    print(type(tree))
    matches = tree.xpath('//p[@class="popular-tips"]/parent::*')
    print(type(matches), matches, sep='\n')
    print('\n')
def create_button9(s, pos):
    """Add a button that runs ``etree_7`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_7())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_7():
    """Demonstrate attribute matching: filter ``p`` elements by ``class``.

    Runs ``//p[@class="popular-tips"]`` against 'data.txt' and prints the
    parsed tree's type, then the result's type and the result.
    """
    tree = etree.parse('data.txt', etree.HTMLParser())
    print(type(tree))
    matches = tree.xpath('//p[@class="popular-tips"]')
    print(type(matches), matches, sep='\n')
    print('\n')
def create_button10(s, pos):
    """Add a button that runs ``etree_8`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_8())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_8():
    """Demonstrate ``text()``: select the text nodes directly under ``span``.

    Runs ``//span/text()`` against 'data.txt' and prints the parsed tree's
    type, then the result's type and the result.
    """
    tree = etree.parse('data.txt', etree.HTMLParser())
    print(type(tree))
    matches = tree.xpath('//span/text()')
    print(type(matches), matches, sep='\n')
    print('\n')
def create_button11(s, pos):
    """Add a button that runs ``etree_9`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_9())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_9():
    """Demonstrate ``contains()``: match one value of a multi-valued ``class``.

    Runs ``//div[contains(@class,"nav-tabs__item")]`` against 'data.txt',
    which keeps ``div`` elements whose class attribute contains that
    substring, and prints the parsed tree's type, then the result's type
    and the result.
    """
    tree = etree.parse('data.txt', etree.HTMLParser())
    print(type(tree))
    matches = tree.xpath('//div[contains(@class,"nav-tabs__item")]')
    print(type(matches), matches, sep='\n')
    print('\n')
def create_button12(s, pos):
    """Add a button that runs ``etree_10`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_10())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_10():
    """Demonstrate multi-attribute matching with ``and``.

    Selects ``div`` elements in 'data.txt' whose class attribute contains
    "flow-loader" and whose style attribute equals "position:relative;".
    Prints the parsed tree's type, then the result's type and the result.
    """
    tree = etree.parse('data.txt', etree.HTMLParser())
    print(type(tree))
    # Adjacent literals are joined at compile time into one XPath string.
    query = ('//div[contains(@class,"flow-loader") and '
             '@style="position:relative;"]')
    matches = tree.xpath(query)
    print(type(matches), matches, sep='\n')
    print('\n')
def create_button13(s, pos):
    """Add a button that runs ``etree_11`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_11())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_11():
    """Demonstrate positional selection with ``[n]``.

    Runs ``//div[3]`` against 'data.txt', i.e. every ``div`` that is the
    third ``div`` child of its parent, and prints the parsed tree's type,
    then the result's type and the result.
    """
    tree = etree.parse('data.txt', etree.HTMLParser())
    print(type(tree))
    matches = tree.xpath('//div[3]')
    print(type(matches), matches, sep='\n')
    print('\n')
def create_button14(s, pos):
    """Add a button that runs ``etree_12`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_12())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_12():
    """Demonstrate positional selection with ``last()``.

    Runs ``//div[last()]`` against 'data.txt', i.e. every ``div`` that is
    the last ``div`` child of its parent, and prints the parsed tree's type,
    then the result's type and the result.
    """
    tree = etree.parse('data.txt', etree.HTMLParser())
    print(type(tree))
    matches = tree.xpath('//div[last()]')
    print(type(matches), matches, sep='\n')
    print('\n')
def create_button15(s, pos):
    """Add a button that runs ``etree_13`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_13())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_13():
    """Demonstrate positional selection with ``position()``.

    Runs ``//div[position()<3]`` against 'data.txt', i.e. every ``div`` that
    is the first or second ``div`` child of its parent, and prints the
    parsed tree's type, then the result's type and the result.
    """
    tree = etree.parse('data.txt', etree.HTMLParser())
    print(type(tree))
    matches = tree.xpath('//div[position()<3]')
    print(type(matches), matches, sep='\n')
    print('\n')
def create_button16(s, pos):
    """Add a button that runs ``etree_14`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_14())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_14():
    """Demonstrate the ``attribute::`` axis.

    Runs ``//div[3]/div[1]/attribute::*`` against 'data.txt', which yields
    every attribute of the matched ``div`` elements, and prints the parsed
    tree's type, then the result's type and the result.
    """
    tree = etree.parse('data.txt', etree.HTMLParser())
    print(type(tree))
    matches = tree.xpath('//div[3]/div[1]/attribute::*')
    print(type(matches), matches, sep='\n')
    print('\n')
def create_button17(s, pos):
    """Add a button that runs ``etree_15`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_15())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_15():
    """Demonstrate the ``child::`` axis (direct children only) on 'te.txt'.

    Prints the ``child::div`` result for the first ``div`` under ``body``,
    then the results of addressing its first and second ``div`` child by
    index so the two outputs can be compared.
    """
    tree = etree.parse('te.txt', etree.HTMLParser())
    print(type(tree))
    # Run all queries before printing anything about them.
    via_axis = tree.xpath('//body/div[1]/child::div')
    first_child = tree.xpath('//body/div[1]/div[1]')
    second_child = tree.xpath('//body/div[1]/div[2]')
    print(type(via_axis), via_axis, sep='\n')
    print(first_child, second_child)
    print('\n')
def create_button18(s, pos):
    """Add a button that runs ``etree_16`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_16())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_16():
    """Demonstrate the ``descendant::`` axis (all nested levels) on 'te.txt'.

    Prints the ``descendant::div`` result for the first ``div`` under
    ``body``, then the results of addressing the nested ``div`` elements
    one by one so the two outputs can be compared.
    """
    tree = etree.parse('te.txt', etree.HTMLParser())
    print(type(tree))
    # Run all queries before printing anything about them.
    via_axis = tree.xpath('//body/div[1]/descendant::div')
    first_child = tree.xpath('//body/div[1]/div[1]')
    grandchild = tree.xpath('//body/div[1]/div[1]/div[1]')
    second_child = tree.xpath('//body/div[1]/div[2]')
    print(type(via_axis), via_axis, sep='\n')
    print(first_child, grandchild, second_child)
    print('\n')
def create_button19(s, pos):
    """Add a button that runs ``etree_17`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_17())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_17():
    """Demonstrate the ``following::`` axis on 'te.txt'.

    ``following::*`` yields the elements that come after the context node
    in document order, at any nesting level. Prints that result for the
    first ``div`` under ``body``, then the second ``div`` under ``body``
    and everything inside it for comparison.
    """
    tree = etree.parse('te.txt', etree.HTMLParser())
    print(type(tree))
    # Run all queries before printing anything about them.
    via_axis = tree.xpath('//body/div[1]/following::*')
    second_div = tree.xpath('//body/div[2]')
    inside_second_div = tree.xpath('//body/div[2]//*')
    print(type(via_axis), via_axis, sep='\n')
    print(second_div, inside_second_div)
    print('\n')
def create_button20(s, pos):
    """Add a button that runs ``etree_18`` to the module-level window.

    s: text shown on the button.
    pos: indexable pair; ``pos[0]`` is the grid row, ``pos[1]`` the column.
    """
    button = tkinter.Button(master=win, text=s, command=lambda: etree_18())
    grid_row, grid_column = pos[0], pos[1]
    # sticky='w' left-aligns the button inside its grid cell.
    button.grid(row=grid_row, column=grid_column, sticky='w')
def etree_18():
    """Demonstrate the ``following-sibling::`` axis on 'te.txt'.

    ``following-sibling::*`` yields the later elements that share the
    context node's parent. Prints that result for the first ``div`` under
    ``body``, then the second ``div`` under ``body`` for comparison.
    """
    tree = etree.parse('te.txt', etree.HTMLParser())
    print(type(tree))
    # Run both queries before printing anything about them.
    via_axis = tree.xpath('//body/div[1]/following-sibling::*')
    second_div = tree.xpath('//body/div[2]')
    print(type(via_axis), via_axis, sep='\n')
    print(second_div)
    print('\n')
# Main window; every create_buttonN helper attaches its button to `win`.
win = tkinter.Tk()
win.geometry('600x450')
win.title('xpath')

# One button per XPath demo, placed on a three-column grid as (row, column).
create_button1('etree.HTML', (0, 0))
create_button2('etree.parse', (0, 1))
create_button3('//', (0, 2))
create_button4('/', (1, 0))
create_button5('..', (1, 1))
create_button6('..具有指定属性的节点的父节点', (1, 2))
create_button7('..具有指定属性的节点的父节点的属性', (2, 0))
create_button8('parent::*', (2, 1))
create_button9('属性匹配,筛选', (2, 2))
create_button10('text()', (3, 0))
create_button11('属性多值匹配contains()', (3, 1))
create_button12('多属性匹配', (3, 2))
create_button13('按序选择[]', (4, 0))
create_button14('按序选择last()', (4, 1))
create_button15('按序选择position()', (4, 2))
create_button16('attribute()', (5, 0))
create_button17('child()', (5, 1))
# Label spells the XPath axis name "descendant" that the callback demonstrates.
create_button18('descendant()', (5, 2))
create_button19('following', (6, 0))
create_button20('following-sibling', (6, 1))

# Blocks until the window is closed; button callbacks print to stdout.
win.mainloop()
te.txt
# -*- coding: utf-8 -*-
"""
Created on Sat Jan 1 22:46:01 2022
@author: shiyu
"""
<html>
<body>
<div>
<div>1
<div>a</div>
</div>
<div>2</div>
</div>
<div>
<div>3
<div>b</div>
</div>
<div>4</div>
</div>
</body>
</html>