xpath简介

简要实现xpath基础功能

# -*- coding: utf-8 -*-
"""
Created on Sat Jan  1 15:38:11 2022

@author: shiyu
"""

import tkinter
import requests
from lxml import etree

# Popular-videos page whose HTML serves as sample input for the XPath demos.
url = 'https://www.bilibili.com/v/popular/all?spm_id_from=333.1007.0.0'
# Adjacent string literals are joined at compile time, so no source
# indentation ends up inside the header value (a backslash continuation
# inside a string literal would keep the next line's leading spaces).
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62'
    )
}
# Without a timeout the request could block indefinitely on a silent server.
html_ = requests.get(url, headers=headers, timeout=10)
# Keep a copy on disk: the file-based demos parse 'data.txt'.
with open('data.txt', 'w', encoding='utf-8') as f:
    f.write(html_.text)

def create_button1(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_HTML``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_HTML()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_HTML():
    """Demo of ``etree.HTML``: parse the downloaded page text in ``html_``.

    Prints the type of the parsed root, the type of ``etree.tostring``'s
    output, and the type of that output decoded as UTF-8, followed by a
    blank separator.
    """
    root = etree.HTML(html_.text)
    print(type(root))
    serialized = etree.tostring(root)
    print(type(serialized))
    decoded = serialized.decode('utf-8')
    print(type(decoded))
    print('\n')

def create_button2(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_parse``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_parse()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_parse():
    """Demo of ``etree.parse``: parse ``data.txt`` with an HTML parser.

    Prints the type of the parsed tree, the type of ``etree.tostring``'s
    output, and the type of that output decoded as UTF-8, followed by a
    blank separator.
    """
    parser = etree.HTMLParser()
    tree = etree.parse('data.txt', parser)
    print(type(tree))
    serialized = etree.tostring(tree)
    print(type(serialized))
    decoded = serialized.decode('utf-8')
    print(type(decoded))
    print('\n')

def create_button3(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_1``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_1()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_1():
    """Demo of ``//``: select every element of ``data.txt`` with ``//*``.

    Prints the type of the parsed tree, then the type of the XPath result
    and the result itself, followed by a blank separator.
    """
    parser = etree.HTMLParser()
    tree = etree.parse('data.txt', parser)
    print(type(tree))
    matches = tree.xpath('//*')
    print(type(matches), matches, sep='\n')
    print('\n')

def create_button4(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_2``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_2()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_2():
    """Demo of ``/`` (child step): select ``p`` children of any ``div``.

    Evaluates ``//div/p`` against ``data.txt`` and prints the type of the
    parsed tree, the type of the result, and the result, followed by a
    blank separator.
    """
    parser = etree.HTMLParser()
    tree = etree.parse('data.txt', parser)
    print(type(tree))
    matches = tree.xpath('//div/p')
    print(type(matches), matches, sep='\n')
    print('\n')

def create_button5(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_3``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_3()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_3():
    """Demo of ``..`` (parent step): select the parents of ``p`` elements.

    Evaluates ``//p/..`` against ``data.txt`` and prints the type of the
    parsed tree, the type of the result, and the result, followed by a
    blank separator.
    """
    parser = etree.HTMLParser()
    tree = etree.parse('data.txt', parser)
    print(type(tree))
    matches = tree.xpath('//p/..')
    print(type(matches), matches, sep='\n')
    print('\n')

def create_button6(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_4``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_4()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_4():
    """Demo: parent of a node selected by attribute value.

    Evaluates ``//p[@class="popular-tips"]/..`` against ``data.txt`` and
    prints the type of the parsed tree, the type of the result, and the
    result, followed by a blank separator.
    """
    parser = etree.HTMLParser()
    tree = etree.parse('data.txt', parser)
    print(type(tree))
    matches = tree.xpath('//p[@class="popular-tips"]/..')
    print(type(matches), matches, sep='\n')
    print('\n')

def create_button7(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_5``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_5()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_5():
    """Demo: ``class`` attribute of the parent of an attribute-matched node.

    Evaluates ``//p[@class="popular-tips"]/../@class`` against ``data.txt``
    and prints the type of the parsed tree, the type of the result, and the
    result, followed by a blank separator.
    """
    parser = etree.HTMLParser()
    tree = etree.parse('data.txt', parser)
    print(type(tree))
    matches = tree.xpath('//p[@class="popular-tips"]/../@class')
    print(type(matches), matches, sep='\n')
    print('\n')

def create_button8(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_6``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_6()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_6():
    """Demo of the ``parent::`` axis on an attribute-matched node.

    Evaluates ``//p[@class="popular-tips"]/parent::*`` against ``data.txt``
    and prints the type of the parsed tree, the type of the result, and the
    result, followed by a blank separator.
    """
    parser = etree.HTMLParser()
    tree = etree.parse('data.txt', parser)
    print(type(tree))
    matches = tree.xpath('//p[@class="popular-tips"]/parent::*')
    print(type(matches), matches, sep='\n')
    print('\n')

def create_button9(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_7``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_7()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_7():
    """Demo of attribute matching: filter ``p`` elements by ``class`` value.

    Evaluates ``//p[@class="popular-tips"]`` against ``data.txt`` and prints
    the type of the parsed tree, the type of the result, and the result,
    followed by a blank separator.
    """
    parser = etree.HTMLParser()
    tree = etree.parse('data.txt', parser)
    print(type(tree))
    matches = tree.xpath('//p[@class="popular-tips"]')
    print(type(matches), matches, sep='\n')
    print('\n')

def create_button10(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_8``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_8()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_8():
    """Demo of ``text()``: select the text children of ``span`` elements.

    Evaluates ``//span/text()`` against ``data.txt`` and prints the type of
    the parsed tree, the type of the result, and the result, followed by a
    blank separator.
    """
    parser = etree.HTMLParser()
    tree = etree.parse('data.txt', parser)
    print(type(tree))
    texts = tree.xpath('//span/text()')
    print(type(texts), texts, sep='\n')
    print('\n')

def create_button11(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_9``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_9()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_9():
    """Demo of ``contains()``: match one value of a multi-valued ``class``.

    Evaluates ``//div[contains(@class,"nav-tabs__item")]`` against
    ``data.txt`` and prints the type of the parsed tree, the type of the
    result, and the result, followed by a blank separator.
    """
    parser = etree.HTMLParser()
    tree = etree.parse('data.txt', parser)
    print(type(tree))
    matches = tree.xpath('//div[contains(@class,"nav-tabs__item")]')
    print(type(matches), matches, sep='\n')
    print('\n')
    
def create_button12(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_10``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_10()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_10():
    """Demo of multi-attribute matching: filter by two attributes at once.

    Selects ``div`` elements of ``data.txt`` whose ``class`` contains
    "flow-loader" and whose ``style`` equals "position:relative;". Prints
    the type of the parsed tree, the type of the result, and the result,
    followed by a blank separator.
    """
    html = etree.parse('data.txt', etree.HTMLParser())
    print(type(html))
    # Adjacent literals are joined at compile time, so the expression does
    # not pick up source indentation the way a backslash continuation inside
    # the string literal would.
    expression = (
        '//div[contains(@class,"flow-loader") and '
        '@style="position:relative;"]'
    )
    result = html.xpath(expression)
    print(type(result))
    print(result)
    print('\n')

def create_button13(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_11``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_11()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_11():
    """Demo of positional selection with ``[n]``.

    Evaluates ``//div[3]`` against ``data.txt`` and prints the type of the
    parsed tree, the type of the result, and the result, followed by a
    blank separator.
    """
    parser = etree.HTMLParser()
    tree = etree.parse('data.txt', parser)
    print(type(tree))
    # Matches every div that satisfies div[3], not just one node.
    matches = tree.xpath('//div[3]')
    print(type(matches), matches, sep='\n')
    print('\n')

def create_button14(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_12``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_12()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_12():
    """Demo of positional selection with ``last()``.

    Evaluates ``//div[last()]`` against ``data.txt`` and prints the type of
    the parsed tree, the type of the result, and the result, followed by a
    blank separator.
    """
    parser = etree.HTMLParser()
    tree = etree.parse('data.txt', parser)
    print(type(tree))
    matches = tree.xpath('//div[last()]')
    print(type(matches), matches, sep='\n')
    print('\n')

def create_button15(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_13``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_13()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_13():
    """Demo of positional selection with ``position()``.

    Evaluates ``//div[position()<3]`` against ``data.txt`` and prints the
    type of the parsed tree, the type of the result, and the result,
    followed by a blank separator.
    """
    parser = etree.HTMLParser()
    tree = etree.parse('data.txt', parser)
    print(type(tree))
    matches = tree.xpath('//div[position()<3]')
    print(type(matches), matches, sep='\n')
    print('\n')
    
def create_button16(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_14``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_14()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_14():
    """Demo of the ``attribute::`` axis: list all attributes of a node.

    Evaluates ``//div[3]/div[1]/attribute::*`` against ``data.txt`` and
    prints the type of the parsed tree, the type of the result, and the
    result, followed by a blank separator.
    """
    parser = etree.HTMLParser()
    tree = etree.parse('data.txt', parser)
    print(type(tree))
    attributes = tree.xpath('//div[3]/div[1]/attribute::*')
    print(type(attributes), attributes, sep='\n')
    print('\n')

def create_button17(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_15``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_15()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_15():
    """Demo of the ``child::`` axis: direct ``div`` children only.

    Parses ``te.txt`` and evaluates ``//body/div[1]/child::div`` next to
    the two explicit paths ``div[1]`` and ``div[2]`` under the same node,
    so the axis result can be compared with them. Prints the tree type,
    the axis result's type and value, then the two comparison results on
    one line, followed by a blank separator.
    """
    parser = etree.HTMLParser()
    tree = etree.parse('te.txt', parser)
    print(type(tree))
    children = tree.xpath('//body/div[1]/child::div')
    first_child = tree.xpath('//body/div[1]/div[1]')
    second_child = tree.xpath('//body/div[1]/div[2]')
    print(type(children), children, sep='\n')
    print(first_child, second_child)
    print('\n')

def create_button18(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_16``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_16()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_16():
    """Demo of the ``descendant::`` axis: ``div`` descendants at any depth.

    Parses ``te.txt`` and evaluates ``//body/div[1]/descendant::div`` next
    to three explicit paths under the same node (``div[1]``,
    ``div[1]/div[1]`` and ``div[2]``) for comparison. Prints the tree type,
    the axis result's type and value, then the three comparison results on
    one line, followed by a blank separator.
    """
    parser = etree.HTMLParser()
    tree = etree.parse('te.txt', parser)
    print(type(tree))
    descendants = tree.xpath('//body/div[1]/descendant::div')
    first_child = tree.xpath('//body/div[1]/div[1]')
    grandchild = tree.xpath('//body/div[1]/div[1]/div[1]')
    second_child = tree.xpath('//body/div[1]/div[2]')
    print(type(descendants), descendants, sep='\n')
    print(first_child, grandchild, second_child)
    print('\n')

def create_button19(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_17``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_17()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_17():
    """Demo of the ``following::`` axis: nodes after the current one.

    The axis is not limited to siblings. Parses ``te.txt`` and evaluates
    ``//body/div[1]/following::*`` next to ``//body/div[2]`` and
    ``//body/div[2]//*`` for comparison. Prints the tree type, the axis
    result's type and value, then the two comparison results on one line,
    followed by a blank separator.
    """
    parser = etree.HTMLParser()
    tree = etree.parse('te.txt', parser)
    print(type(tree))
    following = tree.xpath('//body/div[1]/following::*')
    second_div = tree.xpath('//body/div[2]')
    inside_second_div = tree.xpath('//body/div[2]//*')
    print(type(following), following, sep='\n')
    print(second_div, inside_second_div)
    print('\n')

def create_button20(s, pos):
    """Place a button captioned ``s`` in ``win`` that runs ``etree_18``.

    ``pos[0]`` is the grid row and ``pos[1]`` the grid column; the button
    sticks to the west edge of its cell.
    """
    def on_click():
        # The target name is looked up when the button is clicked.
        return etree_18()

    button = tkinter.Button(win, text=s, command=on_click)
    button.grid(row=pos[0], column=pos[1], sticky='w')
def etree_18():
    """Demo of the ``following-sibling::`` axis: later same-level nodes.

    Parses ``te.txt`` and evaluates ``//body/div[1]/following-sibling::*``
    next to ``//body/div[2]`` for comparison. Prints the tree type, the
    axis result's type and value, then the comparison result, followed by
    a blank separator.
    """
    parser = etree.HTMLParser()
    tree = etree.parse('te.txt', parser)
    print(type(tree))
    later_siblings = tree.xpath('//body/div[1]/following-sibling::*')
    second_div = tree.xpath('//body/div[2]')
    print(type(later_siblings), later_siblings, sep='\n')
    print(second_div)
    print('\n')
    
# Main window hosting one button per XPath demo. The create_button*
# helpers read this module-level name, so it must exist before they run.
win = tkinter.Tk()
win.geometry('600x450')
win.title('xpath')

# One entry per demo: (button factory, caption, (grid row, grid column)).
# The grid is three columns wide and filled row by row.
_BUTTON_LAYOUT = (
    (create_button1, 'etree.HTML', (0, 0)),
    (create_button2, 'etree.parse', (0, 1)),
    (create_button3, '//', (0, 2)),
    (create_button4, '/', (1, 0)),
    (create_button5, '..', (1, 1)),
    (create_button6, '..具有指定属性的节点的父节点', (1, 2)),
    (create_button7, '..具有指定属性的节点的父节点的属性', (2, 0)),
    (create_button8, 'parent::*', (2, 1)),
    (create_button9, '属性匹配,筛选', (2, 2)),
    (create_button10, 'text()', (3, 0)),
    (create_button11, '属性多值匹配contains()', (3, 1)),
    (create_button12, '多属性匹配', (3, 2)),
    (create_button13, '按序选择[]', (4, 0)),
    (create_button14, '按序选择last()', (4, 1)),
    (create_button15, '按序选择position()', (4, 2)),
    (create_button16, 'attribute()', (5, 0)),
    (create_button17, 'child()', (5, 1)),
    # Caption spelling corrected to match the XPath axis name.
    (create_button18, 'descendant()', (5, 2)),
    (create_button19, 'following', (6, 0)),
    (create_button20, 'following-sibling', (6, 1)),
)
for _make_button, _caption, _cell in _BUTTON_LAYOUT:
    _make_button(_caption, _cell)

# Enter the Tk event loop.
win.mainloop()

te.txt

# -*- coding: utf-8 -*-
"""
Created on Sat Jan  1 22:46:01 2022

@author: shiyu
"""

<html>
    <body>
        <div>
            <div>1
                <div>a</div>
            </div>
            <div>2</div>
        </div>
        <div>
            <div>3
                <div>b</div>
            </div>
            <div>4</div>
        </div>
    </body>
</html>
上一篇:python实现自动点赞——实践_2022-01-02


下一篇:【优化求解】一种非线性动态自适应惯性权重PSO算法(IPSO)Matlab代码