xml模块

2024-03-02 17:12:16

xml模块

新建一个命名为country.xml的文件，里面的内容如下：
<data>
    <country name="中国">
        <rank updated="yes">2</rank>
        <neighbor name="俄罗斯" direction="E"/>
        <neighbor name="印度" direction="W"/>
    </country>
    <country name="新加坡">
        <rank updated="yes">5</rank>
        <neighbor name="马来西亚" direction="N"/>
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2011</year>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>

解析

通过下面三种方式可以获取根节点
1.调用parse()方法，返回解析树(用于解析xml文件)
import xml.etree.ElementTree as ET
# parse() reads the XML file and returns an ElementTree object
tree = ET.parse(source='country.xml')
# getroot() returns the root element of the parsed tree
root = tree.getroot()
print(root)  # root element, e.g. <Element 'data' at 0x02C84B18>

2.调用fromstring(),返回解析树的根元素(用于解析字符串格式)
import xml.etree.ElementTree as ET
# Read the whole file as text; the with-block closes the file handle
# as soon as the read is done instead of leaking it.
with open("country.xml", mode='r', encoding='utf-8') as xml_file:
    data = xml_file.read()
# fromstring() parses an XML string and returns the root element directly
root = ET.fromstring(data)
print(root)  # e.g. <Element 'data' at 0x039A4AC8>

3.调用ElementTree类ElementTree(self, element=None, file=None)
import xml.etree.ElementTree as ET
# ElementTree(file=...) parses the file while constructing the tree object
tree = ET.ElementTree(file='country.xml')
# getroot() is a method of the tree object, so it must be called on `tree`
# (an Element has no getroot(), and `root` does not exist yet in a fresh script)
root = tree.getroot()
print(root)  # e.g. <Element 'data' at 0x02C5F0C8>

遍历

import xml.etree.ElementTree as ET
tree = ET.parse(source="country.xml")
root = tree.getroot()
# Element.tag holds the tag name of the element
print(root.tag)  # data
# Element.attrib holds the element's attributes as a dict (<data> has none)
print(root.attrib)  # {}

# 遍历xml文档的第二层
import xml.etree.ElementTree as ET
tree = ET.parse(source="country.xml")
root = tree.getroot()
# Iterating an element yields its direct children
for country_elem in root:
    # Second level: every <country> element with its attributes
    print(f"{country_elem.tag} --1-- {country_elem.attrib}")
    # Third level: the children of the current <country>
    for child_elem in country_elem:
        print(f"{child_elem.tag} --2-- {child_elem.attrib}")
#输出
country --1-- {'name': '中国'}
rank --2-- {'updated': 'yes'}
neighbor --2-- {'name': '俄罗斯', 'direction': 'E'}
neighbor --2-- {'name': '印度', 'direction': 'W'}
country --1-- {'name': '新加坡'}
rank --2-- {'updated': 'yes'}
neighbor --2-- {'name': '马来西亚', 'direction': 'N'}
country --1-- {'name': 'Panama'}
rank --2-- {'updated': 'yes'}
year --2-- {}
neighbor --2-- {'name': 'Colombia', 'direction': 'E'}

通过下标的方式直接访问节点

import xml.etree.ElementTree as ET
tree = ET.parse(source="country.xml")
root = tree.getroot()
first_country = root[0]
# First child of the first <country>: print its tag and text
chinaRank = first_country[0]
print(f"{chinaRank.tag}    {chinaRank.text}")  # rank    2
# Second child of the first <country>: print its tag and attributes
chinaNei = first_country[1]
print(f"{chinaNei.tag}    {chinaNei.attrib}")  # neighbor    {'name': '俄罗斯', 'direction': 'E'}

ElementTree提供的方法

iter(tag)：以当前元素为根节点，创建树迭代器,如果tag不为None,则以tag进行过滤

import xml.etree.ElementTree as ET
tree = ET.parse("country.xml")
root = tree.getroot()
# iter('neighbor') walks the whole subtree below root and yields only the
# <neighbor> elements; the loop variable must match the name used in the body.
for neighbor in root.iter('neighbor'):
    print(neighbor.tag, '---', neighbor.attrib)
#输出：
neighbor --- {'name': '俄罗斯', 'direction': 'E'}
neighbor --- {'name': '印度', 'direction': 'W'}
neighbor --- {'name': '马来西亚', 'direction': 'N'}
neighbor --- {'name': 'Colombia', 'direction': 'E'}

findall() ：返回所有匹配的子元素列表
find() ：查找第一个匹配的子元素

import xml.etree.ElementTree as ET
tree = ET.parse(source="country.xml")
root = tree.getroot()
# findall('country') returns the list of direct <country> children of root
for country_elem in root.findall('country'):
    # get() reads one attribute value of the element
    country_name = country_elem.get('name')
    # find('rank') returns the first <rank> child; .text is its text content
    rank_text = country_elem.find('rank').text
    print(f"{country_name}     {rank_text}")
#输出：
中国     2
新加坡     5
Panama     69

修改xml结构

添加、修改或输出标签属性

import xml.etree.ElementTree as ET
tree = ET.parse("country.xml")
root = tree.getroot()

# --- Modify element text ---
china_rank = root[0][0]  # <rank> of the first <country>
print(china_rank.text)  # 2  (before the change)
# Element.text must stay a str: an int still prints, but ET.dump()/tree.write()
# cannot serialize it, so convert the computed value back to a string.
china_rank.text = str(int(china_rank.text) - 1)
print(china_rank.text)  # 1  (after the change)
china_rank.text = '排名'
print(china_rank.text)  # 排名

# --- Modify attributes: set() overwrites an existing attribute or adds a new one ---
print(china_rank.attrib)  # {'updated': 'yes'}
china_rank.set('updated', 'No')  # change the existing "updated" attribute
print(china_rank.attrib)  # {'updated': 'No'}
china_rank.set('time', '2021')  # add a new "time" attribute
print(china_rank.attrib)  # {'updated': 'No', 'time': '2021'}

# --- Delete an attribute ---
second_rank = root[1][0]  # <rank> of the second <country>
print(second_rank.attrib)  # {'updated': 'yes'}
del second_rank.attrib['updated']
print(second_rank.attrib)  # {}

# Print the whole XML document to the terminal
ET.dump(root)
# The changes above only exist in memory; write the tree out to persist them.
# encoding="utf-8" keeps the non-ASCII text readable in the file (the default
# us-ascii encoding would emit it as numeric character references).
tree.write("output.xml", encoding="utf-8", xml_declaration=True)

删除元素（标签）

import xml.etree.ElementTree as ET
tree = ET.parse(source="country.xml")
root = tree.getroot()
print(root.findall('country'))  # three <country> elements before the removal
# remove() detaches the given direct child from its parent
third_country = root[2]
root.remove(third_country)
print(root.findall('country'))  # two <country> elements remain

添加元素（标签）

import xml.etree.ElementTree as ET
tree = ET.parse(source="country.xml")
root = tree.getroot()
country = root[0]
# SubElement creates a new <GDP> element and appends it to `country` in one step
elem1 = ET.SubElement(country, "GDP")
elem1.text = "100w亿"
ET.dump(root)
# List the tags of all children of the first <country>, including the new one
for child_elem in country:
    print(child_elem.tag)
#输出
rank
neighbor
neighbor
GDP

  

**思考题**

- 已知xml的字符串xmlstr。

  1）获取root节点的id属性值，并保存到变量中

  2）请把所有图片的地址获取到存储到列表中

```python
import xml.etree.ElementTree as ET

xmlstr='''
	<root id='123'>
		<img src='1.png' />
		<img src='2.png' />
	</root>
'''

# 1) Store the value of the root element's "id" attribute in a variable.
# fromstring() already returns the root element, so its attribute can be
# read directly with get() instead of iterating and printing the whole dict.
root = ET.fromstring(xmlstr)
id_value = root.get('id')  # not named `id`, to avoid shadowing the builtin
print(id_value)  # 123

# 2) Collect the address (the "src" attribute) of every <img> into a list.
li = [img_tag.get('src') for img_tag in root.findall('img')]
print(li)  # ['1.png', '2.png']

把所有a1元素的值存储在列表中

xmlstr2.xml文件中内容如下：
<root id='123'>
    <test>
        <a1>111</a1>
        <a1>222</a1>
    </test>
</root>

##<——————————————————>
import xml.etree.ElementTree as ET
tree=ET.parse('xmlstr2.xml')
root=tree.getroot()
print(root.tag)
print(root.findall('test'))
TestNode=root.findall('test')[0]
print(TestNode.findall('a1'))
li=[]
for item in TestNode.findall('a1'):
    li.append(item.text)
print(li)

码农公寓

xml模块

相关文章