# 操作xml格式文件
# 简单的数据存储语言,被设计用来传输和存储数据
'''xml
<data> <!-- country elements are children of data; rank, year, gdppc and neighbor are children of country -->
<country name="Liechtenstein">
<rank updated="yes">2</rank> <!-- updated="yes" is an attribute of rank; 2 is its text value -->
<year>2023</year>
<gdppc>141100</gdppc>
<neighbor direction="E" name="Austria" />
<neighbor direction="W" name="Switzerland" />
</country>
<country name="Singapore">
<rank updated="yes">5</rank>
<year>2026</year>
<gdppc>59900</gdppc>
<neighbor direction="N" name="Malaysia" />
</country>
<country name="Panama">
<rank updated="yes">69</rank>
<year>2026</year>
<gdppc>13600</gdppc>
<neighbor direction="W" name="Costa Rica" />
<neighbor direction="E" name="Colombia" />
</country>
</data>
'''
from xml.etree import ElementTree as ET # 'as ET'可以不加,加了之后,ET以后就代指ElementTree
# Parse an XML document directly from a file on disk.
# Constructing ElementTree with file=... loads the file immediately.
file_01 = ET.ElementTree(file='anli.xml')
# The root element of the parsed tree.
file_02 = file_01.getroot()
print(file_02)  # e.g. <Element 'data' at 0x0000015A4B37D270>
# When the XML is already a string (built in code, fetched from the network,
# ...), ET.XML() parses it and returns the root Element directly.
# Notes inside the document are written as real XML comments (<!-- ... -->).
# A Python-style "# ..." note is NOT a comment in XML: it would be parsed as
# text/tail data of the surrounding elements and written back out on save.
content = '''<data> <!-- country elements are children of data; rank, year, gdppc and neighbor are children of country -->
<country name="Liechtenstein">
<rank updated="yes">2</rank> <!-- updated="yes" is an attribute of rank; 2 is its text value -->
<year>2023</year>
<gdppc>141100</gdppc>
<neighbor direction="E" name="Austria" />
<neighbor direction="W" name="Switzerland" />
</country>
<country name="Singapore">
<rank updated="yes">5</rank>
<year>2026</year>
<gdppc>59900</gdppc>
<neighbor direction="N" name="Malaysia" />
</country>
<country name="Panama">
<rank updated="yes">69</rank>
<year>2026</year>
<gdppc>13600</gdppc>
<neighbor direction="W" name="Costa Rica" />
<neighbor direction="E" name="Colombia" />
</country>
</data>'''
file_03 = ET.XML(content)  # root <data> element
print(file_03)
# Iterating an Element yields its direct children: here the <country> elements.
for country in file_03:
    print(f"{country.tag} {country.attrib}")
    # Iterate one level deeper: the children of each <country>.
    for field in country:
        print(f"{field.tag} {field.attrib} {field.text}")
# find() returns the first direct child with the given tag: the first <country>.
file_04 = file_03.find('country')
print(f"{file_04.tag} {file_04.attrib}")  # tag name and attribute dict
# Searching again from that child reaches the grandchild <rank>.
file_05 = file_04.find('rank')
print(f"{file_05.tag} {file_05.attrib} {file_05.text}")  # name, attributes, text
# iter() walks the whole subtree and yields every <year> element it meets.
for chi in file_03.iter('year'):
    print(f"{chi.tag} {chi.text}")
# findall() collects all direct <country> children into a list.
file_06 = file_03.findall('country')
print(file_06)
# <gdppc> under the first <country>; file_04 already is that first <country>.
file_07 = file_04.find('gdppc')
print(file_07.text)
# Open question from the notes: how to reach the second <country>?
# Either index the findall() list (file_06[1]) or use the 1-based position
# predicate of the supported XPath subset: file_03.find('country[2]').
# Modifying and deleting elements
_first_country = file_03.find('country')
file_08 = _first_country.find('gdppc')  # <gdppc> under the first <country>
file_08.text = '999'  # new text value, assigned as a string
file_08.attrib['name'] = '刘德华'  # add an attribute to this element
print(file_08.attrib, file_08.text)
# Removing an element also discards everything nested inside it.
file_03.remove(_first_country)
print(file_03.findall('country'))
# The changes so far exist only in memory; wrap the root in an ElementTree and
# write it out (to the original file or, as here, to a new one).
tree = ET.ElementTree(file_03)
tree.write('new_anli.xml', encoding='utf-8')
# Building a document, approach 1: create detached Elements, then attach them.
file_09 = ET.Element('home')  # root node
# Keyword arguments to Element() become entries of its attrib dict.
son_01 = ET.Element('son', name='xueyou')
grandson_01 = ET.Element('grandson', name='xueyoujunior')
son_01.append(grandson_01)
son_02 = ET.Element('son', name='dehua')
grandson_02 = ET.Element('grandson', name='dehuajunior')
son_02.append(grandson_02)
file_09.extend([son_01, son_02])
file_10 = ET.ElementTree(file_09)
file_10.write('tianwang.xml', encoding='utf-8', short_empty_elements=True)
# The whole document is written on a single line. On Python 3.9+ calling
# ET.indent(file_10) before write() inserts line breaks and indentation.
# short_empty_elements controls how elements without content are written:
#   True  -> short form: <grandson name="xueyoujunior" />
#   False -> long form:  <grandson name="xueyoujunior"></grandson>
# Prefer the short form when an element has no value.
# Building a document, approach 2: makeelement() creates a new, still detached
# element from an existing one; it must be attached explicitly afterwards.
file_11 = ET.Element('family')  # root node
son_03 = file_11.makeelement('son', dict(name='liming'))
grandson_03 = son_03.makeelement('grandson', dict(name='limingjunior'))
son_03.append(grandson_03)
son_04 = file_11.makeelement('son', dict(name='fucheng'))
grandson_04 = son_04.makeelement('grandson', dict(name='fuchengjunior'))
son_04.append(grandson_04)
file_11.extend([son_03, son_04])
file_12 = ET.ElementTree(file_11)
file_12.write('tianwang2.xml', encoding='utf-8', short_empty_elements=True)
# Building a document, approach 3: SubElement() creates the child and appends
# it to the given parent in a single call.
# NOTE: file_12 is rebound here; it previously held the ElementTree of the
# 'family' document.
file_12 = ET.Element('areas')  # root node
son_05 = ET.SubElement(file_12, 'son', area='Taizhou')
son_06 = ET.SubElement(file_12, 'son', area='Wenzhou')
grandson_05 = ET.SubElement(son_05, 'xianshi', area='Linhai')
grandson_05.text = '籍贯'
file_13 = ET.ElementTree(file_12)
file_13.write('jiaxiang.xml', encoding='utf-8')
'''
python
content = """<xml>
<ToUserName><![CDATA[gh_7f083739789a]]></ToUserName>
<FromUserName><![CDATA[oia2TjuEGTNoeX76QEjQNrcURxG8]]></FromUserName>
<CreateTime>1395658920</CreateTime>
<MsgType><![CDATA[event]]></MsgType>
<Event><![CDATA[TEMPLATESENDJOBFINISH]]></Event>
<MsgID>200163836</MsgID>
<Status><![CDATA[success]]></Status>
</xml>"""
'''
# Creating WeChat official-account style XML, where values are wrapped in
# CDATA sections: <user><![CDATA[value]]></user>.
# ElementTree has no CDATA node type, and any "<" placed in Element.text is
# escaped to "&lt;" when written, so typing the "<![CDATA[...]]>" markers into
# .text cannot work. The Element therefore holds the plain value, and the file
# is serialised through xml.dom.minidom, which can emit a real CDATA section.
from xml.dom import minidom

file_14 = ET.Element('user')
file_14.text = "哈喽"  # plain value; the CDATA wrapper is added on output
file_15 = ET.ElementTree(file_14)  # kept so the tree is still available as before

_cdata_doc = minidom.Document()
_cdata_user = _cdata_doc.createElement(file_14.tag)
_cdata_user.appendChild(_cdata_doc.createCDATASection(file_14.text))
_cdata_doc.appendChild(_cdata_user)
with open('text.xml', 'w', encoding='utf-8') as _out:
    _out.write(_cdata_user.toxml())  # writes <user><![CDATA[哈喽]]></user>
# Example of a WeChat official-account XML message.
content_01 = """<xml>
<ToUserName><![CDATA[gh_7f083739789a]]></ToUserName>
<FromUserName><![CDATA[oia2TjuEGTNoeX76QEjQNrcURxG8]]></FromUserName>
<CreateTime>1395658920</CreateTime>
<MsgType><![CDATA[event]]></MsgType>
<Event><![CDATA[TEMPLATESENDJOBFINISH]]></Event>
<MsgID>200163836</MsgID>
<Status><![CDATA[success]]></Status>
</xml>"""
file_17 = ET.XML(content_01)  # root <xml> element
# Map each direct child's tag to its text. The parser unwraps CDATA sections,
# so the values are the plain strings.
file_16 = {field.tag: field.text for field in file_17}
print(file_16)
# Prints:
# {'ToUserName': 'gh_7f083739789a', 'FromUserName': 'oia2TjuEGTNoeX76QEjQNrcURxG8',
#  'CreateTime': '1395658920', 'MsgType': 'event', 'Event': 'TEMPLATESENDJOBFINISH',
#  'MsgID': '200163836', 'Status': 'success'}