Python:xml格式文件的应用

#  操作xml格式文件
#  简单的数据存储语言,被设计用来传输和存储数据
'''xml
<data>  # country name从属于data,rank updated等从属于country name
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>  # updated="yes"是rank updated的属性,2是它的值
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2026</year>
        <gdppc>59900</gdppc>
        <neighbor direction="N" name="Malaysia" />
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
'''


from xml.etree import ElementTree as ET  # 'as ET'可以不加,加了之后,ET以后就代指ElementTree
#  Parse an XML document directly from a file on disk.
file_01 = ET.parse(source='anli.xml')  # ElementTree wrapping the whole document
file_02 = file_01.getroot()  # the root element of that tree
print(file_02)  # prints something like <Element 'data' at 0x0000015A4B37D270>

#  When the XML is already a string (e.g. fetched over the network), parse it
#  from memory instead of from a file.
#  NOTE: the "# ..." fragments inside the literal are not XML comments; the
#  parser keeps them as ordinary character data (element text / tail).
content = '''<data>  # country name从属于data,rank updated等从属于country name
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>  # updated="yes"是rank updated的属性,2是它的值
        <year>2023</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2026</year>
        <gdppc>59900</gdppc>
        <neighbor direction="N" name="Malaysia" />
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2026</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>'''
# fromstring() is the same function as XML(): it returns the root Element.
file_03 = ET.fromstring(content)
print(file_03)

#  Walk the direct children of the root element ...
for country in file_03:
    print(country.tag, country.attrib)
    #  ... and, for each of them, its own direct children.
    for field in country:
        print(field.tag, field.attrib, field.text)

# find() returns the first direct child with the given tag.
file_04 = file_03.find('country')
print(file_04.tag, file_04.attrib)  # tag name and attributes of that child

# The same call on the child reaches one level deeper.
file_05 = file_04.find('rank')
print(file_05.tag, file_05.attrib, file_05.text)  # tag, attributes and text

# iter() searches the whole subtree, so every <year> is visited.
for chi in file_03.iter('year'):
    print(chi.tag, chi.text)

# findall() returns a list of all direct children with the given tag.
file_06 = file_03.findall('country')
print(file_06)

# <gdppc> under the first <country> (file_04 is that first <country>).
file_07 = file_04.find('gdppc')
print(file_07.text)
#  To reach the second <country> instead, index the findall() result
#  (file_03.findall('country')[1]) or use a 1-based position predicate
#  (file_03.find('country[2]')).

#  Modifying and deleting elements.
first_country = file_03.find('country')  # first <country> under the root
file_08 = first_country.find('gdppc')
file_08.text = '999'  # element text must be assigned as a string
file_08.set('name', '刘德华')  # add (or overwrite) an attribute
print(file_08.attrib, file_08.text)

# Removing an element drops everything beneath it as well. The element removed
# here is the same first <country> whose <gdppc> was just edited, so that edit
# is not part of what gets written below.
file_03.remove(first_country)
print(file_03.findall('country'))

# All changes so far live only in memory; wrap the root in an ElementTree and
# write it out (to a new file here; the original path could be used too).
tree = ET.ElementTree(file_03)
tree.write('new_anli.xml', encoding='utf-8')

#  Building a document, approach 1: create free-standing Elements and attach
#  them to their parents afterwards.
file_09 = ET.Element('home')  # root node

son_01 = ET.Element('son', name='xueyou')
grandson_01 = ET.Element('grandson', name='xueyoujunior')
son_01.append(grandson_01)

son_02 = ET.Element('son', name='dehua')
grandson_02 = ET.Element('grandson', name='dehuajunior')
son_02.append(grandson_02)

file_09.extend([son_01, son_02])

file_10 = ET.ElementTree(file_09)
file_10.write('tianwang.xml', encoding='utf-8', short_empty_elements=True)
# The output is a single line with no line breaks. On Python 3.9+ calling
# ET.indent(file_10) before write() would add newlines and indentation.
# short_empty_elements=True writes childless, text-less elements in the short
# form <grandson name="xueyoujunior" />; False writes the long form
# <grandson name="xueyoujunior"></grandson>. Prefer the short form when an
# element has no value.

#  Building a document, approach 2: let an existing element create new
#  elements via makeelement(); they still have to be attached explicitly.
file_11 = ET.Element('family')  # root node

son_03 = file_11.makeelement('son', {'name': 'liming'})
grandson_03 = son_03.makeelement('grandson', {'name': 'limingjunior'})
son_03.append(grandson_03)

son_04 = file_11.makeelement('son', {'name': 'fucheng'})
grandson_04 = son_04.makeelement('grandson', {'name': 'fuchengjunior'})
son_04.append(grandson_04)

file_11.extend((son_03, son_04))

file_12 = ET.ElementTree(file_11)
file_12.write('tianwang2.xml', encoding='utf-8', short_empty_elements=True)

#  Building a document, approach 3: SubElement() creates the child and
#  attaches it to the given parent in one step.
file_12 = ET.Element('areas')  # root node

son_05 = ET.SubElement(file_12, 'son', area='Taizhou')
son_06 = ET.SubElement(file_12, 'son', area='Wenzhou')

grandson_05 = ET.SubElement(son_05, 'xianshi', area='Linhai')
grandson_05.text = '籍贯'  # text content of the <xianshi> element

file_13 = ET.ElementTree(file_12)
file_13.write('jiaxiang.xml', encoding='utf-8')

'''
python
content = """<xml>
    <ToUserName><![CDATA[gh_7f083739789a]]></ToUserName>
    <FromUserName><![CDATA[oia2TjuEGTNoeX76QEjQNrcURxG8]]></FromUserName>
    <CreateTime>1395658920</CreateTime>
    <MsgType><![CDATA[event]]></MsgType>
    <Event><![CDATA[TEMPLATESENDJOBFINISH]]></Event>
    <MsgID>200163836</MsgID>
    <Status><![CDATA[success]]></Status>
</xml>"""
'''
#  Creating a WeChat-official-account style element whose value is a CDATA
#  section, i.e. <![CDATA[value]]>.
#  ElementTree has no CDATA node type and escapes "<" in element text when
#  writing, so markup assigned to .text ends up as "&lt;![CDATA[..." in the
#  file. xml.dom.minidom can create a real CDATA section instead.
from xml.dom import minidom

file_15 = minidom.Document()  # owner document; nodes are created through it
file_14 = file_15.createElement('user')
file_14.appendChild(file_15.createCDATASection('哈喽'))  # '哈喽' is the value
file_15.appendChild(file_14)
with open('text.xml', 'w', encoding='utf-8') as text_file:
    # Serialize the element itself so that no XML declaration is emitted.
    text_file.write(file_14.toxml())  # <user><![CDATA[哈喽]]></user>

#  WeChat official-account example: turn the XML payload into a flat dict.
content_01 = """<xml>
    <ToUserName><![CDATA[gh_7f083739789a]]></ToUserName>
    <FromUserName><![CDATA[oia2TjuEGTNoeX76QEjQNrcURxG8]]></FromUserName>
    <CreateTime>1395658920</CreateTime>
    <MsgType><![CDATA[event]]></MsgType>
    <Event><![CDATA[TEMPLATESENDJOBFINISH]]></Event>
    <MsgID>200163836</MsgID>
    <Status><![CDATA[success]]></Status>
</xml>"""
file_17 = ET.XML(content_01)
# Map each direct child's tag to its text; the parser already unwraps CDATA
# sections into plain text.
file_16 = {child.tag: child.text for child in file_17}
print(file_16)  # prints {'ToUserName': 'gh_7f083739789a', 'FromUserName': 'oia2TjuEGTNoeX76QEjQNrcURxG8', 'CreateTime': '1395658920', 'MsgType': 'event', 'Event': 'TEMPLATESENDJOBFINISH', 'MsgID': '200163836', 'Status': 'success'}
上一篇:trie模板


下一篇:Python 子类继承父类构造函数说明