参考:https://docs.python.org/2/library/xml.etree.elementtree.html
例子:
<?xml version="1.0"?>
<data>
<country name="Liechtenstein">
<rank>1</rank>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria" direction="E"/>
<neighbor name="Switzerland" direction="W"/>
</country>
<country name="Singapore">
<rank>4</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N"/>
</country>
<country name="Panama">
<rank>68</rank>
<year>2011</year>
<gdppc>13600</gdppc>
<neighbor name="Costa Rica" direction="W"/>
<neighbor name="Colombia" direction="E"/>
</country>
</data>
1、解析xml文件
>>> import os
>>> os.getcwd()
'D:\\workspace\\testpython'
>>> import xml.etree.ElementTree as ET
>>> tree = ET.parse('test.xml')
>>> root = tree.getroot()
>>> print root
<Element 'data' at 0x1d2a8b0>
>>> print tree
<xml.etree.ElementTree.ElementTree object at 0x01D2A9D0>
>>> root.tag
'data'
>>> root.attrib
{}
>>> #遍历子节点
>>> for child in root:
...     print child.tag,child.attrib
...
country {'name': 'Liechtenstein'}
country {'name': 'Singapore'}
country {'name': 'Panama'}
>>> root[0].text
'\n '
>>> root[0][1].text
'2008'
>>> root[1][3].text #neighbor是空元素,text为None,所以交互式解释器不显示任何内容
>>> root[1][2].text
'59900'
2、查找元素:root.iter()迭代,element.findall(),element.find(),element.get(),element.text
>>> #查询元素
>>> for neighbor in root.iter('neighbor'):
...     print neighbor.attrib
...
{'direction': 'E', 'name': 'Austria'}
{'direction': 'W', 'name': 'Switzerland'}
{'direction': 'N', 'name': 'Malaysia'}
{'direction': 'W', 'name': 'Costa Rica'}
{'direction': 'E', 'name': 'Colombia'}
>>> root.iter('neighbor')
<generator object iter at 0x01D3CF30>
>>> root.findall('country')
[<Element 'country' at 0x1d2aa90>, <Element 'country' at 0x1d2ad30>, <Element 'country' at 0x1d2af10>]
>>> for country in root.findall('country'): #element.findall()只查询当前元素的直接子元素
...     rank=country.find('rank').text #element.find()查询指定标签的第一个子元素,element.text获取元素的内容
...     name=country.get('name') #element.get()获取元素的属性值
...     print name,rank
...
Liechtenstein 1
Singapore 4
Panama 68
3、修改xml文件:element.set()修改属性,element.append()增加子元素,element.remove()删除元素
修改元素属性:
>>> for rank in root.iter('rank'):
...     new_rank = int(rank.text) + 1
...     rank.text=str(new_rank)
...     rank.set('updated','yes')
...
>>> tree.write('test.xml')
(原文此处为一张内嵌的PNG截图,图片数据已损坏,无法显示。)
删除元素:
>>> for country in root.findall('country'):
...     rank = int(country.find('rank').text)
...     if rank>50:
...         root.remove(country)
...
>>> tree.write('test.xml')
执行后test.xml的内容:
<?xml version="1.0"?>
<data>
<country name="Liechtenstein">
<rank updated="yes">2</rank>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria" direction="E"/>
<neighbor name="Switzerland" direction="W"/>
</country>
<country name="Singapore">
<rank updated="yes">5</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N"/>
</country>
</data>
4、构建xml文件:SubElement()
>>> a = ET.Element('a')
>>> b = ET.SubElement(a, 'b')
>>> c = ET.SubElement(a, 'c')
>>> d = ET.SubElement(c, 'd')
>>> ET.dump(a)
<a><b /><c><d /></c></a>
>>>
5、用命名空间来解析xml文档
If the XML input has namespaces, tags and attributes with prefixes in the form `prefix:sometag` get expanded to `{uri}sometag`, where the prefix is replaced by the full URI. Also, if there is a default namespace, that full URI gets prepended to all of the non-prefixed tags.
Here is an XML example that incorporates two namespaces, one with the prefix “fictional” and the other serving as the default namespace:
<?xml version="1.0"?>
<actors xmlns:fictional="http://characters.example.com"
xmlns="http://people.example.com">
<actor>
<name>John Cleese</name>
<fictional:character>Lancelot</fictional:character>
<fictional:character>Archie Leach</fictional:character>
</actor>
<actor>
<name>Eric Idle</name>
<fictional:character>Sir Robin</fictional:character>
<fictional:character>Gunther</fictional:character>
<fictional:character>Commander Clement</fictional:character>
</actor>
</actors>
>>> import xml.etree.ElementTree as ET
>>> tree=ET.parse('namespace.xml')
>>> root=tree.getroot()
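>>> #先定义前缀到命名空间URI的映射,再把它传给find()/findall()
>>> ns = {'real_person': 'http://people.example.com',
...       'role': 'http://characters.example.com'}
>>> for actor in root.findall('real_person:actor', ns):
...     name = actor.find('real_person:name', ns)
...     print name.text
...     for char in actor.findall('role:character', ns):
...         print ' |-->', char.text
...
John Cleese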
|--> Lancelot
|--> Archie Leach
Eric Idle
|--> Sir Robin
|--> Gunther
|--> Commander Clement
>>>
定位、编辑、保存元素属性(注:getchildren()自Python 2.7起已弃用,并在Python 3.9中被移除,建议改用list(elem)或直接使用下标elem[i]):
>>> import xml.etree.ElementTree as ET
>>> tree=ET.parse('spring-subtract.xml')
>>> root=tree.getroot()
>>> print root
<Element '{http://www.springframework.org/schema/beans}beans' at 0x1d423f0>
>>> print root.getchildren()
[<Element '{http://www.springframework.org/schema/beans}bean' at 0x1d422f0>, <Element '{http://www.springframework.org/schema/beans}bean' at 0x1d422d0>, <Element '{http://www.springframework.org/schema/beans}bean' at 0x1d425d0>]
>>> print root.getchildren()[2]
<Element '{http://www.springframework.org/schema/beans}bean' at 0x1d425d0>
>>> print root.getchildren()[2].getchildren()
[<Element '{http://www.springframework.org/schema/beans}property' at 0x1d42610>, <Element '{http://www.springframework.org/schema/beans}property' at 0x1d42670>]
>>> print root.getchildren()[2].getchildren()[1].attrib
{'name': 'cronExpression', 'value': ''}
>>> print root.getchildren()[2].getchildren()[1].attrib['value']
0000
>>> #编辑属性值
>>> root.getchildren()[2].getchildren()[1].set('value','')
>>> tree.write('spring-subtract.xml') #保存文件
>>> print root.getchildren()[2].getchildren()[1].attrib
{'name': 'cronExpression', 'value': ''}
>>>