xml.etree.ElementTree模块(简称 ET)
1.加载模块
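xml.etree.cElementTree 的底层是 C 实现,所以速度快;如果导入失败,就只能加载纯 Python 的 xml.etree.ElementTree。注意:cElementTree 自 Python 3.3 起已弃用,并在 Python 3.9 中被移除;从 Python 3.3 开始,xml.etree.ElementTree 会自动使用 C 加速实现,因此下面的 try/except 只是为了兼容旧版本。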
try:
import xml.etree.cElementTree as ET
except ImportError:
import xml.etree.ElementTree as ET
2.载入xml文档
tree = ET.parse("country.xml")
3.获取根节点
root = tree.getroot()
4.查找节点
allobj=root.findall('country')
obj=root.find('country')
subobj=root.find('subcountry')
5.获取节点的值
subobjtext=subobj.text
subobjtext=root.find('subcountry').text
6.获取节点属性
name = obj.get('name') # value of the "name" attribute on the country element found in step 4
添加节点
from xml.etree.ElementTree import Element, SubElement
方法1
root = Element("root")
child = Element("child")
root.append(child)
方法2
root = Element("root")
child = SubElement(root, "child")
节点赋值
child.text="zifuchuan"
#!/usr/bin/env python
#coding:utf-8
# Demo script: parse country.xml, print its structure and each country's
# rank, then remove every country whose rank is above 50 and save the result
# to output.xml. Written to run on both Python 2.7 and Python 3.
try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET
import sys

try:
    tree = ET.parse("country.xml")  # open and parse the XML document
    # Alternative: root = ET.fromstring(country_string) parses XML held in a string
    root = tree.getroot()  # root element of the document
except (IOError, ET.ParseError):
    # Missing/unreadable file or malformed XML.
    print("Error:cannot parse file:country.xml.")
    sys.exit(1)

print("%s --- %s" % (root.tag, root.attrib))
for child in root:
    print("%s --- %s" % (child.tag, child.attrib))
print("*" * 10)
print(root[0][1].text)  # access nested elements by index
print("%s %s" % (root[0].tag, root[0].text))
print("*" * 10)
for country in root.findall('country'):  # every country element directly under root
    rank = country.find('rank').text  # text of the child element <rank>
    name = country.get('name')  # value of the "name" attribute
    print("%s %s" % (name, rank))

# Modify the XML: findall() returns a list, so removing from root while
# looping over it is safe.
for country in root.findall('country'):
    rank = int(country.find('rank').text)
    if rank > 50:
        root.remove(country)
tree.write('output.xml')
7.查找并修改某个值
def changeXmlsLabel(xmlFolder,currentLabels,aimLabels):
    '''
    Rename annotation labels in every XML file inside a folder.

    Each entry of ``xmlFolder`` is parsed as XML. For every ``<name>``
    element found beneath an ``<object>`` child of the document root, the
    text is looked up in ``currentLabels`` and, when present, replaced by the
    entry at the same position in ``aimLabels``. The file is then rewritten
    in place (UTF-8) and a progress line is printed.

    Labels that do not appear in ``currentLabels`` are left unchanged, so a
    single unexpected label no longer aborts the whole batch with
    ``ValueError``. Entries of ``currentLabels`` without a counterpart in
    ``aimLabels`` are likewise left unchanged.

    Args:
        xmlFolder: directory whose entries are all XML annotation files.
        currentLabels: list of label strings to replace.
        aimLabels: list of replacement labels, paired by position.

    Example:
        xmlFolder='/home/lfs/dateset/sdjw/train/Annotations'
        currentLabels=['OuterProtrusion','InnerProtrusion']
        aimLabels=['defect','defect']
        changeXmlsLabel(xmlFolder,currentLabels,aimLabels)
    '''
    import os
    import xml.etree.ElementTree as ET

    # Build the lookup once; setdefault keeps the first occurrence of a
    # duplicated label, matching what list.index() would have selected.
    renames = {}
    for old_label, new_label in zip(currentLabels, aimLabels):
        renames.setdefault(old_label, new_label)

    num = 0
    for xmlname in os.listdir(xmlFolder):
        xmlpathname = os.path.join(xmlFolder, xmlname)
        tree = ET.parse(xmlpathname)
        root = tree.getroot()
        for obj in root.findall('object'):
            for name in obj.iter('name'):
                if name.text in renames:
                    name.text = renames[name.text]
        tree.write(xmlpathname, encoding='utf-8')
        num += 1
        print('num:%d,name:%s' % (num, xmlname))
构建新的xml文件
# NOTE(review): this snippet relies on `ET` and `os` being imported already;
# no `import os` is visible in these notes.
# Create the root element
root=ET.Element("root")
# Create child element A
a=ET.Element("A")
# Add sub-element child1 under A
a_child=ET.SubElement(a,"child1")
a_child.text="I'm child of A"
# Add sub-element child2 under A
a_child1=ET.SubElement(a,"child2")
# Create child element B
b=ET.Element("B")
# Add sub-element child1 under B
b_child=ET.SubElement(b,"child1")
b_child.text="I'm child of B"
b_child.set("name","book") # set() takes the attribute as a key, value pair
# Pass a and b to extend() as one tuple so that A and B become children of the root element
root.extend((a,b))
trees=ET.ElementTree(root)
# Write trees to test4.xml next to this file; the content is <root><A><child1>I'm child of A</child1><child2 /></A><B><child1 name="book">I'm child of B</child1></B></root>
trees.write(os.path.join(os.path.dirname(__file__),"test4.xml"))
解决排版缩进问题
def indent(elem, level=0):
    """Add newline/tab whitespace to a tree in place so it serialises indented.

    Only ``text``/``tail`` values that are empty or whitespace-only are
    overwritten; real text content is kept as is.

    Args:
        elem: the element whose subtree is re-indented.
        level: nesting depth of ``elem``; one tab is emitted per level.
    """
    i = "\n" + level * "\t"
    if len(elem):
        # Whitespace before the first child: one level deeper than elem.
        if not elem.text or not elem.text.strip():
            elem.text = i + "\t"
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
        for child in elem:
            indent(child, level + 1)
        # The last child's tail sits right before elem's closing tag, so it
        # must drop back to elem's own indentation level.
        if not child.tail or not child.tail.strip():
            child.tail = i
    elif level and (not elem.tail or not elem.tail.strip()):
        # Leaf below the top level: only the tail needs whitespace.
        elem.tail = i
# Re-indent the tree under root in place, wrap it in an ElementTree and write
# it to test4.xml in the directory of this file.
indent(root)
trees=ET.ElementTree(root)
trees.write(os.path.join(os.path.dirname(__file__),"test4.xml"))