有如下的xml文件:
代码如下:
<root>
<childs>
<child name="first">1</child>
<child value="2">2</child>
</childs>
</root>
下面介绍python解析xml文件的几种方法,使用python模块实现。
方式1,python模块实现自动遍历所有节点:
代码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""SAX demo: visit every node of test.xml and collect its character data.

The print calls take a single pre-formatted string so the script produces the
same output under Python 2 and Python 3.
"""
from xml.sax import parse
from xml.sax.handler import ContentHandler


class testhandle(ContentHandler):
    """SAX content handler that logs each event and records character data.

    The callback names must be spelled exactly ``startElement``,
    ``endElement`` and ``characters``; the SAX parser looks them up by name,
    so any other casing is silently never called.

    Args:
        inlist: list that receives every chunk of character data, in
            document order (whitespace between elements included).
    """

    def __init__(self, inlist):
        # Explicit base-class call (not super()) because ContentHandler is an
        # old-style class on Python 2.
        ContentHandler.__init__(self)
        self.inlist = inlist

    def startElement(self, name, attrs):
        """Log the element name and the names of its attributes."""
        print('name: %s attrs: %s' % (name, attrs.keys()))

    def endElement(self, name):
        """Log the name of the element being closed."""
        print('endname %s' % name)

    def characters(self, chars):
        """Log a chunk of character data and append it to ``inlist``."""
        print('chars %s' % chars)
        self.inlist.append(chars)


if __name__ == '__main__':
    lt = []
    parse('test.xml', testhandle(lt))
    print(lt)
结果:
name: root attrs: []
chars
name: childs attrs: []
chars
name: child attrs: [u'name']
chars 1
endname child
chars
name: child attrs: [u'value']
chars 2
endname child
chars
endname childs
chars
endname root
[u'\n', u'\n', u'1', u'\n', u'2', u'\n', u'\n']
方式2,python模块实现获取根节点,按需查找指定节点:
代码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""minidom demo: parse an XML string, take the root and inspect its children.

The print calls take a single pre-formatted string so the script produces the
same output under Python 2 and Python 3.
"""
from xml.dom import minidom

# NOTE(review): sample payload. The child element names follow the output
# shown in the article; the root tag name is an assumption -- confirm.
xmlstr = '''
<hash>
<request name='first'>/2/photos/square/type.xml</request>
<error_code>21301</error_code>
<error>auth faild!</error>
</hash>
'''


def doxml(xmlstr):
    """Parse ``xmlstr`` and print details about each child of the root.

    Every child of the root is echoed with ``toxml()``. For element children
    the ``name`` attribute, tag name, number of child nodes and leading text
    are printed as well, followed by the public attribute names available on
    the node.

    Args:
        xmlstr: a well-formed XML document as a string.

    Raises:
        xml.parsers.expat.ExpatError: if ``xmlstr`` is not well-formed.
    """
    dom = minidom.parseString(xmlstr)
    print('dom:')
    print(dom.toxml())
    # documentElement is the root element even when a comment or doctype
    # precedes it; firstChild would return that leading node instead.
    root = dom.documentElement
    print('root:')
    print(root.toxml())
    for child in root.childNodes:
        print(child.toxml())
        # Whitespace text nodes and comments have no attributes to report.
        if child.nodeType != child.ELEMENT_NODE:
            continue
        print('child node attribute name: %s' % child.getAttribute('name'))
        print('child node name: %s' % child.nodeName)
        print('child node len: %s' % len(child.childNodes))
        # Only text/CDATA nodes carry .data; an empty element, or one whose
        # first child is another element, reports an empty string.
        first = child.firstChild
        if first is not None and first.nodeType in (first.TEXT_NODE,
                                                    first.CDATA_SECTION_NODE):
            data = first.data
        else:
            data = ''
        print('child data: %s' % data)
        print('=======================================')
        print('more help info to see:')
        # List the public attribute names; calling help() on each *name
        # string* documents nothing and only prints None.
        print(', '.join(med for med in dir(child) if not med.startswith('_')))


if __name__ == '__main__':
    doxml(xmlstr)
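结果:
dom:
<?xml version="1.0" ?><hash>
<request name="first">/2/photos/square/type.xml</request>
<error_code>21301</error_code>
<error>auth faild!</error>
</hash>
root:
<hash>
<request name="first">/2/photos/square/type.xml</request>
<error_code>21301</error_code>
<error>auth faild!</error>
</hash>

<request name="first">/2/photos/square/type.xml</request>
child node attribute name: first
child node name: request
child node len: 1
child data: /2/photos/square/type.xml
=======================================
more help info to see:

两种方法各有其优点,python的xml处理模块太多,目前只用到这2个。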
=====补充分割线================实际工作中发现python的minidom无法解析其它编码的xml,只能解析utf-8编码的xml,而且xml文件头部的编码声明也必须是utf-8,声明为其它编码会报错。网上的解决办法都是先替换xml文件头部的编码声明,再把内容转换为utf-8编码后交给minidom解析,实际测试可行,不过有点累赘的感觉。
本节是 python解析xml模块封装代码 的第二部分。====写xml内容的分割线=========
代码如下:
#!/usr/bin/env python
# encoding: utf-8
"""Thin wrapper around xml.dom.minidom for writing a test-result XML file.

Nodes are addressed with a minimal path syntax: ``/tag/tag[index]/...``.
Indexes are zero-based and count only siblings with the same tag name;
attribute predicates are not supported.
"""
from xml.dom import minidom

# Text types on both Python 2 (str, unicode) and Python 3 (str, str).
_STRING_TYPES = (type(''), type(u''))


class xmlwrite:
    """Build an XML document rooted at ``<api>`` and save it to a file.

    Args:
        resultfile: path of the file written by :meth:`save_xml`.
    """

    def __init__(self, resultfile):
        self.resultfile = resultfile
        self.rootname = 'api'
        self.__create_xml_dom()

    def __create_xml_dom(self):
        """Create the empty document and remember its root element."""
        xmlimpl = minidom.getDOMImplementation()
        self.dom = xmlimpl.createDocument(None, self.rootname, None)
        self.root = self.dom.documentElement

    def __get_spec_node(self, xpath):
        """Return the node addressed by ``xpath``, or None if it is missing.

        Empty segments are ignored, so ``'/'`` addresses the root and
        ``'/case[1]/'`` addresses the second ``<case>`` child of the root.
        A segment without ``[index]`` selects the first matching child.

        Raises:
            ValueError: if the text inside ``[...]`` is not an integer.
        """
        current = self.root
        for segment in xpath.split('/'):
            segment = segment.strip()
            if not segment:
                continue
            bracket = segment.find('[')
            if bracket > -1:
                tagname = segment[:bracket]
                index = int(segment[bracket + 1:-1])
            else:
                # No index given: use the whole segment as the tag name
                # (slicing with the -1 from find() would drop its last char).
                tagname = segment
                index = 0
            matches = [child for child in current.childNodes
                       if child.nodeName == tagname]
            if index < 0 or index >= len(matches):
                return None
            current = matches[index]
        return current

    def write_node(self, parent, nodename, value, attribute=None, cdata=False):
        """Append a new ``<nodename>`` element under the node at ``parent``.

        Args:
            parent: path of the parent node (see the module docstring).
            nodename: tag name of the element to create.
            value: text content; ``None`` or ``''`` creates an empty
                element. Non-string values are converted with ``str()``.
            attribute: optional dict of attribute names to values.
            cdata: when true, store ``value`` in a CDATA section.

        If the parent path cannot be resolved, a message is printed and the
        element is appended to the root instead.
        """
        node = self.dom.createElement(nodename)
        if value is not None and value != '':
            if not isinstance(value, _STRING_TYPES):
                # minidom can only serialise text; e.g. an int count.
                value = str(value)
            if cdata:
                nodedata = self.dom.createCDATASection(value)
            else:
                nodedata = self.dom.createTextNode(value)
            node.appendChild(nodedata)
        if attribute and isinstance(attribute, dict):
            for key, attrvalue in attribute.items():
                node.setAttribute(key, attrvalue)
        try:
            parentnode = self.__get_spec_node(parent)
        except (AttributeError, TypeError, ValueError):
            # Malformed path (non-string, or a non-integer index).
            parentnode = None
        if parentnode is None:
            print('get parent node fail, use the root as parent node')
            parentnode = self.root
        parentnode.appendChild(node)

    def write_start_time(self, time):
        """Append ``<starttime>`` to the root."""
        self.write_node('/', 'starttime', time)

    def write_end_time(self, time):
        """Append ``<endtime>`` to the root."""
        self.write_node('/', 'endtime', time)

    def write_pass_count(self, count):
        """Append ``<passcount>`` to the root."""
        self.write_node('/', 'passcount', count)

    def write_fail_count(self, count):
        """Append ``<failcount>`` to the root."""
        self.write_node('/', 'failcount', count)

    def write_case(self):
        """Append a new, empty ``<case>`` element to the root."""
        self.write_node('/', 'case', None)

    def write_case_no(self, index, value):
        """Append ``<no>`` to the ``index``-th ``<case>``."""
        self.write_node('/case[%s]/' % index, 'no', value)

    def write_case_url(self, index, value):
        """Append ``<url>`` to the ``index``-th ``<case>``."""
        self.write_node('/case[%s]/' % index, 'url', value)

    def write_case_dbdata(self, index, value):
        """Append ``<dbdata>`` to the ``index``-th ``<case>``."""
        self.write_node('/case[%s]/' % index, 'dbdata', value)

    def write_case_apidata(self, index, value):
        """Append ``<apidata>`` to the ``index``-th ``<case>``."""
        self.write_node('/case[%s]/' % index, 'apidata', value)

    def write_case_dbsql(self, index, value):
        """Append ``<dbsql>`` (as CDATA) to the ``index``-th ``<case>``."""
        self.write_node('/case[%s]/' % index, 'dbsql', value, cdata=True)

    def write_case_apixpath(self, index, value):
        """Append ``<apixpath>`` to the ``index``-th ``<case>``."""
        self.write_node('/case[%s]/' % index, 'apixpath', value)

    def save_xml(self):
        """Serialise the document as UTF-8 and write it to ``resultfile``."""
        # toxml(encoding=...) returns encoded bytes on Python 2 and 3, so the
        # file is opened in binary mode; 'with' closes it even on error.
        data = self.dom.toxml(encoding='utf-8')
        with open(self.resultfile, 'wb') as myfile:
            myfile.write(data)


if __name__ == '__main__':
    xr = xmlwrite(r'd:\test.xml')
    xr.write_start_time('2223')
    xr.write_end_time('444')
    xr.write_pass_count('22')
    xr.write_fail_count('33')
    xr.write_case()
    xr.write_case()
    xr.write_case_no(0, '0')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_dbsql(0, 'select * from ')
    xr.write_case_dbdata(0, 'dbtata')
    xr.write_case_apixpath(0, '/xpath')
    xr.write_case_apidata(0, 'apidata')
    xr.write_case_no(1, '1')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_dbsql(1, 'select 1 from ')
    xr.write_case_dbdata(1, 'dbtata1')
    xr.write_case_apixpath(1, '/xpath1')
    xr.write_case_apidata(1, 'apidata1')
    xr.save_xml()
以上封装了minidom,支持通过xpath来写节点,不支持xpath带属性的匹配,但支持带索引的匹配。比如:/root/child[1], 表示root的第2个child节点。