Python Programming/XML Tools


Introduction

edit

Python includes several standard-library modules for manipulating XML.

xml.sax.handler

edit

Python Doc

import xml.sax.handler as saxhandler
import xml.sax as saxparser


class MyReport:
    """Placeholder object handed to the content handler (reserved for future use)."""

    def __init__(self):
        self.Y = 1


class MyCH(saxhandler.ContentHandler):
    """SAX content handler that prints a line for each parsing event it sees.

    Args:
        report: Object stored on the handler as ``self.report``; the
            callbacks below do not use it yet.
    """

    def __init__(self, report):
        # Initialise the base class so ContentHandler's own state
        # (the document locator slot) exists before parsing starts.
        super().__init__()
        self.X = 1
        self.report = report

    def startDocument(self):
        """Called once by the parser before any element is reported."""
        print('startDocument')

    def startElement(self, name, attrs):
        """Called for every opening tag; ``attrs`` holds the tag's attributes."""
        print('Element:', name)


report = MyReport()  # for future use
ch = MyCH(report)

# The leading backslash keeps the document from starting with a blank line.
xml = """\
<collection>
  <comic title=\"Sandman\" number='62'>
    <writer>Neil Gaiman</writer>
    <penciller pages='1-9,18-24'>Glyn Dillon</penciller>
    <penciller pages="10-17">Charles Vess</penciller>
  </comic>
</collection>
"""

print(xml)
saxparser.parseString(xml, ch)

xml.dom.minidom

edit

An example of parsing an RSS feed with the DOM:

from xml.dom import minidom as dom
import urllib.request


def fetchPage(url):
    """Download ``url`` and return the raw response body as bytes.

    The body is left undecoded so that the XML parser can honour the
    encoding declared in the document itself.
    """
    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(url) as response:
        return response.read()


def _childText(node, tag):
    """Return the leading text of the first ``tag`` element under ``node``.

    Returns an empty string when the element is missing, is empty, or does
    not start with a text/CDATA node, instead of raising.
    """
    matches = node.getElementsByTagName(tag)
    if not matches:
        return ''
    first = matches[0].firstChild
    if first is None:
        return ''
    if first.nodeType not in (first.TEXT_NODE, first.CDATA_SECTION_NODE):
        return ''
    return first.wholeText


def extract(page):
    """Print the title, link and description of every ``<item>`` in ``page``.

    Args:
        page: The feed document as ``bytes`` or ``str``.
    """
    document = dom.parseString(page)
    for item in document.getElementsByTagName('item'):
        if not item.hasChildNodes():
            continue
        title = _childText(item, 'title')
        link = _childText(item, 'link')
        description = _childText(item, 'description')
        print(title, link, description)


if __name__ == '__main__':
    page = fetchPage("http://rss.slashdot.org/Slashdot/slashdot")
    extract(page)

The sample XML document is taken from the PyXML documentation.