"""
xml_test1.py -- V.1.1 by leonardo maffi, Feb 7 2007

This is just a little exercise of managing XML from Python.
This is the first time I manage XML with Python, so in this code there can be
various problems.

This is the Python version of the Perl code of the article:
"XML for Perl developers, Part 1: XML plus Perl -- simply magic
Integrate XML into a Perl application using XML::Simple"
By Jim Dixon, 30 Jan 2007:
http://www-128.ibm.com/developerworks/xml/library/x-xmlperl1.html?ca=dgr-lnxw01XML-Simple

REQUIREMENTS to run this code:
- Python V.2.5 (but with little changes it works with Python 2.3 too).
- cElementTree, that is built-in in Python 2.5, but can be installed for
  older versions of Python too, for free:
  http://effbot.org/zone/element-index.htm
- mx.DateTime, to manage dates better than the built-ins of CPython.
  Can be found free: http://www.egenix.com/files/python/mxDateTime.html

THINGS TO BE DONE FOR THIS EXERCISE:
- To save space, change all of the subelements to attributes
- Increase prices by 20%
- Make all prices look the same, so all will show two decimal places
- Sort the list [I sort it according to pet name]
- Replace dates of birth with ages
"""

import xml.etree.cElementTree as cet
from time import strptime

from mx import DateTime


def plural(n):
    """Return the English plural suffix for the count `n`.

    The result is "" when `n` is exactly 1 and "s" otherwise, so a zero
    count reads "0 days" rather than "0 day".
    """
    return "" if n == 1 else "s"


def manage_dob(pet_attr_text):
    """Turn a date-of-birth string into an ("age", text) attribute pair.

    `pet_attr_text` is parsed with the "%d %B %Y" format, for example
    "1 February 2004". The age is the difference between now and that date,
    reported in years if at least one year has passed, otherwise in months
    if at least one month has passed, otherwise in days.
    """
    dob = strptime(pet_attr_text, "%d %B %Y")
    # The first three fields of the parsed time are year, month and day.
    dob2 = DateTime.Date(*dob[0:3])
    diff = DateTime.RelativeDateDiff(DateTime.now(), dob2)
    if diff.years > 0:
        age = str(diff.years) + " year" + plural(diff.years)
    elif diff.months > 0:
        age = str(diff.months) + " month" + plural(diff.months)
    else:
        age = str(diff.days) + " day" + plural(diff.days)
    return "age", age


def manage_price(pet_attr_text):
    """Turn a price string into a ("price", text) attribute pair.

    The price is increased by 20% and formatted with two decimal places.
    """
    return "price", "%.02f" % (1.2 * float(pet_attr_text))


def et_prettify(elem, level=0):
    """Indent an Element and its subelements, recursively, in place.

    Missing or whitespace-only `text` and `tail` strings are replaced with a
    newline plus indentation so that the serialized tree is pretty printed.
    `level` is the nesting depth of `elem` (0 for the root).
    """
    # From: http://effbot.org/zone/element-lib.htm#prettyprint
    i = "\n" + level * " "
    if len(elem):
        if not elem.text or not elem.text.strip():
            elem.text = i + " "
        for child in elem:
            et_prettify(child, level + 1)
        # `child` is now the last subelement: its tail must drop back to the
        # parent's indentation so the parent's closing tag lines up.
        if not child.tail or not child.tail.strip():
            child.tail = i
    else:
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = i


def process_xml(input_xml, declaration=True, prettify=True):
    """Convert a pets document from subelement form to attribute form.

    Each child of the root becomes an empty element (same tag) under a new
    "pets" root, with its subelements turned into attributes. Subelements
    whose tag has a matching module-level `manage_<tag>` function are passed
    through it first (e.g. "dob" becomes "age", "price" is raised by 20%).
    The pets are emitted sorted by the text of their "name" subelement.

    input_xml   -- the source document as a string; every child of the root
                   must contain a "name" subelement.
    declaration -- if true and `input_xml` starts with an XML declaration
                   ("<?xml ...?>"), that declaration is copied in front of
                   the result. Nothing is prepended when the input has none.
    prettify    -- if true, the result is indented with et_prettify().

    Returns the serialized document produced by cet.tostring().
    """
    root = cet.fromstring(input_xml)

    # Compute on the fly the dispatch table dict, it equals to:
    # {"dob": manage_dob, "price": manage_price, ...}
    pr = "manage_"
    dispatch = dict((name[len(pr):], globals()[name])
                    for name in globals() if name.startswith(pr))

    root2 = cet.Element("pets")
    # Sort according to names; the parsed tree itself is only read, so the
    # sorted list can be walked directly instead of rewriting root in place.
    for pet in sorted(root, key=lambda el: el.find("name").text):
        attributes = {}
        for pet_attr in pet:
            if pet_attr.tag in dispatch:
                key, val = dispatch[pet_attr.tag](pet_attr.text)
            else:
                key, val = pet_attr.tag, pet_attr.text
            attributes[key] = val
        # Attributes are unsorted, it seems there's no way to put 'name' first
        cet.SubElement(root2, pet.tag, attributes)

    if prettify:  # optional
        et_prettify(root2)
    result = cet.tostring(root2)

    # Only copy a real declaration: without this check an input lacking the
    # "<?xml ...?>" prolog would get its root start tag prepended instead.
    if declaration and input_xml.startswith("<?xml"):
        xml_declaration = input_xml.split(">", 1)[0] + ">\n"
        result = xml_declaration + result
    return result


# NOTE(review): this sample contains no XML markup at all (the element tags
# appear to have been lost), so cet.fromstring() cannot parse it as it stands.
# process_xml() needs a root element whose children each hold a <name>
# subelement, optionally with <dob> and <price> -- restore the original
# document before running the script.
input_xml = """\ Madness 1 February 2004 150 Maggie 12 October 2006 75 Rosie Little 28 January 2007 25 """

# If used with a file on disk:
# tree = cet.parse("xml_test2.xml")
# root = tree.getroot()

if __name__ == "__main__":
    print(process_xml(input_xml, declaration=True, prettify=True))

""" OUTPUT XML: """