Sign in to follow this  
ninmonkeys

python xml.dom.minidom output whitespace removal

Recommended Posts

ninmonkeys    132
Using Python 2.5 with xml.dom.minidom, my output is:
<?xml version="1.0" ?>
<Log>
    <msg>
        game start!
    </msg>
    <msg>
        game end.
    </msg>
</Log>
I want indentation, but I don't want whitespace between an element's tags and its text. I want output like this:
<?xml version="1.0" ?>
<Log>
    <msg>game start!</msg>
    <msg>game end.</msg>
</Log>
This is my code:
import os
import xml.dom.minidom
from xml.dom.minidom import getDOMImplementation
class LoggerXML():
	"""Build an XML log document in memory and write it to a file.

	The document has a single root element. Each call to log() appends a
	``<msg>`` child holding one text node; writexml() serializes the whole
	document to ``logs/<filename>``.
	"""

	def __init__(self, filename, root_element):
		"""Create an empty document whose root tag is `root_element`.

		`filename` is joined onto the 'logs' directory. Nothing is written
		to disk until writexml() is called.
		"""
		self.filename = os.path.join('logs', filename)
		impl = getDOMImplementation()
		doc = self.doc = impl.createDocument(None, root_element, None)
		self.root = doc.documentElement

	def log(self, msg):
		"""Append a <msg> element containing `msg` to the root element."""
		self.set_text(self.add_element(self.root, 'msg'), msg)

	def add_element(self, parent, element_name):
		"""Create `element_name` as the last child of `parent` and return it.

		Passing None as `parent` attaches the new element to the root.
		"""
		if parent is None:
			parent = self.root
		element = self.doc.createElement(element_name)
		parent.appendChild(element)
		return element

	def set_text(self, parent, text):
		"""Append `text` to `parent`, then merge adjacent text nodes."""
		self.add_text(parent, text)
		self.element_normalize(parent)

	def get_text(self, parent):
		"""Return the value of `parent`'s first child as a string.

		Returns None when `parent` has no children at all.
		"""
		if parent.childNodes:
			return str(parent.childNodes[0].nodeValue)
		return None  # no child node, hence no text

	def add_text(self, parent, text):
		"""Append str(`text`) to `parent` as a new text node.

		Returns `parent` (not the text node) so calls can be chained on the
		element.
		"""
		text_node = self.doc.createTextNode(str(text))
		parent.appendChild(text_node)
		return parent

	def element_normalize(self, element):
		"""Merge adjacent text nodes below `element` into single nodes."""
		element.normalize()

	def writexml(self, indent='', addindent='    ', newl='\n'):
		"""Serialize the document to self.filename.

		`indent`, `addindent` and `newl` are passed straight through to
		minidom's Document.writexml().
		"""
		out = open(self.filename, "w")
		# try/finally instead of `with`: the thread targets Python 2.5, where
		# `with` needs a __future__ import. Closing explicitly guarantees the
		# data is flushed even if serialization raises.
		try:
			self.doc.writexml(out, indent, addindent, newl)
		finally:
			out.close()

if __name__ == '__main__':
	# Demo run: record two messages and write them to logs/log.xml.
	logger = LoggerXML("log.xml", "Log")
	for message in ("game start!", "game end."):
		logger.log(message)
	logger.writexml()

Share this post


Link to post
Share on other sites
ninmonkeys    132
Can you point me in the right direction, i.e. pseudocode for the recursive function? I'm trying to rewrite .writexml(), but it's becoming more complicated than I thought.

I think I need a recursive function, but the problem is that I'm not exactly sure what it's supposed to do, so my attempt is a mess.

def _write_subtree(self, element, writer, indent='', addindent=' ', newl='\n'):
    """Write every child of `element` to `writer`, recursing down the tree.

    `element` itself is NOT written; the caller emits its start and end
    tags around this call.

    Parameters:
        element   -- DOM node whose children are serialized.
        writer    -- any object with a write(str) method.
        indent    -- indentation placed before each child at this level.
        addindent -- extra indentation added for each deeper level.
        newl      -- string written at the end of each line.

    Layout rules:
        * an element with no children is written as ``<tag/>``;
        * an element whose only child is a text node stays on one line,
          ``<tag>text</tag>``, with the text written unmodified;
        * any other element gets its children on their own lines, one
          level deeper;
        * text mixed in between elements is stripped, and skipped entirely
          when it is whitespace only;
        * other node types (comments, CDATA, ...) are delegated to the
          node's own writexml().
    """
    # Local import so this method works wherever it is pasted.
    from xml.sax.saxutils import escape, quoteattr

    for child in element.childNodes:
        if child.nodeType == child.TEXT_NODE:
            text = child.data.strip()
            if text:
                writer.write("%s%s%s" % (indent, escape(text), newl))
            continue

        if child.nodeType != child.ELEMENT_NODE:
            child.writexml(writer, indent, addindent, newl)
            continue

        writer.write("%s<%s" % (indent, child.tagName))
        attrs = child.attributes
        # Sorted so the output is stable from run to run.
        for attr_name in sorted(attrs.keys()):
            writer.write(" %s=%s" % (attr_name, quoteattr(attrs[attr_name].value)))

        grandchildren = child.childNodes
        if not grandchildren:
            writer.write("/>%s" % newl)
        elif len(grandchildren) == 1 and grandchildren[0].nodeType == child.TEXT_NODE:
            # Text-only element: keep start tag, text and end tag together.
            writer.write(">%s</%s>%s" % (escape(grandchildren[0].data), child.tagName, newl))
        else:
            writer.write(">%s" % newl)
            self._write_subtree(child, writer, indent + addindent, addindent, newl)
            writer.write("%s</%s>%s" % (indent, child.tagName, newl))

def _custom_writexml(self, writer, indent='', addindent=' ', newl='\n', encoding=None):
    """Write the whole document to `writer`.

    Works like minidom's writexml(), except that elements containing
    nothing but text are not split over several lines; that layout is
    produced by self._write_subtree().

    Parameters:
        writer    -- any object with a write(str) method.
        indent    -- indentation placed before the root element.
        addindent -- extra indentation added for each nesting level.
        newl      -- string written at the end of each line.
        encoding  -- if given, named in the XML declaration. Only the
                     declaration changes; no text is encoded here.

    Attributes on the root element are not written.
    """
    if encoding is None:
        writer.write('<?xml version="1.0" ?>%s' % newl)
    else:
        writer.write('<?xml version="1.0" encoding="%s" ?>%s' % (encoding, newl))

    root_name = self.root.nodeName
    writer.write("%s<%s>%s" % (indent, root_name, newl))
    # The root's children sit one level deeper than the root itself.
    self._write_subtree(self.root, writer, indent + addindent, addindent, newl)
    writer.write("%s</%s>%s" % (indent, root_name, newl))
This is the sample XML file that I'm trying to generate:
<?xml version="1.0" ?>
<Log>
<msg>game start!</msg>
<graphics>
<resolution h="800" w="800"/>
<LOD value="1"/>
</graphics>
<character name="jake">
<loc x="20.3" z="-5"/>
</character>
<msg>game end.</msg>
</Log>


Share this post


Link to post
Share on other sites
Oluseyi    2103
The problem is simple. In an XML document like this:
<?xml version="1.0" ?>
<Log>
<msg>game start!</msg>
<msg>game end.</msg>
</Log>

the msg nodes have child nodes (text nodes), and the default writer in xml.dom.minidom inserts a newline whenever a node has child nodes. You need to supply a writer that doesn't insert a newline when a node has a single text node child.

That's a rather elaborate undertaking just to change a single feature, so do what I did instead:


# This is a modification of your original code, with the function below added
# and a few new lines in your main function.

def new_writexml(self, writer, indent="", addindent="", newl=""):
    """Replacement for xml.dom.minidom.Element.writexml.

    Writes the element the way minidom does, except that an element whose
    only child is a text node is kept on one line (``<tag>text</tag>``)
    instead of having the text placed on a line of its own.

    Parameters:
        writer    -- any object with a write(str) method.
        indent    -- current indentation.
        addindent -- indentation added for each deeper level.
        newl      -- newline string.
    """
    # Public escaping helper, imported locally so the function does not
    # depend on minidom's private _write_data being in scope.
    from xml.sax.saxutils import escape

    writer.write(indent + "<" + self.tagName)

    attrs = self.attributes
    # sorted() instead of keys().sort(): keys() is not a list on Python 3.
    for a_name in sorted(attrs.keys()):
        value = escape(attrs[a_name].value, {'"': "&quot;"})
        writer.write(' %s="%s"' % (a_name, value))

    if not self.childNodes:
        writer.write("/>%s" % newl)
        return

    only_child = self.childNodes[0]
    if len(self.childNodes) == 1 and only_child.nodeType == self.TEXT_NODE:
        # The one difference from minidom's version: text-only elements
        # stay on a single line. The text is escaped so that '&', '<' and
        # '>' in the data still produce well-formed XML.
        writer.write(">%s</%s>%s" % (escape(only_child.data), self.tagName, newl))
        return

    writer.write(">%s" % newl)
    for node in self.childNodes:
        node.writexml(writer, indent + addindent, addindent, newl)
    writer.write("%s</%s>%s" % (indent, self.tagName, newl))

if __name__ == '__main__':
    # Hook new_writexml into minidom for the duration of the demo. The patch
    # is applied to the Element class, so it affects every element that is
    # serialized while it is installed.
    import xml.dom.minidom

    oldwritexml = xml.dom.minidom.Element.writexml
    xml.dom.minidom.Element.writexml = new_writexml
    try:
        l = LoggerXML("log.xml", "Log")

        l.log("game start!")
        l.log("game end.")

        l.writexml()
    finally:
        # Put minidom's own writer back so nothing else is left patched.
        xml.dom.minidom.Element.writexml = oldwritexml



Let me know how it works out for you.

Share this post


Link to post
Share on other sites
Ron Rothman    122
With some minor mods, Oluseyi's code does the trick. I've posted the details here:

http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace

(I'll also paste the updated code below.)

A tweak was needed to correctly handle entities within text nodes.

Thanks to Oluseyi for the solution,
Ron


def fixed_writexml(self, writer, indent="", addindent="", newl=""):
    """Replacement for xml.dom.minidom.Element.writexml.

    Identical in layout to minidom's version, except that an element whose
    only child is a text node is written on a single line
    (``<tag>text</tag>``). The text node is serialized through its own
    writexml(), so entities in the text are escaped correctly.

    Parameters:
        writer    -- any object with a write(str) method.
        indent    -- current indentation.
        addindent -- indentation added for each deeper level.
        newl      -- newline string.
    """
    # Public escaping helper, used for attribute values instead of
    # minidom's private _write_data.
    from xml.sax.saxutils import escape

    writer.write(indent + "<" + self.tagName)

    attrs = self.attributes
    # sorted() instead of keys().sort(): keys() is not a list on Python 3.
    for a_name in sorted(attrs.keys()):
        value = escape(attrs[a_name].value, {'"': "&quot;"})
        writer.write(' %s="%s"' % (a_name, value))

    if self.childNodes:
        if len(self.childNodes) == 1 and self.childNodes[0].nodeType == self.TEXT_NODE:
            writer.write(">")
            # Empty indent/newline so the text sits directly between the tags.
            self.childNodes[0].writexml(writer, "", "", "")
            writer.write("</%s>%s" % (self.tagName, newl))
            return
        writer.write(">%s" % newl)
        for node in self.childNodes:
            node.writexml(writer, indent + addindent, addindent, newl)
        writer.write("%s</%s>%s" % (indent, self.tagName, newl))
    else:
        writer.write("/>%s" % newl)
# Replace minidom's Element.writexml with ours. The assignment is made on the
# Element class itself, so it applies to every element serialized afterwards.
# `xml.dom.minidom` must already be imported for this name to resolve.
xml.dom.minidom.Element.writexml = fixed_writexml





[Edited by - Ron Rothman on June 15, 2008 11:24:38 AM]

Share this post


Link to post
Share on other sites
Ron Rothman    122
Good news: I've come across a much simpler solution (if PyXML is installed, but you still want to use minidom to generate the document): xml.dom.ext.PrettyPrint. Here's a small wrapper for it:


from xml.dom.ext import PrettyPrint
from StringIO import StringIO

def toprettyxml_fixed(node, encoding='utf-8'):
    """Return `node` pretty-printed by PyXML's PrettyPrint, as a string.

    PrettyPrint writes to a stream, so the output is collected in an
    in-memory buffer and handed back once printing has finished.
    """
    buffer = StringIO()
    PrettyPrint(node, stream=buffer, encoding=encoding)
    return buffer.getvalue()




[Edited by - Ron Rothman on June 15, 2008 11:23:37 AM]

Share this post


Link to post
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now

Sign in to follow this