/usr/share/pyshared/bitten/util/xmlio.py is in trac-bitten-slave 0.6+final-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# -*- coding: utf-8 -*-
#
# Copyright (C) 2005-2007 Christopher Lenz <cmlenz@gmx.de>
# Copyright (C) 2007-2010 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://bitten.edgewall.org/wiki/License.
"""Utility code for easy input and output of XML.
The current implementation uses ``xml.dom.minidom`` under the hood for parsing.
"""
import os
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from UserDict import DictMixin
import cgi
import string
# Public names exported by a star-import of this module.
# NOTE(review): ``ParseError`` (raised by ``parse``) is not listed here, so
# star-importers cannot refer to it by name -- confirm whether that is intended.
__all__ = ['Fragment', 'Element', 'ParsedElement', 'parse']
# Markup format used by the docstrings in this module.
__docformat__ = 'restructuredtext en'
def _from_utf8(text):
"""Convert utf-8 string to unicode. All other input returned as-is."""
if isinstance(text, str):
return text.decode('utf-8')
else:
return text
def _to_utf8(text):
"""Convert any input to utf-8 byte string."""
if isinstance(text, str):
return text # presumes utf-8
elif not isinstance(text, unicode):
text = unicode(text)
return text.encode('utf-8')
__trans = string.maketrans('', '')
# http://www.w3.org/TR/xml11/#charsets (partial)
__todel = ('\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x12\x13\x14'
'\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f\x80\x81\x82\x83'
'\x84\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94'
'\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f')
__uni_trans = dict([(ord(c), None) for c in __todel])
def _escape_text(text):
    """Escape special characters in the provided text so that it can be safely
    included in XML text nodes.

    The control characters listed in ``__todel`` are removed, then ``&``,
    ``<`` and ``>`` are replaced by entity references (via ``cgi.escape``).

    :param text: a utf-8 byte string or a unicode string; any other object is
                 returned unchanged
    :return: the escaped text, of the same string type as the input
    """
    if isinstance(text, str):
        try:
            decoded = text.decode('utf-8')
        except UnicodeDecodeError:
            # Not valid utf-8: all that can be done is to filter raw bytes.
            text = text.translate(__trans, __todel)
        else:
            # Filter code points rather than bytes.  The bytes 0x80-0x9F are
            # also continuation bytes of multi-byte utf-8 sequences (u'\xc0'
            # is encoded as '\xc3\x80'), so deleting them byte-wise would
            # corrupt ordinary non-ASCII text.
            text = decoded.translate(__uni_trans).encode('utf-8')
        text = cgi.escape(text)
    elif isinstance(text, unicode):
        text = cgi.escape(text.translate(__uni_trans))
    return text
def _escape_attr(attr):
    """Escape special characters in the provided text so that it can be safely
    included in XML attribute values.

    The value gets the same treatment as text nodes (see `_escape_text`) and,
    because attribute values are written between double quotes, every double
    quote is additionally replaced by the ``&quot;`` entity reference:

    >>> _escape_attr('"baz"')
    '&quot;baz&quot;'

    :param attr: the attribute value; objects that are not strings are
                 returned unchanged
    :return: the escaped value
    """
    if isinstance(attr, basestring):
        return _escape_text(attr).replace('"', '&quot;')
    return attr
class Fragment(object):
    """A collection of XML elements.

    A fragment holds an ordered list of children, each of which is either an
    element object or a unicode string representing a text node.
    """

    __slots__ = ['children']

    def __init__(self):
        """Create an XML fragment."""
        self.children = []

    def __getitem__(self, nodes):
        """Add nodes to the fragment.

        :param nodes: a single node, or a list or tuple of nodes; each one is
                      passed to `append()`
        :return: the fragment itself, so that calls can be chained
        """
        if not isinstance(nodes, (list, tuple)):
            nodes = [nodes]
        for node in nodes:
            self.append(node)
        return self

    def __str__(self):
        """Return a string representation of the XML fragment."""
        buf = StringIO()
        self.write(buf)
        return buf.getvalue()

    def append(self, node):
        """Append an element or fragment as child.

        * an `Element` is added as a child node
        * the children of any other `Fragment` are spliced into this one
        * ``None`` and the empty string are ignored
        * strings are stored as unicode text (byte strings are decoded as
          utf-8); every other object is converted with ``unicode()``
        """
        # Element derives from Fragment, so it has to be tested first.
        if isinstance(node, Element):
            self.children.append(node)
        elif isinstance(node, Fragment):
            self.children += node.children
        elif node is not None and node != '':
            if isinstance(node, basestring):
                self.children.append(_from_utf8(node))
            else:
                self.children.append(unicode(node))

    def write(self, out, newlines=False):
        """Serializes the element and writes the XML to the given output
        stream.

        :param out: a file-like object providing ``write()``
        :param newlines: passed on to the ``write()`` method of child
                         elements
        """
        for child in self.children:
            if isinstance(child, (Element, ParsedElement)):
                child.write(out, newlines=newlines)
            elif child.startswith('<'):
                # Text starting with "<" is taken to be markup and is emitted
                # verbatim inside a CDATA section.  A literal "]]>" would end
                # that section prematurely and produce malformed XML, so it is
                # split across two adjacent CDATA sections.
                data = _to_utf8(child).replace(']]>', ']]]]><![CDATA[>')
                out.write('<![CDATA[' + data + ']]>')
            else:
                out.write(_to_utf8(_escape_text(child)))
class Element(Fragment):
    """Simple XML output generator based on the builder pattern.

    Construct XML elements by passing the tag name to the constructor:

    >>> print Element('foo')
    <foo/>

    Attributes can be specified using keyword arguments. The values of the
    arguments will be converted to strings and any special XML characters
    escaped:

    >>> print Element('foo', bar=42)
    <foo bar="42"/>
    >>> print Element('foo', bar='1 < 2')
    <foo bar="1 &lt; 2"/>
    >>> print Element('foo', bar='a & b')
    <foo bar="a &amp; b"/>

    Attributes whose value is ``None`` are left out. The order in which
    attributes are rendered is undefined.

    Elements can be nested using item access notation:

    >>> print Element('foo')[Element('bar'), Element('baz')]
    <foo><bar/><baz/></foo>

    Text nodes can be nested in an element by using strings instead of elements
    in item access. Any special characters in the strings are escaped
    automatically:

    >>> print Element('foo')['Hello world']
    <foo>Hello world</foo>
    >>> print Element('foo')[42]
    <foo>42</foo>
    >>> print Element('foo')['1 < 2']
    <foo>1 &lt; 2</foo>

    This technique also allows mixed content:

    >>> print Element('foo')['Hello ', Element('b')['world']]
    <foo>Hello <b>world</b></foo>

    Finally, text starting with an opening angle bracket is treated specially:
    under the assumption that the text actually contains XML itself, the whole
    thing is wrapped in a CDATA block instead of escaping all special characters
    individually:

    >>> print Element('foo')['<bar a="3" b="4"><baz/></bar>']
    <foo><![CDATA[<bar a="3" b="4"><baz/></bar>]]></foo>

    Valid input are utf-8 or unicode strings, or any type easily converted
    to unicode such as integers. Output is always utf-8.
    """

    __slots__ = ['name', 'attr']

    def __init__(self, name_, **attr):
        """Create an XML element using the specified tag name.

        The tag name must be supplied as the first positional argument. All
        keyword arguments following it are handled as attributes of the element.
        """
        Fragment.__init__(self)
        self.name = _from_utf8(name_)
        # Byte strings are decoded from utf-8; attributes set to None are
        # dropped altogether.
        self.attr = dict((_from_utf8(name), _from_utf8(value))
                         for name, value in attr.items()
                         if value is not None)

    def write(self, out, newlines=False):
        """Serializes the element and writes the XML to the given output
        stream.

        :param out: a file-like object providing ``write()``
        :param newlines: if true, ``os.linesep`` is written after the element
        """
        out.write('<')
        out.write(_to_utf8(self.name))
        for name, value in self.attr.items():
            out.write(_to_utf8(' %s="%s"' % (name, _escape_attr(value))))
        if self.children:
            out.write('>')
            Fragment.write(self, out, newlines)
            out.write('</' + _to_utf8(self.name) + '>')
        else:
            # No content: use the empty-element form.
            out.write('/>')
        if newlines:
            out.write(os.linesep)
class ParseError(Exception):
    """Raised when an XML document cannot be parsed."""
def parse(text_or_file):
    """Parse an XML document provided as string or file-like object.

    Returns an instance of `ParsedElement` that can be used to traverse the
    parsed document.

    :param text_or_file: the XML source; a byte or unicode string is converted
                         to utf-8 and parsed directly, anything else is handed
                         to ``minidom.parse()``
    :return: a `ParsedElement` wrapping the root element of the document
    :raise ParseError: if expat reports an error while parsing
    """
    import sys
    from xml.dom import minidom
    from xml.parsers import expat
    try:
        if isinstance(text_or_file, basestring):
            dom = minidom.parseString(_to_utf8(text_or_file))
        else:
            dom = minidom.parse(text_or_file)
    except expat.error:
        # The exception is fetched through sys.exc_info() rather than bound in
        # the except clause: "except X, e" is only accepted by Python 2 and
        # "except X as e" only by 2.6 and later, while this spelling is valid
        # on every version.
        raise ParseError(sys.exc_info()[1])
    return ParsedElement(dom.documentElement)
class ParsedElement(object):
    """Representation of an XML element that was parsed from a string or
    file.

    This class should not be used directly. Rather, XML text parsed using
    `xmlio.parse()` will return an instance of this class.

    >>> xml = parse('<root/>')
    >>> print xml.name
    root

    Parsed elements can be serialized to a string using the `write()` method:

    >>> import sys
    >>> parse('<root></root>').write(sys.stdout)
    <root/>

    For convenience, this is also done when coercing the object to a string
    using the builtin ``str()`` function, which is used when printing an
    object:

    >>> print parse('<root></root>')
    <root/>

    (Note that serializing the element will produce a normalized representation
    that may not exactly match the input string.)

    Attributes are accessed via the `attr` member:

    >>> print parse('<root foo="bar"/>').attr['foo']
    bar

    Attributes can also be updated, added or removed:

    >>> xml = parse('<root foo="bar"/>')
    >>> xml.attr['foo'] = 'baz'
    >>> print xml
    <root foo="baz"/>

    >>> del xml.attr['foo']
    >>> print xml
    <root/>

    >>> xml.attr['foo'] = 'bar'
    >>> print xml
    <root foo="bar"/>

    CDATA sections are included in the text content of the element returned by
    `gettext()`:

    >>> xml = parse('<root>foo<![CDATA[ <bar> ]]>baz</root>')
    >>> xml.gettext()
    'foo <bar> baz'

    Valid input are utf-8 or unicode strings, or any type easily converted
    to unicode such as integers. Output is always utf-8.
    """

    __slots__ = ['_node', 'attr']

    class _Attrs(DictMixin):
        """Simple wrapper around the element attributes to provide a dictionary
        interface."""

        def __init__(self, node):
            self._node = node

        def __getitem__(self, name):
            """Return the attribute value as utf-8, or raise ``KeyError``."""
            attr = self._node.getAttributeNode(name)
            if not attr:
                raise KeyError(name)
            return _to_utf8(attr.value)

        def __setitem__(self, name, value):
            """Set an attribute; the value may be any valid input type."""
            # The DOM stores whatever it is given, but serializing it only
            # works for unicode values: integers and non-ASCII byte strings
            # would make `write()` fail later on.  Normalize up front.
            if not isinstance(value, basestring):
                value = unicode(value)
            self._node.setAttribute(_from_utf8(name), _from_utf8(value))

        def __delitem__(self, name):
            self._node.removeAttribute(name)

        def keys(self):
            """Return the attribute names as utf-8 byte strings."""
            return [_to_utf8(key) for key in self._node.attributes.keys()]

    def __init__(self, node):
        """Wrap the given DOM element node."""
        self._node = node
        self.attr = ParsedElement._Attrs(node)

    name = property(fget=lambda self: self._node.localName,
                    doc='Local name of the element')
    namespace = property(fget=lambda self: self._node.namespaceURI,
                         doc='Namespace URI of the element')

    def children(self, name=None):
        """Iterate over the child elements of this element.

        If the parameter `name` is provided, only include elements with a
        matching local name. Otherwise, include all elements.

        For backwards compatibility the qualified tag name (including any
        namespace prefix) is accepted as well.
        """
        # nodeType 1 is ELEMENT_NODE; text, comments etc. are skipped.
        elements = [c for c in self._node.childNodes if c.nodeType == 1]
        for child in elements:
            if name is None or name in (child.localName, child.tagName):
                yield ParsedElement(child)

    def __iter__(self):
        """Iterate over all child elements."""
        return self.children()

    def gettext(self):
        """Return the text content of this element.

        This concatenates the values of all text and CDATA nodes that are
        immediate children of this element.
        """
        # nodeType 3 is TEXT_NODE, 4 is CDATA_SECTION_NODE.
        return ''.join([_to_utf8(c.nodeValue)
                        for c in self._node.childNodes
                        if c.nodeType in (3, 4)])

    def write(self, out, newlines=False):
        """Serializes the element and writes the XML to the given output
        stream.

        :param out: a file-like object providing ``write()``
        :param newlines: if true, the output is pretty-printed with newlines
                         and tab indentation; otherwise it is written without
                         any added whitespace
        """
        out.write(self._node.toprettyxml(newl=newlines and '\n' or '',
                                         indent=newlines and '\t' or '',
                                         encoding='utf-8'))

    def __str__(self):
        """Return a string representation of the XML element."""
        buf = StringIO()
        self.write(buf)
        return buf.getvalue()
if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod
    testmod()
|