Source code for nxpy.xml.util

# nxpy.xml package -----------------------------------------------------------

# Copyright Nicola Musatti 2017
# Use, modification, and distribution are subject to the Boost Software
# License, Version 1.0. (See accompanying file LICENSE.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)

# See http://nxpy.sourceforge.net for library home page. ---------------------

r"""
XML utility classes and functions.

Requires at least Python 2.6: simply importing this module fails on Python 2.5.

"""
from __future__ import absolute_import

import collections
import re

try:
    import collections.abc as collections_abc
except ImportError:  # Python 2: the ABCs live directly in 'collections'
    collections_abc = collections

import lxml.etree

import six

import nxpy.core.error
import nxpy.core.past

nxpy.core.past.enforce_at_least(nxpy.core.past.V_2_6)


def parse(src):
    r"""
    Parse the XML source 'src' and return the resulting document tree.

    The parser is created with ``strip_cdata=False`` so that CDATA sections
    are kept as such instead of being turned into plain text.
    """
    return lxml.etree.parse(src, lxml.etree.XMLParser(strip_cdata=False))
def make_property(elem, key=None):
    r"""
    Build a read/write property bound to the element stored in the owner's
    'elem' attribute.

    Without 'key' the property exposes the element's text; with a 'key' it
    exposes the attribute named 'key' of that element. In both cases writing
    through the property also sets the owner's '_modified' flag to True.
    """
    if not key:
        def _read_text(self):
            return getattr(self, elem).text

        def _write_text(self, value):
            target = getattr(self, elem)
            target.text = value
            self._modified = True

        return property(_read_text, _write_text)

    def _read_attribute(self):
        target = getattr(self, elem)
        return target.get(key)

    def _write_attribute(self, value):
        target = getattr(self, elem)
        target.set(key, value)
        self._modified = True

    return property(_read_attribute, _write_attribute)
class QName(object):
    r"""
    Represent a qualified name, i.e. a tag written as ``{url}tag``.

    Attributes:
        url: the namespace part found between the braces, or an empty string
            when the tag carries no ``{...}`` prefix.
        tag: the local name that follows the namespace part.
    """

    _re = re.compile(r"\{(.*)\}(.*)")

    def __init__(self, tag):
        r"""Split 'tag' into its 'url' and local 'tag' components."""
        m = QName._re.match(tag)
        if m is None:
            # No "{url}" prefix: the whole string is the local name.
            self.url = ""
            self.tag = tag
        else:
            self.url = m.group(1)
            self.tag = m.group(2)

    @property
    def text(self):
        r"""The qualified name rebuilt from 'url' and 'tag'."""
        if self.url:
            return "{{{0}}}{1}".format(self.url, self.tag)
        return self.tag

    def __str__(self):
        # 'text' is a property, so it must be read, not called.
        return self.text
class Namespace(object):
    r"""
    Represent an XML namespace and provide several utility functions that help
    handle a document without namespace tags.

    Attributes:
        url: the namespace URL, possibly empty.
        nspace: the ``{url}`` prefix prepended to every tag, or an empty
            string when 'url' is empty.
    """

    def __init__(self, url="", element=None):
        r"""
        Initialize from either a namespace 'url' or an 'element' whose tag
        supplies the namespace.

        Raise 'nxpy.core.error.ArgumentError' when both a non-empty 'url' and
        an 'element' are given.
        """
        if len(url) > 0 and element is not None:
            raise nxpy.core.error.ArgumentError(
                    "Only one between url and element should be specified")
        if element is not None:
            url = QName(element.tag).url
        self.url = url
        self.nspace = "{" + url + "}" if len(url) != 0 else ""

    def find(self, element, tag):
        r"""Return ``element.find()`` for 'tag' qualified by this namespace."""
        return element.find(self.nspace + tag)

    def findall(self, element, tag):
        r"""Return ``element.findall()`` for 'tag' qualified by this namespace."""
        return element.findall(self.nspace + tag)

    def findtext(self, element, tag, default=None):
        r"""
        Return ``element.findtext()`` for 'tag' qualified by this namespace,
        passing 'default' through.
        """
        return element.findtext(self.nspace + tag, default)

    def get_tag(self, element):
        r"""
        Return the tag of 'element' with the first ``len(self.nspace)``
        characters removed.
        """
        return element.tag[len(self.nspace):]

    def Element(self, tag, attrib=None, **extra):
        r"""
        Create a detached element named 'tag' within this namespace.

        'attrib' defaults to a fresh empty dictionary on every call, rather
        than to one dictionary object shared by all calls.
        """
        if attrib is None:
            attrib = {}
        return lxml.etree.Element(self.nspace + tag, attrib, **extra)

    def SubElement(self, parent, tag, attrib=None, **extra):
        r"""
        Create an element named 'tag' within this namespace as a child of
        'parent'.

        'attrib' defaults to a fresh empty dictionary on every call, rather
        than to one dictionary object shared by all calls.
        """
        if attrib is None:
            attrib = {}
        return lxml.etree.SubElement(parent, self.nspace + tag, attrib, **extra)
class ContainerElementMixin(Namespace):
    r"""
    Common state for container look-alikes whose items are stored in a DOM.

    The items are the children of 'root', the 'root_tag' element looked up
    under 'parent'; 'root' is None when no such element is found.
    """

    def __init__(self, parent, root_tag, namespace=""):
        r"""
        Remember 'parent' and 'root_tag', look up the root element within
        'namespace' and start with a cleared 'modified' flag.
        """
        super(ContainerElementMixin, self).__init__(namespace)
        self.parent = parent
        self.root_tag = root_tag
        located = self.find(self.parent, self.root_tag)
        self.root = located
        self.modified = False

    def __len__(self):
        r"""Return the number of children of the root element, if any."""
        if self.root is None:
            return 0
        return len(self.root)
class MappingElementIterator(collections_abc.Iterator):
    r"""
    Iterator over the keys of a 'MappingElement'.

    The keys are obtained by applying the mapping's 'get_tag()' to each child
    of its 'root' element, in document order. A mapping whose 'root' is None
    yields nothing.
    """

    def __init__(self, element):
        r"""Start iterating over the children of 'element.root'."""
        self.element = element
        root = element.root
        # The container itself has no getchildren(); its children live under
        # 'root', which is None while the backing element does not exist.
        self.iter = iter(root) if root is not None else iter(())

    def __next__(self):
        r"""Return the next key or raise StopIteration."""
        return self.element.get_tag(next(self.iter))

    # Python 2 spelling of the iterator protocol.
    next = __next__
class MappingElement(ContainerElementMixin, collections_abc.MutableMapping):
    r"""
    Provide a tag/text map for the children of the 'root_tag' descendent of
    'parent'. Given:

    .. code-block:: xml

        <parent>
            <root_tag>
                <key1>value1</key1>
                <key2>value2</key2>
            </root_tag>
        </parent>

    One could write::

        mappingElement["key1"] == "value1"

    The 'modified' attribute is set to True by every assignment or deletion.
    """

    def __init__(self, parent, root_tag, namespace=""):
        r"""Bind the map to the 'root_tag' element found under 'parent'."""
        ContainerElementMixin.__init__(self, parent, root_tag, namespace)

    def __getitem__(self, key):
        r"""Return the text of the child tagged 'key' or raise KeyError."""
        if self.root is None:
            raise KeyError(key)
        elem = self.find(self.root, key)
        if elem is None:
            raise KeyError(key)
        return elem.text

    def __setitem__(self, key, value):
        r"""
        Set the text of the child tagged 'key' to 'value', creating the child,
        and the root element itself, when they do not exist yet.
        """
        if self.root is None:
            self.root = self.SubElement(self.parent, self.root_tag)
        elem = self.find(self.root, key)
        if elem is None:
            elem = self.SubElement(self.root, key)
        self.modified = True
        elem.text = value

    def __delitem__(self, key):
        r"""Remove the child tagged 'key' or raise KeyError."""
        if self.root is None:
            raise KeyError(key)
        elem = self.find(self.root, key)
        if elem is None:
            raise KeyError(key)
        self.modified = True
        self.root.remove(elem)

    def __iter__(self):
        r"""Return a 'MappingElementIterator' over this map."""
        return MappingElementIterator(self)
class SequenceElement(ContainerElementMixin, collections_abc.MutableSequence):
    r"""
    Provide a sequence of the values of the children tagged 'element_tag' of
    the 'root_tag' descendent of 'parent'. Given:

    .. code-block:: xml

        <parent>
            <root_tag>
                <tag>value1</tag>
                <tag>value2</tag>
            </root_tag>
        </parent>

    One could write::

        sequenceElement[1] == "value2"

    Internally 'elements' holds one ``[position, element]`` pair per item,
    where 'position' is the index of 'element' among all the children of the
    root element, comments and differently tagged children included.
    """

    def __init__(self, parent, root_tag, element_tag, namespace="", indent=" "):
        r"""
        Bind the sequence to the 'element_tag' children of the 'root_tag'
        element found under 'parent'. 'indent' is appended to the tail of
        the elements created by 'insert()'.
        """
        ContainerElementMixin.__init__(self, parent, root_tag, namespace)
        self.element_tag = element_tag
        self.indent = indent
        self.elements = []
        if self.root is not None:
            for position, child in enumerate(self.root):
                # Comments have no string tag, so test for them first.
                if (child.tag is not lxml.etree.Comment
                        and self.get_tag(child) == self.element_tag):
                    self.elements.append([position, child])

    def __len__(self):
        r"""Return the number of items in the sequence."""
        return len(self.elements) if self.elements is not None else 0

    def __getitem__(self, index):
        r"""Return the text of the item at 'index' or raise IndexError."""
        if self.root is None:
            raise IndexError()
        return self.elements[index][1].text

    def __setitem__(self, index, value):
        r"""
        Set the text of the item at 'index' to 'value'.

        When no item exists at 'index' a new one is appended to the root
        element, which is itself created if missing; in that case only
        index 0 is accepted. Raise IndexError for an 'index' greater than the
        number of children of the root element.
        """
        if self.root is None:
            if index != 0:
                raise IndexError()
            self.root = self.SubElement(self.parent, self.root_tag)
        elif index > len(self.root):
            raise IndexError()
        try:
            elem = self.elements[index][1]
        except IndexError:
            elem = self.SubElement(self.root, self.element_tag)
            self.elements.append([len(self.root) - 1, elem])
        elem.text = value
        self.modified = True

    def __delitem__(self, index):
        r"""Remove the item at 'index' or raise IndexError."""
        if self.root is None:
            raise IndexError()
        position = self.elements[index][0]  # IndexError when out of range
        if index < 0:
            # Normalize so that the renumbering below starts at the right item.
            index += len(self.elements)
        del self.root[position]
        del self.elements[index]
        # Every following item moved one place towards the start of the root.
        for entry in self.elements[index:]:
            entry[0] -= 1
        self.modified = True

    def insert(self, index, value):
        r"""
        Insert a new item with text 'value' before the item at 'index',
        creating the root element if missing.

        Negative indices count from the end, as with lists. Raise IndexError
        when 'index' is greater than the length of the sequence.
        """
        if index > len(self.elements):
            raise IndexError()
        if index < 0:
            index = max(0, index + len(self.elements))
        if self.root is None:
            self.root = self.SubElement(self.parent, self.root_tag)
        elem = self.Element(self.element_tag)
        elem.text = value
        # A root element without trailing text has a tail of None.
        elem.tail = (self.root.tail or "") + self.indent
        if index == len(self.elements):
            self.root.append(elem)
            self.elements.append([len(self.root) - 1, elem])
        else:
            position = self.elements[index][0]
            self.root.insert(position, elem)
            self.elements.insert(index, [position, elem])
            # Every following item moved one place towards the end of the root.
            for entry in self.elements[index + 1:]:
                entry[0] += 1
        self.modified = True
class Writer(object):
    r"""
    Pretty-print an XML tree.

    The opening tag of the root element is replaced by the literal 'root_tag'
    passed to the constructor, empty elements are written as ``<tag />``,
    tabs are optionally expanded and an XML declaration is optionally
    prepended.
    """

    _name_re = re.compile(r"<([^\s]+)")
    # NOTE(review): '_tag_re' is not used anywhere in this class.
    _tag_re = re.compile(r"(</?)[^:]+:((:?[^>]+>)|(:?[^/]+/>))")
    _empty_re = re.compile(r"<([^/^>^ ]*)\s*/>")

    def __init__(self, root_tag, attributes=None, tab_size=0):
        r"""
        Arguments:
            root_tag: the complete opening tag to emit for the root element.
            attributes: if not None, a mapping from which the "version",
                "encoding" and "standalone" items of the XML declaration
                are taken.
            tab_size: if greater than zero, the number of spaces each tab
                character is replaced with.
        """
        self.root_tag = root_tag
        self.tab_size = tab_size
        self.attributes = attributes
        self.name = self._name_re.search(self.root_tag).group(1)
        # The name is matched literally: escape characters such as '.' that
        # are legal in XML names but special in regular expressions.
        self._root_re = re.compile(r"(<" + re.escape(self.name) + r"[^>]+>)")

    def marshal(self, node):
        r"""Return 'node' serialized as a string, followed by an empty line."""
        if nxpy.core.past.V_2_7.at_most():
            s = lxml.etree.tostring(node)
        else:
            s = lxml.etree.tostring(node, encoding="unicode")
        # Only the first occurrence, i.e. the root's opening tag, is replaced.
        s = self._root_re.sub(self.root_tag, s, 1)
        s = self._empty_re.sub(r"<\1 />", s)
        if self.tab_size > 0:
            s = s.replace("\t", " " * self.tab_size)
        if self.attributes is not None:
            d = ( '<?xml version="' + self.attributes.get("version", "1.0") +
                    '" encoding="' + self.attributes.get("encoding", "UTF-8") + '"')
            if "standalone" in self.attributes:
                d += ' standalone="' + self.attributes["standalone"] + '"'
            d += "?>\n"
            s = d + s
        return s + "\n\n"

    def write(self, node, where):
        r"""
        Write the serialized 'node' to 'where', which is either a file path
        or an already open file-like object.

        The file is closed afterwards in both cases, including when 'where'
        is a file-like object supplied by the caller.
        """
        f = open(where, "w+") if isinstance(where, six.string_types) else where
        try:
            f.write(self.marshal(node))
        finally:
            f.close()