# Source code for astropy.utils.xml.writer

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
Contains a class that makes it simple to stream out well-formed and
nicely-indented XML.
"""

# STDLIB
import contextlib
import textwrap


def xml_escape_cdata(s):
    """
    Escapes &, < and > in an XML CDATA string.

    Parameters
    ----------
    s : str
        The character data to escape.

    Returns
    -------
    escaped : str
        A copy of ``s`` with ``&``, ``<`` and ``>`` replaced by the
        corresponding XML entities.
    """
    # "&" has to be handled first; otherwise the ampersands introduced
    # by the "&lt;" / "&gt;" replacements would be escaped a second time.
    s = s.replace(u"&", u"&amp;")
    s = s.replace(u"<", u"&lt;")
    s = s.replace(u">", u"&gt;")
    return s


def xml_escape(s):
    """
    Escapes &, ', ", < and > in an XML attribute value.

    Parameters
    ----------
    s : str
        The attribute value to escape.

    Returns
    -------
    escaped : str
        A copy of ``s`` with ``&``, ``'``, ``"``, ``<`` and ``>``
        replaced by the corresponding XML entities.
    """
    # "&" has to be handled first; otherwise the ampersands introduced
    # by the later replacements would be escaped a second time.
    s = s.replace(u"&", u"&amp;")
    s = s.replace(u"'", u"&apos;")
    s = s.replace(u"\"", u"&quot;")
    s = s.replace(u"<", u"&lt;")
    s = s.replace(u">", u"&gt;")
    return s


class XMLWriter:
    """
    A class to write well-formed and nicely indented XML.

    Use like this::

        w = XMLWriter(fh)
        with w.tag('html'):
            with w.tag('body'):
                w.data('This is the content')

    Which produces (character data is written as-is, without
    indentation, unless the element is ended with ``wrap=True``)::

        <html>
         <body>
        This is the content </body>
        </html>
    """

    def __init__(self, file):
        """
        Parameters
        ----------
        file : writable file-like object.
            Only its ``write`` method is required; ``flush`` is used
            when the object provides it.
        """
        self.write = file.write
        if hasattr(file, "flush"):
            # Shadow the no-op `flush` method with the file's own.
            self.flush = file.flush
        self._open = 0  # true if start tag is open
        self._tags = []  # stack of the names of the currently open elements
        self._data = []  # character data buffered since the last flush
        # Indentation strings are produced by slicing this buffer, so
        # at most 64 spaces of indentation are ever written.
        self._indentation = u" " * 64

        # Use the escaping routines from the `_iterparser` module when it
        # can be imported, and the pure Python ones from this module
        # otherwise.
        try:
            from . import _iterparser
            self.xml_escape_cdata = _iterparser.escape_xml_cdata
            self.xml_escape = _iterparser.escape_xml
        except ImportError:
            self.xml_escape_cdata = xml_escape_cdata
            self.xml_escape = xml_escape

    def _flush(self, indent=True, wrap=False):
        """
        Flush internal buffers.

        Terminates a pending start tag, if any, and writes out the
        buffered character data (escaped).

        Parameters
        ----------
        indent : bool
            If `True`, a newline is written after the ``>`` that
            terminates a pending start tag.

        wrap : bool
            If `True`, the buffered data is re-flowed with
            `textwrap.fill`, indented one level deeper than the current
            element and written on lines of its own.
        """
        if self._open:
            if indent:
                self.write(u">\n")
            else:
                self.write(u">")
            self._open = 0
        if self._data:
            data = u''.join(self._data)
            if wrap:
                data_indent = self.get_indentation_spaces(1)
                data = textwrap.fill(
                    data,
                    initial_indent=data_indent,
                    subsequent_indent=data_indent)
                self.write(u'\n')
                self.write(self.xml_escape_cdata(data))
                self.write(u'\n')
                self.write(self.get_indentation_spaces())
            else:
                self.write(self.xml_escape_cdata(data))
            self._data = []

    def start(self, tag, attrib={}, **extra):
        """
        Opens a new element.  Attributes can be given as keyword
        arguments, or as a string/string dictionary.  The method
        returns an opaque identifier that can be passed to the
        :meth:`close` method, to close all open elements up to and
        including this one.

        Parameters
        ----------
        tag : str
            The element name

        attrib : dict of str -> str
            Attribute dictionary.  Alternatively, attributes can
            be given as keyword arguments.  The dictionary is copied,
            never modified.  Attributes whose value is `None` are
            omitted; the others are written sorted by name.

        Returns
        -------
        id : int
            Returns an element identifier.
        """
        self._flush()
        # This is just busy work -- we know our tag names are clean
        # tag = xml_escape_cdata(tag)
        self._data = []
        self._tags.append(tag)
        # The new tag is already on the stack, hence the -1: the start
        # tag is indented at the level of its parent's content.
        self.write(self.get_indentation_spaces(-1))
        self.write(u"<%s" % tag)
        if attrib or extra:
            merged = dict(attrib)
            merged.update(extra)
            # `sorted` works on both lists and dictionary views, unlike
            # calling `.sort()` on the result of `.items()`.
            for k, v in sorted(merged.items()):
                if v is not None:
                    # This is just busy work -- we know our keys are clean
                    # k = xml_escape_cdata(k)
                    v = self.xml_escape(v)
                    self.write(u" %s=\"%s\"" % (k, v))
        self._open = 1

        return len(self._tags)

    @contextlib.contextmanager
    def tag(self, tag, attrib={}, **extra):
        """
        A convenience method for use with the `with` statement::

            with writer.tag('foo'):
                writer.element('bar')
            # </foo> is implicitly closed here

        Parameters are the same as to `start`.

        The element is only closed when the body of the ``with``
        statement completes without raising.
        """
        self.start(tag, attrib, **extra)
        yield
        self.end(tag)

    def comment(self, comment):
        """
        Adds a comment to the output stream.

        Parameters
        ----------
        comment : str
            Comment text, as a Unicode string.
        """
        self._flush()
        self.write(self.get_indentation_spaces())
        self.write(u"<!-- %s -->\n" % self.xml_escape_cdata(comment))

    def data(self, text):
        """
        Adds character data to the output stream.

        The text is buffered and only written (escaped) at the next
        flush, i.e. when an element is started or ended or a comment
        is written.

        Parameters
        ----------
        text : str
            Character data, as a Unicode string.
        """
        self._data.append(text)

    def end(self, tag=None, indent=True, wrap=False):
        """
        Closes the current element (opened by the most recent call to
        `start`).

        Parameters
        ----------
        tag : str
            Element name.  If given, the tag must match the start tag.
            If omitted, the current element is closed.

        indent : bool
            If `True`, the end tag is preceded by the current
            indentation.

        wrap : bool
            If `True`, pending character data is word-wrapped and
            indented before being written (see `_flush`).
        """
        if tag:
            assert self._tags, "unbalanced end(%s)" % tag
            assert tag == self._tags[-1],\
                   "expected end(%s), got %s" % (self._tags[-1], tag)
        else:
            assert self._tags, "unbalanced end()"
        tag = self._tags.pop()
        if self._data:
            self._flush(indent, wrap)
        elif self._open:
            # Nothing was written inside the element: emit the
            # self-closing form instead of a separate end tag.
            self._open = 0
            self.write(u"/>\n")
            return
        if indent:
            self.write(self.get_indentation_spaces())
        self.write(u"</%s>\n" % tag)

    def close(self, id):
        """
        Closes open elements, up to (and including) the element identified
        by the given identifier.

        Parameters
        ----------
        id : int
            Element identifier, as returned by the `start` method.
        """
        # `start` returns the stack depth *after* pushing its element, so
        # that element is still open for as long as the depth is >= id.
        while len(self._tags) >= id:
            self.end()

    def element(self, tag, text=None, wrap=False, attrib={}, **extra):
        """
        Adds an entire element.  This is the same as calling `start`,
        `data`, and `end` in sequence. The `text` argument
        can be omitted.

        Parameters
        ----------
        tag : str
            The element name

        text : str, optional
            Character data for the element.  When empty or omitted a
            self-closing element is written.

        wrap : bool
            Passed on to `end`.

        attrib : dict of str -> str
            Attribute dictionary, as for `start`.  Alternatively,
            attributes can be given as keyword arguments.
        """
        self.start(tag, attrib, **extra)
        if text:
            self.data(text)
        self.end(indent=False, wrap=wrap)

    def flush(self):
        """
        Flushes the underlying file.  This default does nothing; the
        constructor replaces it with the file's own ``flush`` when the
        file has one.
        """
        pass  # replaced by the constructor

    def get_indentation(self):
        """
        Returns the number of indentation levels the file is currently
        in.
        """
        return len(self._tags)

    def get_indentation_spaces(self, offset=0):
        """
        Returns a string of spaces that matches the current
        indentation level.

        Parameters
        ----------
        offset : int
            Number of levels to add to (or, if negative, subtract
            from) the current indentation level.
        """
        # Clamp at zero: a negative slice bound would otherwise count
        # from the end of the buffer and return a long run of spaces.
        width = max(0, len(self._tags) + offset)
        return self._indentation[:width]

    @staticmethod
    def object_attrs(obj, attrs):
        """
        Converts an object with a bunch of attributes on an object
        into a dictionary for use by the `XMLWriter`.

        Parameters
        ----------
        obj : object
            Any Python object

        attrs : sequence of str
            Attribute names to pull from the object

        Returns
        -------
        attrs : dict
            Maps attribute names to the values retrieved from
            `obj.attr`, converted to strings.  Underscores in the
            attribute names are replaced by hyphens.  If any of the
            attributes is `None`, it will not appear in the output
            dictionary.
        """
        d = {}
        for attr in attrs:
            value = getattr(obj, attr)
            if value is not None:
                # `str` is the text type on Python 3, where the
                # `unicode` builtin no longer exists.
                d[attr.replace(u'_', u'-')] = str(value)
        return d
# Navigation
#
# Source code for astropy.utils.xml.writer
#
# Page Contents