Source code for astropy.utils.xml.check

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
A collection of functions for checking various XML-related strings for
standards compliance.
"""

import re
import urlparse


def check_id(ID):
    """
    Returns `True` if *ID* is a valid XML ID.

    An ID is accepted when its first character is an ASCII letter or an
    underscore and every following character is an ASCII letter, digit,
    underscore, period or hyphen.  The empty string is not a valid ID.

    Parameters
    ----------
    ID : str
        The candidate identifier.

    Returns
    -------
    valid : bool
        `True` if the whole of *ID* matches the pattern above.
    """
    # ``\Z`` anchors at the real end of the string.  ``$`` would also
    # match just before a trailing newline and so accept "abc\n".
    return re.match(r"^[A-Za-z_][A-Za-z0-9_\.\-]*\Z", ID) is not None


def fix_id(ID):
    """
    Given an arbitrary string, create one that can be used as an xml
    id.  This is rather simplistic at the moment, since it just
    replaces non-valid characters with underscores.

    Parameters
    ----------
    ID : str
        The string to convert.

    Returns
    -------
    corrected : str
        *ID* itself when it is already a valid XML ID.  Otherwise a copy
        that is prefixed with an underscore if the first character is
        not an ASCII letter or underscore, and in which every other
        character that is not an ASCII letter, digit, underscore, period
        or hyphen is replaced by an underscore.  The empty string is
        returned unchanged.
    """
    # ``\Z`` (not ``$``) so that a string ending in a newline is not
    # mistaken for an already-valid ID and returned unmodified.
    if re.match(r"^[A-Za-z_][A-Za-z0-9_\.\-]*\Z", ID):
        return ID
    if not ID:
        return ''

    corrected = ID
    # An invalid leading character is kept (and sanitised below as part
    # of the tail); a valid start character is put in front of it.
    if re.match(r"[A-Za-z_]", corrected) is None:
        corrected = '_' + corrected

    # The first character is now known to be valid, so only the
    # remainder needs its invalid characters replaced.
    return corrected[0] + re.sub(r"[^A-Za-z0-9_\.\-]", "_", corrected[1:])

# NOTE(review): this constant is not referenced by any function visible
# in this section.  It writes ``\l`` where the pattern inside
# check_token() uses ``\n`` -- presumably a typo for ``\n``; confirm the
# intended pattern before relying on this value.
_token_regex = r"(?![\r\l\t ])[^\r\l\t]*(?![\r\l\t ])"


def check_token(token):
    """
    Returns `True` if *token* is a valid XML token, as defined by XML
    Schema Part 2.

    A token contains no carriage return, line feed or tab characters,
    has no leading or trailing spaces, and has no internal run of two
    or more spaces.  The empty string is a valid token.

    Parameters
    ----------
    token : str
        The string to check.

    Returns
    -------
    valid : bool
    """
    if token == '':
        return True
    # One or more "words" of non-whitespace characters separated by
    # exactly one space.  Requiring a word first rejects a leading
    # space, and ``\Z`` (unlike ``$``) rejects a trailing newline.
    return re.match(r"[^\r\n\t ]+(?: [^\r\n\t ]+)*\Z", token) is not None


def check_mime_content_type(content_type):
    """
    Returns `True` if *content_type* is a valid MIME content type
    (syntactically at least), as defined by RFC 2045.

    Only the bare ``type/subtype`` form is accepted; a value carrying
    parameters (for example ``; charset=...``) is rejected.

    Parameters
    ----------
    content_type : str
        The string to check.

    Returns
    -------
    valid : bool
    """
    # Characters that may not appear in an RFC 2045 token: the
    # "tspecials" (which include the backslash and the double quote),
    # SPACE, the control characters 0x00-0x1F and DEL.
    ctrls = ''.join(chr(x) for x in range(0x20))
    excluded = '()<>@,;:\\"/[]?=' + ' ' + ctrls + '\x7f'
    # re.escape keeps ``\``, ``[`` and ``]`` literal inside the class.
    token_regex = '[^%s]+' % re.escape(excluded)
    # ``\Z`` rather than ``$`` so a trailing newline is not accepted.
    return re.match(
        r'(?P<type>%s)/(?P<subtype>%s)\Z' % (token_regex, token_regex),
        content_type) is not None


def check_anyuri(uri):
    """
    Returns `True` if *uri* is a valid URI as defined in RFC 2396.

    The check is syntactic: the whole string must consist of an optional
    scheme, up to two slashes and a run of RFC 2396 URI characters,
    optionally followed by a ``#fragment``; it must also be parseable by
    ``urlparse``.  The empty string is accepted.

    Parameters
    ----------
    uri : str
        The string to check.

    Returns
    -------
    valid : bool
    """
    # Every part of the pattern is optional, so without the final ``\Z``
    # it matches the empty prefix of any string and the test could never
    # fail.  ``,`` is included because RFC 2396 lists it among the
    # reserved characters that may appear in a URI.
    if (re.match(
        (r"(([a-zA-Z][0-9a-zA-Z+\-\.]*:)?/{0,2}[0-9a-zA-Z;" +
         r"/?:@&=+$,\.\-_!~*'()%]+)?" +
         r"(#[0-9a-zA-Z;/?:@&=+$,\.\-_!~*'()%]+)?\Z"),
        uri) is None):
        return False
    try:
        urlparse.urlparse(uri)
    except ValueError:
        # Raised for malformed input; anything else (including
        # KeyboardInterrupt) should propagate rather than be hidden.
        return False
    return True
Navigation

Source code for astropy.utils.xml.check

Page Contents