# Source code for astropy.io.votable.ucd

"""
This file contains routines to verify the correctness of UCD strings.
"""

from __future__ import with_statement, absolute_import

#STDLIB
import re

#LOCAL
from ...utils import data

__all__ = ['parse_ucd', 'check_ucd']


class UCDWords:
    """
    Lookup table of acceptable UCD words.

    The table is filled from ``data/ucd1p-words.txt``, the word list
    exactly as provided by IVOA.  Every line of that file is split on
    ``|`` into three fields: a type code, the word itself and its
    description.  Words are indexed by their lower-cased form.
    """
    def __init__(self):
        self._primary = set()
        self._secondary = set()
        self._descriptions = {}
        self._capitalization = {}

        with data.get_pkg_data_fileobj(
                "data/ucd1p-words.txt", encoding='ascii') as word_file:
            for record in word_file:
                fields = [field.strip() for field in record.split(u'|')]
                kind, name, descr = fields
                key = name.lower()
                # The type code decides in which position(s) the word
                # may be used.
                if kind in u'QPEV':
                    self._primary.add(key)
                if kind in u'QSEV':
                    self._secondary.add(key)
                self._descriptions[key] = descr
                # Remember the spelling used in the data file.
                self._capitalization[key] = name

    def is_primary(self, name):
        """
        Returns True if *name* (compared case-insensitively) is a
        valid primary name.
        """
        key = name.lower()
        return key in self._primary

    def is_secondary(self, name):
        """
        Returns True if *name* (compared case-insensitively) is a
        valid secondary name.
        """
        key = name.lower()
        return key in self._secondary

    def get_description(self, name):
        """
        Returns the description stored for the UCD word *name*.

        Raises `KeyError` if the word is not in the list.
        """
        key = name.lower()
        return self._descriptions[key]

    def normalize_capitalization(self, name):
        """
        Returns *name* spelled with the capitalization used in the
        word list.

        Raises `KeyError` if the word is not in the list.
        """
        key = name.lower()
        return self._capitalization[key]

# Module-wide UCDWords instance.  It stays None until parse_ucd creates it
# on its first call, so the word list is only read when actually needed.
_ucd_singleton = None


def parse_ucd(ucd, check_controlled_vocabulary=False, has_colon=False):
    """
    Parse the UCD into its component parts.

    Parameters
    ----------
    ucd : str
        The UCD string

    check_controlled_vocabulary : bool, optional
        If `True`, then each word in the UCD will be verified against
        the UCD1+ controlled vocabulary, (as required by the VOTable
        specification version 1.2), otherwise not.

    has_colon : bool, optional
        If `True`, the UCD may contain a colon (as defined in earlier
        versions of the standard).

    Returns
    -------
    parts : list
        The result is a list of tuples of the form:

            (*namespace*, *word*)

        If no namespace was explicitly specified, *namespace* will be
        returned as ``'ivoa'`` (i.e., the default namespace).  Words
        found in the word list are returned with the capitalization
        used there; other words are returned unchanged.

    Raises
    ------
    ValueError
        If *ucd* contains a character that is not allowed, a word has
        more than one colon, a namespace or word is malformed, or
        (with *check_controlled_vocabulary*) a word in the ``ivoa``
        namespace is unknown or used in the wrong position.
    """
    global _ucd_singleton
    if _ucd_singleton is None:
        _ucd_singleton = UCDWords()

    # A colon is only an acceptable character when namespaces are allowed.
    if has_colon:
        invalid_char_re = r'[^A-Za-z0-9_.:;\-]'
    else:
        invalid_char_re = r'[^A-Za-z0-9_.;\-]'
    m = re.search(invalid_char_re, ucd)
    if m is not None:
        raise ValueError("UCD has invalid character '%s' in '%s'" %
                         (m.group(0), ucd))

    # Both patterns are anchored with \Z: re.match only anchors at the
    # start, so without it a valid prefix would be enough to accept
    # malformed input such as 'pos.' or 'pos..eq'.
    word_component_re = r'[A-Za-z0-9][A-Za-z0-9\-_]*'
    namespace_re = re.compile(r'%s\Z' % word_component_re)
    word_re = re.compile(
        r'%s(?:\.%s)*\Z' % (word_component_re, word_component_re))

    parts = ucd.split(u';')
    words = []
    for i, word in enumerate(parts):
        colon_count = word.count(u':')
        if colon_count == 1:
            ns, word = word.split(u':', 1)
            if not namespace_re.match(ns):
                raise ValueError("Invalid namespace '%s'" % ns)
            ns = ns.lower()
        elif colon_count > 1:
            raise ValueError("Too many colons in '%s'" % word)
        else:
            ns = u'ivoa'

        if not word_re.match(word):
            raise ValueError("Invalid word '%s'" % word)

        # Only words in the default namespace are covered by the
        # controlled vocabulary.
        if ns == u'ivoa' and check_controlled_vocabulary:
            if i == 0:
                # The first word must be usable as a primary word.
                if not _ucd_singleton.is_primary(word):
                    if _ucd_singleton.is_secondary(word):
                        raise ValueError(
                            "Secondary word '%s' is not valid as a primary "
                            "word" % word)
                    else:
                        raise ValueError("Unknown word '%s'" % word)
            else:
                # Every following word must be usable as a secondary word.
                if not _ucd_singleton.is_secondary(word):
                    if _ucd_singleton.is_primary(word):
                        raise ValueError(
                            "Primary word '%s' is not valid as a secondary "
                            "word" % word)
                    else:
                        raise ValueError("Unknown word '%s'" % word)

        # Words missing from the word list keep their given spelling.
        try:
            normalized_word = _ucd_singleton.normalize_capitalization(word)
        except KeyError:
            normalized_word = word
        words.append((ns, normalized_word))

    return words


def check_ucd(ucd, check_controlled_vocabulary=False, has_colon=False):
    """
    Returns False if *ucd* is not a valid `unified content descriptor`_.

    Parameters
    ----------
    ucd : str
        The UCD string

    check_controlled_vocabulary : bool, optional
        If `True`, then each word in the UCD will be verified against
        the UCD1+ controlled vocabulary, (as required by the VOTable
        specification version 1.2), otherwise not.

    has_colon : bool, optional
        If `True`, the UCD may contain a colon (as defined in earlier
        versions of the standard).

    Returns
    -------
    valid : bool
        `True` if *ucd* is `None` or if `parse_ucd` accepts it, `False`
        if `parse_ucd` raises `ValueError`.
    """
    # A missing UCD is treated as valid; there is nothing to parse.
    if ucd is None:
        return True

    try:
        parse_ucd(ucd,
                  check_controlled_vocabulary=check_controlled_vocabulary,
                  has_colon=has_colon)
    except ValueError:
        return False
    else:
        return True
# Navigation

# Source code for astropy.io.votable.ucd

# Page Contents