Source code for astropy.io.votable.ucd

"""
This file contains routines to verify the correctness of UCD strings.
"""

from __future__ import with_statement, absolute_import

#STDLIB
import re

#LOCAL
from ...utils import data

# Public names of this module (what ``import *`` exposes).
__all__ = ['parse_ucd', 'check_ucd']


class UCDWords:
    """
    Registry of the words that may appear in a UCD.

    The vocabulary is loaded from the package data file
    ``data/ucd1p-words.txt``, which is consumed exactly in the
    ``|``-separated form distributed by IVOA.  All lookups are
    case-insensitive.
    """
    def __init__(self):
        # Lower-cased words usable in primary / secondary position.
        self._primary = set()
        self._secondary = set()
        # Lower-cased word -> description / officially capitalized word.
        self._descriptions = {}
        self._capitalization = {}

        with data.get_pkg_data_fileobj(
                "data/ucd1p-words.txt", encoding='ascii') as word_file:
            for record in word_file.readlines():
                # Every record is "<flag> | <word> | <description>".
                fields = [field.strip() for field in record.split(u'|')]
                flag, word, description = fields
                key = word.lower()

                if flag in u'QPEV':
                    self._primary.add(key)
                if flag in u'QSEV':
                    self._secondary.add(key)

                self._capitalization[key] = word
                self._descriptions[key] = description

    def is_primary(self, name):
        """
        Tell whether *name* is known as a primary word.
        """
        key = name.lower()
        return key in self._primary

    def is_secondary(self, name):
        """
        Tell whether *name* is known as a secondary word.
        """
        key = name.lower()
        return key in self._secondary

    def get_description(self, name):
        """
        Look up the official English description of the UCD word
        *name*.  Raises `KeyError` for an unknown word.
        """
        key = name.lower()
        return self._descriptions[key]

    def normalize_capitalization(self, name):
        """
        Look up the standard capitalization of the UCD word *name*.
        Raises `KeyError` for an unknown word.
        """
        key = name.lower()
        return self._capitalization[key]

# Module-wide cache for the UCDWords vocabulary.  It starts out unset and
# is filled in by parse_ucd() the first time that function runs.
_ucd_singleton = None


def parse_ucd(ucd, check_controlled_vocabulary=False, has_colon=False):
    """
    Parse the UCD into its component parts.

    Parameters
    ----------
    ucd : str
        The UCD string

    check_controlled_vocabulary : bool, optional
        If `True`, then each word in the UCD will be verified against
        the UCD1+ controlled vocabulary, (as required by the VOTable
        specification version 1.2), otherwise not.

    has_colon : bool, optional
        If `True`, the UCD may contain a colon (as defined in earlier
        versions of the standard).

    Returns
    -------
    parts : list
        The result is a list of tuples of the form:

            (*namespace*, *word*)

        If no namespace was explicitly specified, *namespace* will be
        returned as ``'ivoa'`` (i.e., the default namespace).  Words
        found in the controlled vocabulary are returned with their
        standard capitalization; any other word is returned as given.

    Raises
    ------
    ValueError : *ucd* is invalid
    """
    global _ucd_singleton
    # The vocabulary is loaded on first use and shared by later calls.
    if _ucd_singleton is None:
        _ucd_singleton = UCDWords()

    # Patterns are raw strings so that the backslash escapes reach the
    # regex engine as written instead of being invalid string escapes.
    if has_colon:
        invalid_char_re = r'[^A-Za-z0-9_.:;\-]'
    else:
        invalid_char_re = r'[^A-Za-z0-9_.;\-]'
    m = re.search(invalid_char_re, ucd)
    if m is not None:
        raise ValueError("UCD has invalid character '%s' in '%s'" %
                         (m.group(0), ucd))

    word_component_re = r'[A-Za-z0-9][A-Za-z0-9\-_]*'
    # re.match() only anchors at the start of the string, so both
    # patterns end in \Z; otherwise a namespace such as 'a.b' or a word
    # such as 'pos.eq.' or 'a..b' would pass on its valid prefix alone.
    namespace_re = r'%s\Z' % word_component_re
    word_re = r'%s(?:\.%s)*\Z' % (word_component_re, word_component_re)

    words = []
    for i, word in enumerate(ucd.split(u';')):
        colon_count = word.count(u':')
        if colon_count > 1:
            raise ValueError("Too many colons in '%s'" % word)
        if colon_count == 1:
            ns, word = word.split(u':', 1)
            if not re.match(namespace_re, ns):
                raise ValueError("Invalid namespace '%s'" % ns)
            ns = ns.lower()
        else:
            ns = u'ivoa'

        if not re.match(word_re, word):
            raise ValueError("Invalid word '%s'" % word)

        # Only words in the default namespace are checked against the
        # controlled vocabulary.  The first word must be a primary word,
        # every following word a secondary one.
        if ns == u'ivoa' and check_controlled_vocabulary:
            if i == 0:
                if not _ucd_singleton.is_primary(word):
                    if _ucd_singleton.is_secondary(word):
                        raise ValueError(
                            "Secondary word '%s' is not valid as a primary "
                            "word" % word)
                    raise ValueError("Unknown word '%s'" % word)
            elif not _ucd_singleton.is_secondary(word):
                if _ucd_singleton.is_primary(word):
                    raise ValueError(
                        "Primary word '%s' is not valid as a secondary "
                        "word" % word)
                raise ValueError("Unknown word '%s'" % word)

        # Words missing from the vocabulary keep the caller's spelling.
        try:
            normalized_word = _ucd_singleton.normalize_capitalization(word)
        except KeyError:
            normalized_word = word
        words.append((ns, normalized_word))

    return words
def check_ucd(ucd, check_controlled_vocabulary=False, has_colon=False):
    """
    Report whether *ucd* is a valid `unified content descriptor`_.

    Validation is delegated to `parse_ucd`; a `ValueError` raised there
    is turned into a `False` result.

    Parameters
    ----------
    ucd : str
        The UCD string.  `None` is accepted and reported as valid.

    check_controlled_vocabulary : bool, optional
        If `True`, then each word in the UCD will be verified against
        the UCD1+ controlled vocabulary, (as required by the VOTable
        specification version 1.2), otherwise not.

    has_colon : bool, optional
        Passed through unchanged to `parse_ucd`.

    Returns
    -------
    valid : bool
    """
    if ucd is not None:
        try:
            parse_ucd(
                ucd,
                check_controlled_vocabulary=check_controlled_vocabulary,
                has_colon=has_colon)
        except ValueError:
            return False
    return True

Page Contents