Source code for astropy.io.ascii.fixedwidth

"""An extensible ASCII table reader and writer.

fixedwidth.py:
  Read or write a table with fixed width columns.

:Copyright: Smithsonian Astrophysical Observatory (2011)
:Author: Tom Aldcroft (aldcroft@head.cfa.harvard.edu)
"""

## 
## Redistribution and use in source and binary forms, with or without
## modification, are permitted provided that the following conditions are met:
##     * Redistributions of source code must retain the above copyright
##       notice, this list of conditions and the following disclaimer.
##     * Redistributions in binary form must reproduce the above copyright
##       notice, this list of conditions and the following disclaimer in the
##       documentation and/or other materials provided with the distribution.
##     * Neither the name of the Smithsonian Astrophysical Observatory nor the
##       names of its contributors may be used to endorse or promote products
##       derived from this software without specific prior written permission.
## 
## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
## ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
## WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
## DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
## DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
## (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
## ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
## (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS  
## SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import re
import itertools
from . import core
from .core import io, next, izip, any

class FixedWidthSplitter(core.BaseSplitter):
    """Slice each line into column values using fixed character positions.

    Every column object in ``self.cols`` must carry ``start`` and ``end``
    attributes; these are expected to have been set by the Header class.
    The reference to ``header.cols`` is placed on the splitter by the base
    ``Reader.read()`` function just before data lines are split by a ``data``
    object.

    ``start`` and ``end`` follow the Python slice convention, i.e. the
    substring for a column is ``line[start:end]``.

    There is no ``process_lines`` hook in this splitter since that is
    generally not useful for fixed-width input.
    """
    delimiter_pad = ''
    bookend = False

    def __call__(self, lines):
        """Yield one list of column strings for every line in ``lines``.

        :param lines: iterable of table lines
        :returns: generator of lists of values, one list per input line
        """
        for line in lines:
            fields = [line[col.start:col.end] for col in self.cols]
            if not self.process_val:
                yield fields
            else:
                yield [self.process_val(field) for field in fields]

    def join(self, vals, widths):
        """Assemble one output line from ``vals``, right-justified to ``widths``.

        :param vals: string values, one per column
        :param widths: output width of each column, parallel to ``vals``
        :returns: the formatted line
        """
        # A value of None for either attribute is treated as an empty string
        delimiter = self.delimiter or ''
        pad = self.delimiter_pad or ''

        if self.bookend:
            left_edge, right_edge = delimiter + pad, pad + delimiter
        else:
            left_edge, right_edge = '', ''

        separator = pad + delimiter + pad
        # Pad on the left so that each value ends at its column's right edge
        justified = [val.rjust(width) for val, width in zip(vals, widths)]
        return left_edge + separator.join(justified) + right_edge
class FixedWidthHeader(core.BaseHeader):
    """Fixed width table header reader.

    The key settable class attributes are:

    :param auto_format: format string for auto-generating column names
    :param start_line: None, int, or a function of ``lines`` that returns None or int
    :param comment: regular expression for comment lines
    :param splitter_class: Splitter class for splitting data lines into columns
    :param names: list of names corresponding to each data column
    :param include_names: list of names to include in output (default=None selects all names)
    :param exclude_names: list of names to exclude from output (applied after ``include_names``)
    :param position_line: row index of line that specifies position
        (class default = None, meaning no position line)
    :param position_char: character used to write the position line
    :param col_starts: list of start positions for each column (0-based counting)
    :param col_ends: list of end positions (inclusive) for each column
    :param delimiter_pad: padding around delimiter when writing
    :param bookend: put the delimiter at start and end of line when writing
    """
    position_line = None   # secondary header line position

    def get_line(self, lines, index):
        """Return the ``index``-th line of ``lines`` after ``process_lines``.

        :param lines: list of table lines
        :param index: 0-based index into the processed lines
        :returns: the selected line
        :raises core.InconsistentTableError: if the processed lines contain
            no line at ``index``
        """
        for i, line in enumerate(self.process_lines(lines)):
            if i == index:
                break
        else:  # No header line matching
            # The exception class lives in ``core``; it is not imported by
            # name into this module so it must be referenced via ``core``.
            raise core.InconsistentTableError('No header line found in table')
        return line

    def get_cols(self, lines):
        """Initialize the header Column objects from the table ``lines``.

        Based on the previously set Header attributes find or create the column names.
        Sets ``self.cols`` with the list of Columns.  This list only includes the actual
        requested columns after filtering by the include_names and exclude_names
        attributes.  See ``self.names`` for the full list.

        :param lines: list of table lines
        :returns: None
        :raises ValueError: if ``position_line`` is set but there is no header line
        :raises core.InconsistentTableError: if there is no header line and no
            data line from which to derive the column positions
        """
        # See "else" clause below for explanation of start_line and position_line
        start_line = core._get_line_index(self.start_line, self.process_lines(lines))
        position_line = core._get_line_index(self.position_line, self.process_lines(lines))

        # If start_line is none then there is no header line.  Column positions are
        # determined from first data line and column names are either supplied by user
        # or auto-generated.
        if start_line is None:
            if position_line is not None:
                raise ValueError("Cannot set position_line without also setting header_start")
            data_lines = self.data.process_lines(lines)
            if not data_lines:
                raise core.InconsistentTableError(
                    'No data lines found so cannot autogenerate column names')
            vals, starts, ends = self.get_fixedwidth_params(data_lines[0])

            if self.names is None:
                self.names = [self.auto_format % i for i in range(1, len(vals) + 1)]

        else:
            # This bit of code handles two cases:
            # start_line = <index> and position_line = None
            #    Single header line where that line is used to determine both the
            #    column positions and names.
            # start_line = <index> and position_line = <index2>
            #    Two header lines where the first line defines the column names and
            #    the second line defines the column positions

            if position_line is not None:
                # Define self.col_starts and self.col_ends so that the call to
                # get_fixedwidth_params below will use those to find the header
                # column names.  Note that get_fixedwidth_params returns Python
                # slice col_ends but expects inclusive col_ends on input (for
                # more intuitive user interface).
                line = self.get_line(lines, position_line)
                vals, self.col_starts, col_ends = self.get_fixedwidth_params(line)
                self.col_ends = [x - 1 for x in col_ends]

            # Get the header column names and column positions
            line = self.get_line(lines, start_line)
            vals, starts, ends = self.get_fixedwidth_params(line)

            # Possibly override the column names with user-supplied values
            if self.names is None:
                self.names = vals

        # Filter self.names using include_names and exclude_names, then create
        # the actual Column objects.
        self._set_cols_from_names()
        self.n_data_cols = len(self.cols)

        # Set column start and end positions.  Also re-index the cols because
        # the FixedWidthSplitter does NOT return the ignored cols (as is the
        # case for typical delimiter-based splitters)
        for i, col in enumerate(self.cols):
            col.start = starts[col.index]
            col.end = ends[col.index]
            col.index = i

    def get_fixedwidth_params(self, line):
        """Split ``line`` on the delimiter and determine column values and column
        start and end positions.  This might include null columns with zero length
        (e.g. for ``header row = "| col1 || col2 | col3 |"`` or
        ``header2_row = "----- ------- -----"``).  The null columns are
        stripped out.  Returns the values between delimiters and the corresponding
        start and end positions.

        :param line: input line
        :returns: (vals, starts, ends) where ``ends`` are Python slice
            (exclusive) end positions
        :raises ValueError: if ``col_starts`` and ``col_ends`` have different lengths
        :raises core.InconsistentTableError: if splitting on the delimiter yields
            inconsistent numbers of values and positions
        """

        # If column positions are already specified then just use those, otherwise
        # figure out positions between delimiters.
        if self.col_starts is not None and self.col_ends is not None:
            starts = list(self.col_starts)  # could be any iterable, e.g. np.array
            ends = [x + 1 for x in self.col_ends]  # user supplies inclusive endpoint
            if len(starts) != len(ends):
                raise ValueError('Fixed width col_starts and col_ends must have the same length')
            vals = [line[start:end].strip() for start, end in zip(starts, ends)]
        else:
            # There might be a cleaner way to do this but it works...
            vals = line.split(self.splitter.delimiter)
            starts = [0]
            ends = []
            for val in vals:
                if val:
                    ends.append(starts[-1] + len(val))
                    starts.append(ends[-1] + 1)
                else:
                    # Empty piece means adjacent delimiters: skip one character
                    starts[-1] += 1
            # The loop always leaves one trailing start with no matching end
            starts = starts[:-1]
            vals = [x.strip() for x in vals if x]
            if len(vals) != len(starts) or len(vals) != len(ends):
                raise core.InconsistentTableError('Error parsing fixed width header')

        return vals, starts, ends

    def write(self, lines):
        """Do nothing; the header is emitted later, once widths are known.

        :param lines: list of output lines (left unmodified)
        :returns: None
        """
        # Header line not written until data are formatted.  Until then it is
        # not known how wide each column will be for fixed width.
        pass
class FixedWidthData(core.BaseData):
    """Fixed width table data reader and writer.

    :param start_line: None, int, or a function of ``lines`` that returns None or int
    :param end_line: None, int, or a function of ``lines`` that returns None or int
    :param comment: Regular expression for comment lines
    :param splitter_class: Splitter class for splitting data lines into columns
    """

    splitter_class = FixedWidthSplitter

    def write(self, lines):
        """Append the formatted header (if any) and data rows to ``lines``.

        The header lines are written here rather than by the header object
        because the column widths are only known once every value has been
        converted to a string.

        :param lines: list of output lines, extended in place
        :returns: the same ``lines`` list
        :raises ValueError: if a position line is requested and
            ``header.position_char`` is not a single character
        """
        with self._set_col_formats(self.cols, self.formats):
            # Col iterator does the formatting defined above so each val is a string
            # and vals is a tuple of strings for all columns of each row
            col_str_iters = [col.iter_str_vals() for col in self.cols]
            vals_list = list(izip(*col_str_iters))

            has_header = self.header.start_line is not None

            for i, col in enumerate(self.cols):
                # Seed with 0 so that a table with no rows does not make
                # max() fail on an empty sequence.
                col.width = max([0] + [len(vals[i]) for vals in vals_list])
                if has_header:
                    col.width = max(col.width, len(col.name))

            widths = [col.width for col in self.cols]

            if has_header:
                lines.append(self.splitter.join([col.name for col in self.cols], widths))

            if self.header.position_line is not None:
                char = self.header.position_char
                if len(char) != 1:
                    raise ValueError('Position_char="%s" must be a single character' % char)
                vals = [char * col.width for col in self.cols]
                lines.append(self.splitter.join(vals, widths))

            for vals in vals_list:
                lines.append(self.splitter.join(vals, widths))

        return lines
class FixedWidth(core.BaseReader):
    """Reader/writer for a fixed width table whose single header line gives
    both the column names and the column positions.

    Examples::

      # Bar delimiter in header and data

      |  Col1 |   Col2      |  Col3 |
      |  1.2  | hello there |     3 |
      |  2.4  | many words  |     7 |

      # Bar delimiter in header only

      Col1 |   Col2      | Col3
      1.2    hello there    3
      2.4    many words     7

      # No delimiter with column positions specified as input

      Col1       Col2Col3
       1.2hello there   3
       2.4many words    7

    See the :ref:`fixed_width_gallery` for specific usage examples.

    :param col_starts: list of start positions for each column (0-based counting)
    :param col_ends: list of end positions (inclusive) for each column
    :param delimiter_pad: padding around delimiter when writing (default = ' ')
    :param bookend: put the delimiter at start and end of line when writing (default = True)
    """
    def __init__(self, col_starts=None, col_ends=None, delimiter_pad=' ', bookend=True):
        core.BaseReader.__init__(self)

        # Create the header/data pair and cross-link them
        self.header = FixedWidthHeader()
        self.data = FixedWidthData()
        header, data = self.header, self.data
        data.header = header
        header.data = data

        # Both parts split on the bar character
        for part in (header, data):
            part.splitter.delimiter = '|'

        # Output formatting options only matter for the data splitter
        data.splitter.delimiter_pad = delimiter_pad
        data.splitter.bookend = bookend

        # Header on the first line, data starting right after it
        header.start_line = 0
        data.start_line = 1

        # Same comment conventions for header and data
        for part in (header, data):
            part.comment = r'\s*#'
            part.write_comment = '# '

        # Optional user-specified column positions
        header.col_starts = col_starts
        header.col_ends = col_ends
class FixedWidthNoHeader(FixedWidth):
    """Reader/writer for a fixed width table that has no header line.

    Column names come from the ``names`` keyword or are auto-generated.
    Column positions come from the ``col_starts`` and ``col_ends`` keywords
    or, failing that, from splitting the first data line, in which case a
    ``delimiter`` is needed to split that line.

    Examples::

      # Bar delimiter in header and data

      |  1.2  | hello there |     3 |
      |  2.4  | many words  |     7 |

      # Compact table having no delimiter and column positions specified as input

      1.2hello there3
      2.4many words 7

    This class is just a convenience wrapper around the ``FixedWidth`` reader
    but with ``header.start_line = None`` and ``data.start_line = 0``.

    See the :ref:`fixed_width_gallery` for specific usage examples.

    :param col_starts: list of start positions for each column (0-based counting)
    :param col_ends: list of end positions (inclusive) for each column
    :param delimiter_pad: padding around delimiter when writing (default = ' ')
    :param bookend: put the delimiter at start and end of line when writing (default = True)
    """
    def __init__(self, col_starts=None, col_ends=None, delimiter_pad=' ', bookend=True):
        FixedWidth.__init__(
            self,
            col_starts,
            col_ends,
            delimiter_pad=delimiter_pad,
            bookend=bookend,
        )
        # No header line: data begins on the very first line
        self.data.start_line = 0
        self.header.start_line = None
class FixedWidthTwoLine(FixedWidth):
    """Reader/writer for a fixed width table with two header lines.

    The first header line gives the column names; the second one implicitly
    gives the column positions.

    Examples::

      # Typical case with column extent defined by ---- under column names.

       col1    col2         <== header_start = 0
      -----  ------------   <== position_line = 1, position_char = "-"
        1     bee flies     <== data_start = 2
        2     fish swims

      # Pretty-printed table

      +------+------------+
      | Col1 |   Col2     |
      +------+------------+
      |  1.2 | "hello"    |
      |  2.4 | there world|
      +------+------------+

    See the :ref:`fixed_width_gallery` for specific usage examples.

    :param position_line: row index of line that specifies position (default = 1)
    :param position_char: character used to write the position line (default = "-")
    :param delimiter_pad: padding around delimiter when writing (default = None)
    :param bookend: put the delimiter at start and end of line when writing (default = False)
    """
    def __init__(self, position_line=1, position_char='-', delimiter_pad=None, bookend=False):
        FixedWidth.__init__(self, delimiter_pad=delimiter_pad, bookend=bookend)

        header, data = self.header, self.data
        header.position_line = position_line
        header.position_char = position_char

        # Data rows begin immediately below the position line
        data.start_line = position_line + 1

        # Columns are separated by a blank rather than the parent's bar
        for part in (header, data):
            part.splitter.delimiter = ' '

Page Contents