Source code for pds4_tools.reader.header_objects

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import sys

from .general_objects import Structure, Meta_Structure

from ..extern.cached_property import threaded_cached_property
from ..extern import six


[docs]class HeaderStructure(Structure): """ Stores a single PDS4 header data structure. Contains the header's data, meta data and label portion. See `Structure`'s and `pds4_read`'s docstrings for attributes, properties and usage instructions of this object. Inherits all Attributes, Parameters and Properties from `Structure`. Overrides `info`, `data` and `from_file` methods to implement them. """
[docs] @classmethod def from_file(cls, data_filename, structure_label, full_label, lazy_load=False, no_scale=None, decode_strings=None): """ Create an header structure from relevant labels and file for the data. Parameters ---------- data_filename : str or unicode Filename of the data file that contained the data for this array structure. structure_label : Label The segment of the label describing only this array structure. full_label : Label The entire label describing the PDS4 product this structure originated from. lazy_load : bool, optional If True, does not read-in the data of this structure until the first attempt to access it. Defaults to False. no_scale : None, optional Has no effect because Headers do not contain data that can be scaled. Defaults to None. decode_strings : bool, optional Has no effect because Headers are not necessarily plain-text. See ``parser`` method instead. Defaults to None. Returns ------- HeaderStructure An object representing the PDS4 header structure; contains its label, data and meta data. """ # Create the meta data structure for this header meta_header_structure = Meta_HeaderStructure.from_label(structure_label) # Create the data structure for this array header_structure = cls(structure_data=None, structure_meta_data=meta_header_structure, structure_label=structure_label, full_label=full_label, parent_filename=data_filename) # Attempt to access the data property such that the data gets read-in (if not on lazy-load) if not lazy_load: header_structure.data return header_structure
[docs] @classmethod def from_bytes(cls, input, **structure_kwargs): """ Create an header structure from PDS-compliant data. Parameters ---------- input : bytes, str or unicode A string or bytes containing the data for header. structure_kwargs : dict, optional Keywords that are passed directly to the `HeaderStructure` constructor. Returns ------- HeaderStructure An object representing the PDS4 header structure. The data attribute will contain *input*. Other attributes may be specified via *structure_kwargs*. """ from .read_headers import new_header return new_header(input, **structure_kwargs)
[docs] def info(self, abbreviated=False, output=None): """ Prints a summary of this data structure. Contains the type and dimensions of the Array, and if *abbreviated* is False then also outputs the name and number of elements of each axis in the array. Parameters ---------- abbreviated : bool, optional Has no effect on header data structures. output : file, bool or None, optional A file-like object to write the output to. If set to False then instead of outputting to a file a list representing the summary parameters for the Structure is returned. Writes to ``sys.stdout`` by default. Returns ------- None or list If output is False, then returns a list representing the summary parameters for the Structure. Otherwise returns None. """ # Set default output to write to command line if output is None: output = sys.stdout # Obtain abbreviated version of summary id = "'{0}'".format(self.id) parsing_std_info = '{0}'.format(self.meta_data['parsing_standard_id']) summary_args = [self.type, id, parsing_std_info] abbreviated_info = "{0} {1} ({2})".format(*summary_args) # If output is false, return list representing the various parameters of the summary if not output: return summary_args # Otherwise write out summary to output output.write(abbreviated_info) output.write('\n') output.flush()
@threaded_cached_property def data(self): """ All data in the PDS4 header data structure. This property is implemented as a thread-safe cacheable attribute. Once it is run for the first time, it replaces itself with an attribute having the exact data that was originally returned. Unlike normal properties, this property/attribute is settable without a __set__ method. To never run the read-in routine inside this property, you need to manually create the the ``.data`` attribute prior to ever invoking this method (or pass in the data to the constructor on object instantiation, which does this for you). Returns ------- str, unicode or bytes The header described by this data structure. """ super(HeaderStructure, self).data() from .read_headers import read_header_data read_header_data(self) return self.data
[docs] def parser(self): """ Obtain a parser for the data in the header. Returns ------- HeaderParser A parser for the header. """ return HeaderParser().get_parser(self)
[docs]class Meta_HeaderStructure(Meta_Structure): """ Meta data about a PDS4 header data structure. Meta data stored in this class is accessed in ``dict``-like fashion. Normally this meta data originates from the label (e.g., if this is a Header then everything from the opening tag of Header to its closing tag will be stored in this object), via the `from_label` method. Inherits all Attributes, Parameters and Properties from `Meta_Structure`. Examples -------- Supposing the following Header definition from a label:: <Header> <local_identifier>header</local_identifier> <offset unit="byte">0</offset> <object_length unit="byte">2880</object_length> <parsing_standard_id>FITS 3.0</parsing_standard_id> </Header> >>> meta_array = Meta_HeaderStructure.from_label(header_xml) >>> print(meta_array['local_identifier']) header >>> print(meta_array['parsing_standard_id'] FITS 3.0 """
[docs] @classmethod def from_label(cls, xml_header): """ Create a Meta_HeaderStructure from the XML portion describing it in the label. Parameters ---------- xml_header : Label or ElementTree Element Portion of label that defines the Header data structure. Returns ------- Meta_HeaderStructure Instance containing meta data about the header structure, as taken from the XML label. Raises ------ PDS4StandardsException Raised if required meta data is absent. """ obj = cls() obj._load_keys_from_xml(xml_header) # Ensure required keys for Array_* exist keys_must_exist = ['object_length', 'offset', 'parsing_standard_id'] obj._check_keys_exist(keys_must_exist) return obj
[docs] def is_plain_text(self): """ Obtain whether a Header is in plain text. Under the definition of plain-text taken here, this includes all data that contains "only characters of readable material but not its graphical representation nor other objects (images, etc)." Returns ------- bool True if the Header's data is plain text, False otherwise. """ plain_text_standards = ['7-Bit ASCII Text', 'UTF-8 Text', 'PDS3', 'Pre-PDS3', 'PDS ODL 2', 'PDS DSV 1', 'FITS 3.0', 'FITS 4.0', 'VICAR1', 'VICAR2', 'ISIS2 History Label'] return self['parsing_standard_id'] in plain_text_standards
[docs]class HeaderParser(object): """ Provides a base class for parsers of any PDS Header object. Parsers for specific header objects should inherit from this class. Where a specific parser is not available, this object may serve as a general parser. Parameters ---------- header_structure : HeaderStructure, optional The header structure to provide parsing capability for. Attributes ---------- structure : HeaderStructure or None The header structure to provide parsing capability for. """ def __init__(self, header_structure=None): self.structure = header_structure
[docs] @staticmethod def get_parser(header_structure): """ Factory method to obtain the most specific parser for the data. Parameters ---------- header_structure : HeaderStructure, optional The header structure to provide a parser for. Returns ------- HeaderParser A parser (whether specific, if available, or generic) for the header. """ meta_data = header_structure.meta_data if 'FITS' in meta_data['parsing_standard_id']: return HeaderFITSParser(header_structure) elif meta_data.is_plain_text(): return HeaderPlainTextParser(header_structure) else: return HeaderParser(header_structure)
[docs]class HeaderPlainTextParser(HeaderParser): """ A generic parser for any plain-text header. """
[docs] def to_string(self): """ Returns ------- str or unicode An unmodified version of the plain-text string that forms the header. """ data = self.structure.data if isinstance(data, six.binary_type): data = data.decode('utf-8') return data
[docs]class HeaderFITSParser(HeaderPlainTextParser): """ A parser for FITS headers. """
[docs] def to_string(self): """ Returns ------- str or unicode A human-readable representation of the FITS header, which leaves it unmodified except for splitting each CARD (80 characters) into their own lines and removing padding. """ data = super(HeaderFITSParser, self).to_string() # Add newlines every 80 characters data = [data[i:i + 80] for i in range(0, len(data), 80)] data = ('\r\n'.join(data)) return data.strip()