from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import sys

# `Sequence` lives in `collections.abc` on Python 3.3+; its alias in `collections`
# was removed in Python 3.10. Fall back to the old location for Python 2.
try:
    from collections.abc import Sequence
except ImportError:
    from collections import Sequence

from ..utils.helpers import xml_to_dict, is_array_like
from ..utils.exceptions import PDS4StandardsException
from ..utils.logging import logger_init

from ..extern import six
from ..extern.cached_property import threaded_cached_property

# Safe import of OrderedDict
try:
    from collections import OrderedDict
except ImportError:
    from ..extern.ordered_dict import OrderedDict
# Initialize the module-level logger via the package's `logger_init` helper
logger = logger_init()
#################################
class StructureList(Sequence):
    """ Stores the label and all supported data structures of a PDS4 product.

    An object of this type is returned by `pds4_read`. PDS4 supported data structures are forms of Arrays,
    Tables and Headers. This class allows both ``dict``-like and ``list``-like access to each individual
    PDS4 data structure inside.

    Parameters
    ----------
    structures : list[Structure]
        Each data `Structure`, including the data and the structure's label portion,
        in the PDS4 product.
    label : Label
        The entire label describing the PDS4 product.
    read_in_log : str or unicode
        Output of the log during read-in of the entire PDS4 product.

    Attributes
    ----------
    structures : list[Structure]
        Each data `Structure`, including the data and the structure's label portion,
        in the PDS4 product.
    label : Label
        The entire label describing the PDS4 product.
    read_in_log : str or unicode
        Output of the log during read-in of the entire PDS4 product.

    Examples
    --------

    Supposing the label described two objects, an Array_2D_Image (named 'Obs') and a Table_Binary
    (unnamed) in the same order as described here, they can be accessed as follows:

    >>> image_array = struct_list[0]
    >>>            or struct_list['Obs']

    >>> obs_table = struct_list[1]
    >>>          or struct_list['TABLE_0']

    See `pds4_read` and `__getitem__` docstrings for more examples.
    """

    def __init__(self, structures, label, read_in_log):

        super(StructureList, self).__init__()

        self.structures = structures
        self.label = label
        self.read_in_log = read_in_log

    def __getitem__(self, key):
        """ Searches `StructureList` for a specific data structure.

        Parameters
        ----------
        key : str, unicode, int, slice or tuple
            Selection for desired `Structure`. May be a string containing the name or local identifier of a
            single Structure, similar to ``dict`` indexing functionality. May be an integer or slice
            specifying which Structure(s) to select, similar to ``list`` or ``tuple`` indexing functionality.
            May be a two-valued tuple, with the first value providing the name or local identifier and the
            second value a zero-based count, providing which repetition of Structure by that name to select.

        Returns
        -------
        Structure or list[Structure]
            Matched PDS4 data structure(s).

        Raises
        ------
        IndexError
            Raised if *key* is a larger integer than the number of Structures.
        KeyError
            Raised if *key* is a name or local identifier and does not match any Structure.

        Examples
        --------
        >>> struct_list[0]

        >>> struct_list['Observations']

        If both of the first two data structures have the name 'Observations', then to select the second
        we can do,

        >>> struct_list['Observations', 1]

        We can select both of the first two data structures via,

        >>> struct_list[0:2]
        """

        # ``list``-like access: delegate integers and slices directly to the underlying list
        if isinstance(key, six.integer_types) or isinstance(key, slice):
            return self.structures[key]

        # Reuse array-search logic (where key is an array_like) for simple (ie, where key is a str)
        # name and lid searches, by asking for the first repetition
        if not is_array_like(key):
            key = (key, 0)

        # Search for structure by LIDs and names
        structure = self._get_structure_by_id(key)

        if structure is None:

            if key[1] > 0:
                raise KeyError("Structure '{0}' (repetition {1}) not found.".format(key[0], key[1]))

            raise KeyError("Structure '{0}' not found.".format(key[0]))

        return structure

    def __len__(self):
        """
        Returns
        -------
        int
            Number of data structures contained.
        """
        return len(self.structures)

    def __repr__(self):
        """
        Returns
        -------
        str
            A repr string identifying the structure list, and all the structures it has.
        """

        structures_repr = [repr(structure) for structure in self.structures]

        # ``str(...)`` keeps the template a native string even under ``unicode_literals``
        return str('<{0} at {1}> with:\n{2}').format(self.__class__.__name__, hex(id(self)),
                                                     '\n'.join(structures_repr))

    @property
    def type(self):
        """
        Examples of types include Product_Observational, Product_Ancillary, Product_Document, etc.

        Returns
        -------
        str or unicode
            Root tag of a PDS4 label.
        """
        return self.label.tag

    def info(self, abbreviated=True, output=None):
        """ Prints to stdout a summary of the contained data structures.

        For Arrays the summary contains the type and dimensions of the Array,
        and for Tables it contains the type and number of fields. Set *abbreviated*
        parameter to False to output additional detail.

        Parameters
        ----------
        abbreviated : bool, optional
            If False, output additional detail. Defaults to True.
        output : file, bool or None, optional
            A file-like object to write the output to. If set to False, does not output to a file
            and instead returns a list of lists representing info for each Structure. Writes
            to ``sys.stdout`` by default.

        Returns
        -------
        None or list[list]
            If *output* is False, a list with one entry per Structure, each consisting of the
            Structure's index followed by the values returned by that Structure's own
            ``info(output=False)``. Otherwise None.
        """

        # Set default output to write to command line
        if output is None:
            output = sys.stdout

        # Obtain the summary parameters of each structure, prefixed by its index in the list
        summary_args = [[i] + structure.info(output=False)
                        for i, structure in enumerate(self.structures)]

        # If output is false, return list representing the various parameters of the summary
        if not output:
            return summary_args

        # Otherwise write out summary to output
        if not abbreviated:

            for structure in self.structures:
                structure.info(abbreviated=False, output=output)
                output.write('---------------------------------------- \n\n')

        elif summary_args:

            # Pad the type and name columns to their longest entry so that columns line up. This is
            # only reached when there is at least one structure, since ``max`` of an empty list
            # raises ValueError.
            type_length = max([len(ss[1]) for ss in summary_args])
            name_length = max([len(ss[2]) for ss in summary_args])

            format_string = '{{0:<3d}} {{1:{0}s}} {{2:{1}s}} {{3:s}}'.format(type_length, name_length)

            for args in summary_args:
                output.write(format_string.format(*args))
                output.write('\n')

        output.flush()

    def _get_structure_by_id(self, key, id_type='both'):
        """ Obtain a specific `Structure` from `StructureList` by an ID.

        Parameters
        ----------
        key : array_like[str or unicode, int]
            First value sets the key to search for (must be either the name or local identifier of the
            Structure), second value indicates which repetition to select, with zero-based indexing,
            typically used if there are multiple `Structure`'s with the same id.
        id_type : str, unicode or list[str or unicode], optional
            If given, either 'name', 'local_identifier', 'both', or a list of the three values in the
            order that the search should look at them while trying to match key. 'name' and
            'local_identifier' are looked up in each Structure's meta data, whereas 'both' is matched
            against each Structure's ``id``. Defaults to 'both'.

        Returns
        -------
        Structure or None
            Matched PDS4 data structure, or None.
        """

        # Ensure key is set correctly
        name, repetition = key[0], int(key[1])

        # Re-use list logic when given a string id_type
        if isinstance(id_type, six.string_types):
            id_type = [id_type]

        # Look for a match for each id_type in the order they are specified
        for cur_id_type in id_type:

            if cur_id_type in ('local_identifier', 'name'):
                matches = [structure for structure in self.structures
                           if structure.meta_data.get(cur_id_type) == name]

            elif cur_id_type == 'both':
                matches = [structure for structure in self.structures
                           if structure.id == name]

            else:
                matches = []

            # Bounds-check both directions so that an out-of-range (including negative) repetition
            # yields None rather than leaking an IndexError from the list lookup
            if -len(matches) <= repetition < len(matches):
                return matches[repetition]

        return None
class Structure(object):
    """ Stores a single PDS4 data structure.

    Subclassed by `TableStructure` and `ArrayStructure`.

    Parameters
    ----------
    structure_data : any, optional
        The data in this PDS4 data structure. If not given and never set, data can be read-in
        via `from_file`.
    structure_meta_data : Meta_Structure, optional
        Meta data describing this object (originating from the label).
    structure_label : Label, optional
        The segment of the label describing only this data structure.
    full_label : Label, optional
        The entire label describing the PDS4 product this structure originated from.
    parent_filename : str or unicode, optional
        Filename, including full path, of the data file that contained the data for this structure.
    structure_id : str or unicode, optional
        If given, sets an ID for the structure. If not given, an attempt to obtain ID will be made from
        the meta data.

    Attributes
    ----------
    parent_filename : str or unicode
        Filename of the data file that contained the data for this structure.
    full_label : Label
        The entire label describing the PDS4 product this structure originated from.
    label : Label
        The segment of the label describing only this data structure.
    meta_data : Meta_Structure
        Meta data describing this object (originating from the label).
    data : any
        The data of this PDS4 data structure.

    Examples
    --------
    See `pds4_read` docstring for examples.
    """

    def __init__(self, structure_data=None, structure_meta_data=None, structure_label=None,
                 full_label=None, parent_filename=None, structure_id=None):

        super(Structure, self).__init__()

        # An explicitly given ID takes precedence over the one recorded in the meta data
        if structure_id is not None:
            self._id = structure_id
        else:
            self._id = None if (structure_meta_data is None) else structure_meta_data.id

        self.parent_filename = parent_filename
        self.label = structure_label
        self.full_label = full_label
        self.meta_data = structure_meta_data

        # If data is given, set it. Otherwise the `data` method will lazy-load it as appropriate
        if structure_data is not None:
            self.data = structure_data

        # Controls whether data read-in from file will be scaled, whether it will be masked and
        # whether byte strings will be decoded to unicode
        self._no_scale = None
        self._masked = None
        self._decode_strings = None

    def __repr__(self):
        """
        Returns
        -------
        str
            A repr string identifying the Structure type, id and location.
        """
        return str('<{0} {1} at {2}>').format(self.__class__.__name__, repr(self.id), hex(id(self)))

    @property
    def id(self):
        """
        Returns
        -------
        str or unicode
            The ID (either local identifier if given, or name if given) of this data structure. If
            neither was given, an ID was likely assigned. None if no ID is set and there is no
            meta data to take one from.
        """

        # Prefer the explicitly stored ID, falling back on the meta data's ID
        if self._id:
            return self._id

        if self.meta_data:
            return self.meta_data.id

        return None

    @id.setter
    def id(self, value):
        """
        Parameters
        ----------
        value : str or unicode
            The ID to set for this data structure.

        Returns
        -------
        None
        """
        self._id = value

    @property
    def type(self):
        """
        Returns
        -------
        str, unicode or None
            The official PDS4 data structure type name for this structure (the tag of its label),
            or None if the structure has no label.
        """

        if self.label:
            return self.label.tag

        return None

    @property
    def data_loaded(self):
        """
        Returns
        -------
        bool
            True if the `data` attribute has been set (e.g. data has been read from file or set),
            False otherwise.
        """

        # Once set or computed, `data` is stored in the instance dictionary
        return 'data' in self.__dict__

    @classmethod
    def from_file(cls, data_filename, structure_label, full_label,
                  lazy_load=False, no_scale=False, decode_strings=False):
        """ Create structure from relevant labels and file for the data.

        Parameters
        ----------
        data_filename : str or unicode
            Filename of the data file that contained the data for this structure.
        structure_label : Label
            The segment of the label describing only this data structure.
        full_label : Label
            The entire label describing the PDS4 product this structure originated from.
        lazy_load : bool, optional
            If True, does not read-in the data of this structure until the first attempt to access it.
            Defaults to False.
        no_scale : bool, optional
            Controls whether read-in data is adjusted according to the offset and scaling factor.
            Judging by the name, True presumably disables the adjustment; confirm against the
            subclass implementations, as this base method does not use the value.
            Defaults to False.
        decode_strings : bool, optional
            If True, strings data types contained in the returned data will be decoded to the ``unicode``
            type in Python 2, and to the ``str`` type in Python 3. If False, leaves string types as byte
            strings. Defaults to False.

        Returns
        -------
        Structure
            An object representing the PDS4 structure; contains its label, data and meta data.

        Raises
        ------
        NotImplementedError
            Each type of `Structure` subclassing this class must implement its own `from_file` method.
        """

        # Must be raised, not returned: returning would hand callers the exception class itself
        raise NotImplementedError

    def info(self, abbreviated=False, output=None):
        """ Prints a summary of this data structure.

        Parameters
        ----------
        abbreviated : bool, optional
            If False, output additional detail. Defaults to False.
        output : file, bool or None, optional
            A file-like object to write the output to. If set to False then instead of outputting
            to a file a list representing the summary parameters for the Structure is returned.
            Writes to ``sys.stdout`` by default.

        Returns
        -------
        None or list
            If output is False, then returns a list representing the summary parameters for the Structure.
            Otherwise returns None.

        Raises
        ------
        NotImplementedError
            Each type of `Structure` subclassing this class must implement its own `info` method.
        """
        raise NotImplementedError

    @threaded_cached_property
    def data(self):
        """ The data of this PDS4 structure.

        Raises
        ------
        NotImplementedError
            Each type of `Structure` subclassing this class must implement its own `data` method.
        """
        raise NotImplementedError

    def is_array(self):
        """
        Returns
        -------
        bool
            True if this `Structure` is a form of a PDS4 array, false otherwise.
        """

        # Imported at call time rather than at module level
        from .array_objects import ArrayStructure

        return isinstance(self, ArrayStructure)

    def is_table(self):
        """
        Returns
        -------
        bool
            True if this `Structure` is a form of a PDS4 table, false otherwise.
        """

        # Imported at call time rather than at module level
        from .table_objects import TableStructure

        return isinstance(self, TableStructure)