from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import re
import abc
import sys
import numpy as np
from .general_objects import Structure, Meta_Class, Meta_Structure
from .label_objects import Meta_SpectralCharacteristics
from .data_types import pds_to_numpy_name, PDSdtype
from ..utils.compat import OrderedDict
from ..utils.helpers import is_array_like, dict_extract
from ..utils.exceptions import PDS4StandardsException
from ..utils.logging import logger_init
from ..extern import six
from ..extern.six.moves import collections_abc
from ..extern.cached_property import threaded_cached_property
# Initialize the logger
logger = logger_init()
#################################
[docs]class TableStructure(Structure):
""" Stores a single PDS4 table data structure.
Contains the table's data, meta data and label portion. All forms of PDS4 tables
(e.g. Table_Character, Table_Binary and Table_Delimited) are stored by this class.
See `Structure`'s and `pds4_read`'s docstrings for attributes, properties and usage instructions
of this object.
Inherits all Attributes and Parameters from `Structure`. Overrides `info`, `data` and `from_file`
methods to implement them.
"""
[docs] @classmethod
def from_file(cls, data_filename, structure_label, full_label,
lazy_load=False, no_scale=False, decode_strings=False):
""" Create a table structure from relevant labels and file for the data.
Parameters
----------
data_filename : str or unicode
Filename of the data file that contained the data for this table structure.
structure_label : Label
The segment of the label describing only this table structure.
full_label : Label
The entire label describing the PDS4 product this structure originated from.
no_scale : bool, optional
Read-in data will not be adjusted according to the offset and scaling factor. Defaults to False.
decode_strings : bool, optional
If True, strings data types contained in the returned data will be decoded to
the ``unicode`` type in Python 2, and to the ``str`` type in Python 3. If
false, leaves string types as byte strings. Defaults to False.
lazy_load : bool, optional
If True, does not read-in the data of this structure until the first attempt to access it.
Defaults to False.
Returns
-------
TableStructure
An object representing the PDS4 table structure; contains its label, data and meta data.
"""
# Create the meta data structure for this table
meta_table_structure = Meta_TableStructure.from_label(structure_label, full_label)
# Create the data structure for this table
table_structure = cls(structure_data=None, structure_meta_data=meta_table_structure,
structure_label=structure_label, full_label=full_label,
parent_filename=data_filename)
table_structure._no_scale = no_scale
table_structure._decode_strings = decode_strings
# Attempt to access the data property such that the data gets read-in (if not on lazy-load)
if not lazy_load:
table_structure.data
return table_structure
[docs] @classmethod
def from_fields(cls, fields, no_scale=False, decode_strings=False, masked=None, **structure_kwargs):
""" Create a table structure from PDS-compliant data or meta data.
Parameters
----------
fields : list[PDS_ndarray, PDS_marray or Meta_Field]
A list of either fields with data, where each must contain a valid PDS4 meta_data attribute
defining the field, or a list of valid Meta_Field's.
no_scale : bool, optional
If True, and input is a list of fields with data, then the data will scaled according to the
scaling_factor and value_offset meta data. If the *fields* is meta data only, then the output
data type will be large enough to store the scaled values. If False, no scaling or data type
conversions will be done. Defaults to False.
decode_strings : bool, optional
If True, then fields containing character byte data will be converted to a dtype of unicode.
If False, then for character data the obtained dtype will remain byte strings. Defaults to
False.
masked : bool or None, optional
If True, and input is a list of fields with data, then the data will retain any masked values and
in additional have numeric Special_Constants values masked. If False, any masked values in the
input fields will be unmasked and data assignments will not preserve masked values. If None,
masked values in the input will be retained only if any are present. Defaults to None.
structure_kwargs : dict, optional
Keywords that are passed directly to the `TableStructure` constructor.
Returns
-------
TableStructure
An object representing the PDS4 table structure. The data will contain a structured array that
can store values for *fields* (or does store it, if input is a list of fields with data). Other
attributes may be specified via *structure_kwargs*.
"""
from .read_tables import new_table
return new_table(fields, no_scale=no_scale, decode_strings=decode_strings, masked=masked,
**structure_kwargs)
[docs] def info(self, abbreviated=False, output=None):
""" Prints a summary of this data structure.
Contains the type, number of fields and number of records in the Table.
If *abbreviated* is True then also outputs the name and number of elements
for each field in the table.
Parameters
----------
abbreviated : bool, optional
If False, output additional detail. Defaults to False.
output : file, bool or None, optional
A file-like object to write the output to. If set to False then instead of outputting
to a file a list representing the summary parameters for the Structure is returned.
Writes to ``sys.stdout`` by default.
Returns
-------
None or list
If output is False, then returns a list representing the summary parameters for the Structure.
Otherwise returns None.
"""
# Set default output to write to command line
if output is None:
output = sys.stdout
# Obtain abbreviated version of summary
dimensions = self.meta_data.dimensions()
id = "'{0}'".format(self.id)
field_info = '{0} fields x {1} records'.format(dimensions[0], dimensions[1])
summary_args = [self.type, id, field_info]
abbreviated_info = "{0} {1} ({2})".format(*summary_args)
# If output is false, return list representing the various parameters of the summary
if not output:
return summary_args
# Otherwise write out summary to output
if abbreviated:
output.write(abbreviated_info)
output.write('\n')
output.flush()
else:
output.write('Fields for {0}: \n\n'.format(abbreviated_info))
for field in self.fields:
name = field.meta_data['name']
full_name = field.meta_data.full_name(separator=', ')
full_name_print = ''
if name != full_name:
full_name_print = ' (full name: {0})'.format(full_name)
output.write(name + full_name_print + '\n')
output.flush()
[docs] def __getitem__(self, key):
""" Get data for specific field in `TableStructure`.
In the case of GROUP fields, the key can include the full location for disambiguation,
(e.g. field_name = 'GROUP_1, GROUP_0, field_name'). See `info` method for full locations of all
fields.
Parameters
----------
key : str, unicode, int, slice or tuple
Selection for desired field. May be a string containing the name of a field to search for,
similar to ``dict`` indexing functionality. May be an integer or slice specifying which field(s)'
data to select, similar to ``list`` or ``tuple`` indexing functionality. May be a two-valued
tuple, with the first value providing the field name and the second value a zero-based repetition
selector count if there are multiple fields with the same name.
Returns
-------
PDS_ndarray, PDS_marray or None
The data for the matched field, or None if no match found.
Raises
------
IndexError
Raised if *key* is a larger int than there are fields in the table.
ValueError
Raised if *key* contains a name that does not match any field in the table.
Examples
--------
>>> table_struct['First_Field_Name']
>>> table_struct.field(0)
To access two fields at the same time by name,
>>> table_struct[['First_Field', 'Second_Field']]
In-case of GROUP fields, we can use the full name (as given by the `info` method) for disambiguation,
>>> table_struct['GROUP_1, GROUP_0, field_name']
If both of the first two data structures have the name 'Field_Name', then to select the second
we can do,
>>> table_structure['Field_Name [1]']
>>> table_struct.field('Field_Name', 1)
We can also record record-wise. To obtain all fields for the first five records,
>>> table_struct[0:5]
"""
# Search by full or partial field name for single-valued search
if isinstance(key, six.string_types):
return self.field(key)
# Search by full or partial field name for multi-valued search
elif is_array_like(key):
if np.issubdtype(np.asarray(key).dtype, np.character):
fields = [self.field(_key) for _key in key]
field_names = [pds_to_numpy_name(field.meta_data.full_name())
for field in fields]
return self.data[field_names]
# Allow all other searches (slice, specific indexes, etc)
return self.data[key]
[docs] def __len__(self):
"""
Returns
-------
int
Number of fields contained in the table.
"""
if self.data_loaded:
num_fields = len(self.fields)
else:
dimensions = self.meta_data.dimensions()
num_fields = dimensions[0]
return num_fields
[docs] def field(self, key, repetition=0, all=False):
""" Get data for specific field in table.
In the case of GROUP fields, the key can include the full location for disambiguation,
(e.g. *key* = 'GROUP_1, GROUP_0, field_name'). See `info` method for full locations of all
fields.
Parameters
----------
key : str, unicode, int or slice
Selection for desired field. May be an integer or slice specifying which field(s)' data
to select, similar to ``list`` or ``tuple`` indexing functionality. May be a string
containing the name of a field to search for.
repetition : int, optional
If there are multiple fields with the same name, specifies a zero-based repetition count
to return. Defaults to 0.
all : bool, optional
If there are multiple fields with the same name, setting to True indicates that
all fields should be returned in a ``list``. If set, and no match is found
then an empty list will be returned. Defaults to False.
Returns
-------
PDS_ndarray or PDS_marray
The data for the field(s).
Raises
------
IndexError
Raised if *key* is a larger integer than there are fields in the table.
ValueError
Raised if *key* is a name that does not match any field.
Examples
--------
See :func:`TableStructure.__getitem__` method examples.
"""
return_fields = []
# Search by index or slice
if isinstance(key, six.integer_types) or isinstance(key, slice):
return self.fields[key]
# Search by name (full or partial)
for field in self.fields:
if field.meta_data.full_name() == key:
return_fields.append(field)
elif field.meta_data['name'] == key:
return_fields.append(field)
# Return result
if len(return_fields) > repetition and not all:
return return_fields[repetition]
elif all:
return return_fields
if repetition > 0:
raise ValueError("Field '{0}' (repetition {1}) not found.".format(key, repetition))
else:
raise ValueError("Field '{0}' not found.".format(key))
@threaded_cached_property
def data(self):
""" All data in the PDS4 table data structure.
This property is implemented as a thread-safe cacheable attribute. Once it is run
for the first time, it replaces itself with an attribute having the exact
data that was originally returned.
Unlike normal properties, this property/attribute is settable without a __set__ method.
To never run the read-in routine inside this property, you need to manually create the
the ``.data`` attribute prior to ever invoking this method (or pass in the data to the
constructor on object instantiation, which does this for you).
Returns
-------
PDS_ndarray or PDS_marray
A structured array (either effectively np.ndarray or np.ma.MaskedArray) representing all the
fields in this table.
"""
super(TableStructure, self).data()
from .read_tables import read_table_data
read_table_data(self, no_scale=self._no_scale, decode_strings=self._decode_strings, masked=self._masked)
return self.data
@property
def fields(self):
"""
Returns
-------
list[PDS_ndarray or PDS_marray]
All the fields in this table.
"""
return [self.data[name] for name in self.data.dtype.names]
[docs] def set_field(self, data, meta_data):
""" Set a field in the table.
Notes
-----
Prior to calling this method, ``self.data`` must be set to a structured ``ndarray`` that will be
used to store each field. See `TableStructure.from_fields`.
Parameters
----------
data : array_like
Data for the field to be added.
meta_data : Meta_Field
Meta data of the field to be added.
Returns
-------
None
"""
if not self.data_loaded:
raise RuntimeError('Unable to add field: backing data structure has not been initialized.')
self.data.set_field(data, meta_data)
[docs] def as_masked(self):
""" Obtain a new TableStructure, where numeric fields with Special_Constants are masked.
Notes
-----
The returned Structure may not be easily converted back to unmasked. However, the original
Structure can continue to be used to access the unmasked data.
Returns
-------
TableStructure
A new structure, where numeric fields with Special_Constants are masked. The data, labels and
meta data are all effectively views, no copies are made.
"""
kwargs = {'structure_meta_data': self.meta_data,
'structure_label': self.label, 'full_label': self.full_label,
'parent_filename': self.parent_filename, 'structure_id': self.id}
# If data is already loaded, create a view of the TableStructure where the data is masked
if self.data_loaded:
table_structure = self.from_fields(fields=self.fields, no_scale=True, decode_strings=False,
masked=True, copy=False, structure_data=self.data, **kwargs)
# If the data has not been loaded, create a view of TableStructure that indicates to mask data
# when the attempt to access it is made later
else:
table_structure = self.__class__(**kwargs)
table_structure._no_scale = self._no_scale
table_structure._decode_strings = self._decode_strings
table_structure._masked = True
return table_structure
[docs]class TableManifest(collections_abc.Sequence):
""" Stores a single table's Meta_Fields and Meta_Groups
The manifest is a representation the table, with fields and groups in the same order as they physically
appear in the label, created by adding the appropriate Meta_Field and Meta_Group structures.
The manifest is normally initialized from the label portion describing the Table, via `from_label`.
Parameters
----------
items : list[Meta_Field or Meta_Group], optional
A list of Meta_Fields or Meta_Groups describing the Fields and Groups in this TableManifest.
table_type : str or unicode, optional
The type of table. One of
table_label : Label or ElementTree Element, optional
Portion of label that defines the PDS4 table data structure.
"""
def __init__(self, items=None, table_type=None, table_label=None):
self._struct = [] if (items is None) else items
self._table_type = table_type
self._table_label = table_label
super(TableManifest, self).__init__()
[docs] def __getitem__(self, key):
"""
Parameters
----------
key : int or slice
Index of requested field or group.
Returns
-------
Meta_Field or Meta_Group
Matched field or group meta data.
"""
return self._struct[key]
[docs] def __len__(self):
"""
Returns
-------
int
Total number of fields and groups in the table.
"""
return len(self._struct)
[docs] @classmethod
def from_label(cls, table_label):
""" Create a TableManifest from the XML portion describing the Table structure.
Parameters
----------
table_label : Label or ElementTree Element
Portion of label that defines the PDS4 table data structure.
Returns
-------
TableManifest
Instance containing all appropriate Meta_Field's and Meta_Group's.
"""
# Find the <Record_*> and set the table type (e.g. Character, Binary or Delimited)
record_str = [elem.tag for elem in table_label if 'Record_' in elem.tag][0]
record_xml = table_label.find(record_str)
table_type = record_str.split('_')[-1]
obj = cls(table_type=table_type, table_label=table_label)
# Add Uniformly_Sampled fields to the manifest
obj._add_uniformly_sampled(table_label)
# Add all fields and groups in <Record_*> to the manifest
obj._add_fields_and_groups(record_xml)
# Set group names for all groups
obj._create_group_names()
# Set location list for all fields and groups
obj._create_full_locations()
# Perform basic sanity and basic validation checking on the table
obj._validate_table(record_xml)
return obj
@property
def num_items(self):
"""
Returns
-------
int
Total number of fields and groups in the table manifest.
"""
return len(self)
@property
def num_records(self):
if self._table_label is not None:
return int(self._table_label.findtext('records'))
return None
[docs] def fields(self, skip_uniformly_sampled=False):
""" Obtain all fields in the table manifest.
Parameters
----------
skip_uniformly_sampled : bool, optional
If True, uniformly sampled fields not added to the returned list. Defaults to False.
Returns
-------
list[Meta_Field]
All fields in the table manifest.
"""
return [item for item in self
if item.is_field() and
(not skip_uniformly_sampled or (item not in self.uniformly_sampled_fields()))]
[docs] def groups(self):
"""
Returns
-------
list[Meta_Group]
All groups in the table manifest.
"""
return [item for item in self if item.is_group()]
[docs] def get_field_by_full_name(self, field_full_name):
""" Returns field(s) matched by full name.
In the case of GROUP fields, the full name will include the full location for disambiguation,
(e.g. field_full_name = 'GROUP_1, GROUP_0, field_name'). The full location includes the names of
the groups necessary to reach this field.
Parameters
----------
field_full_name : str or unicode
The full name of the field(s) to search for.
Returns
-------
list[Meta_Field]
List of matched fields.
"""
matching_fields = [item for item in self._struct if item.full_name() == field_full_name]
return matching_fields
[docs] def get_children_by_idx(self, parent_idx, direct_only=False, return_idx=False):
""" Obtains the child items (Meta_Fields and Meta_Groups) of the *parent_idx* item.
Child items of a Group field are other Fields and Groups inside said Group field,
and the children of those groups, etc.
Parameters
----------
parent_idx : int
The index (in this TableManifest) of the Meta_Group whose children to find.
A value of -1 indicates to find all items in the entire manifest.
direct_only : bool, optional
If True, only the immediate children of the *parent_idx* element are returned. Defaults to False.
return_idx : bool, optional
If True, changes the return type to a ``list`` containing the indicies (in this TableManifest)
of the matched children. Defaults to False.
Returns
-------
TableManifest
A new table manifest containing only the matched children.
"""
if return_idx:
children = []
else:
children = TableManifest()
if parent_idx == -1:
parent_item_group_lvl = 0
else:
parent_item_group_lvl = self._struct[parent_idx].group_level + 1
for i in range(parent_idx + 1, self.num_items):
cur_item = self._struct[i]
if cur_item.group_level >= parent_item_group_lvl:
if cur_item.group_level == parent_item_group_lvl or not direct_only:
if return_idx:
children.append(i)
else:
children._append(cur_item)
else:
break
return children
[docs] def get_parent_by_idx(self, child_idx, return_idx=False):
""" Obtains the parent Meta_Group of the child item given by *child_idx*.
Parameters
----------
child_idx : int
The index (in this TableManifest) of the Meta_Field or Meta_Group whose parent to find.
return_idx : bool, optional
If True, changes the return type to an ``int``, which is the index (in this TableManifest) of the
parent Meta_Group instead of the group itself. Returns -1 if a parent was not found. Defaults to
False.
Returns
-------
Meta_Group or None
The parent of the specified child, or None if a parent was not found.
"""
child_item = self._struct[child_idx]
child_group_level = child_item.group_level
for i in range(child_idx - 1, -1, -1):
cur_item = self._struct[i]
if (cur_item.group_level + 1) == child_group_level and cur_item.is_group():
if return_idx:
return i
else:
return cur_item
if return_idx:
return -1
return None
[docs] def get_parents_by_idx(self, child_idx, return_idx=False):
""" Obtains all Meta_Group parents of the child item given by *child_idx*.
Parameters
----------
child_idx : int
The index (in this TableManifest) of the Meta_Field or Meta_Group whose parent to find.
return_idx : bool, optional
If True, changes the return type to a list of ``int``'s, each of which is an index (in this
TableManifest) of the parent Meta_Group instead of the group itself. A value of -1 will be present
in the list if no parent is found. Defaults to False.
Returns
-------
list[Meta_Group]
The parents of the specified child, or empty list if no parents were not found.
"""
child_item = self._struct[child_idx]
parent_idx = child_idx
parent_groups = []
# Obtain values for all parents of child
for j in range(0, child_item.group_level):
parent_idx = self.get_parent_by_idx(parent_idx, return_idx=True)
parent_or_idx = parent_idx if return_idx else self[parent_idx]
parent_groups.append(parent_or_idx)
return parent_groups
[docs] def get_field_offset(self, field_idx, parent_idx=None):
""" Obtains the number of data values between *parent_idx* and *field_idx* in a single record.
Each value in a record is a single field value, or a single repetition of a field value. If all
the data values for all fields in a single record were combined into an array, then data_array[offset]
(with the default *parent_idx*) would select that field's value (or the value of its first
repetition, for fields inside groups).
Parameters
----------
field_idx : int
The index (in this TableManifest) of the Meta_Field whose offset to find.
parent_idx : int, optional
The parent index (index of one of the group fields that *field_idx* is inside of), from which
to calculate the offset to *field_idx*. If None, calculates the offset from the beginning of the
record. Defaults to None.
Returns
-------
int
The number of values, where each repetition of any field also counts as a separate value, that
are between *parent_idx* and *field_idx* (inclusive).
"""
offset_num = 0
field = self._struct[field_idx]
# Calculate the offset from beginning of record unless otherwise requested
if parent_idx is None:
parent_idx = -1
# Iterate over every child of `parent_idx` until we reach the `field`
for child_item in self.get_children_by_idx(parent_idx, direct_only=True):
if child_item is field:
break
elif child_item.is_field():
offset_num += 1
elif child_item.is_group():
full_index = self.index(child_item)
offset_add = self.get_field_offset(field_idx, parent_idx=full_index)
if field in self.get_children_by_idx(full_index):
offset_num += offset_add
break
else:
offset_num += offset_add * child_item['repetitions']
return offset_num
[docs] def get_field_skip_factors(self, field_idx, parent_idxs=None):
""" Obtains the number of data values between each repetition of *field_idx* for every parent group.
Each value in a record is a single field value, or a single repetition of a field value. If each
skip factor were multiplied by the max number of repetitions for that group, and the results were
added together then that skip value would represent the number of data values between the first
value in the field and the last value in the field.
Parameters
----------
field_idx : int
The index (in this TableManifest) of the Meta_Field whose offset to find.
parent_idxs : list[int], optional
The parent indexes (indexes of group fields that *field_idx* is inside of) for which to
calculate the skip factors. If None, selects the parent indexes for all groups that *field_idx*
is inside of. Defaults to None.
Returns
-------
list[int]
Each item in the list is the number of values, where each repetition of any field also counts
as a separate value, between each repetition of the field for that group. The values are
in reverse order from *parent_idxs*.
"""
skip_factors = []
field = self._struct[field_idx]
# Calculate skip factors for all groups that `field` is unless requested otherwise
if parent_idxs is None:
item_idx = self.index(field)
parent_idxs = self.get_parents_by_idx(item_idx, return_idx=True)
# Iterate over every group
for parent_idx in reversed(parent_idxs):
skip_factor = 0
# For each group that `field` is inside of, find the amount of other fields and groups; this
# is how many other values will need to be skipped until we reach `field` on the next repetition.
for child_item in self.get_children_by_idx(parent_idx, direct_only=True):
if (child_item is not field) and child_item.is_field():
skip_factor += 1
elif child_item.is_group():
child_idx = [self.index(child_item)]
skip_factor += self.get_field_skip_factors(field_idx, parent_idxs=child_idx)[0] \
* child_item['repetitions']
skip_factors.append(skip_factor)
return skip_factors
def _insert(self, key, field_or_group):
""" Inserts an item (Meta_Field or Meta_Group) into the manifest.
Parameters
----------
key : int
The key (index) the item will have in this TableManifest.
field_or_group : Meta_Field or Meta_Group
The field or group to insert.
Returns
-------
None
"""
self._struct.insert(key, field_or_group)
def _append(self, field_or_group):
""" Appends an item (Meta_Field or Meta_Group) at the end of the manifest.
Parameters
----------
field_or_group : Meta_Field or Meta_Group
The field or group to append.
Returns
-------
None
"""
self._struct.append(field_or_group)
def _add_fields_and_groups(self, xml_parent, group_level=0):
"""
Adds all <Field_*>s and <Group_Field_*>s which are direct children of xml_parent to the
TableManifest, and then recursively all children of the <Group_Field_*>s.
The end result, when this is called on the <Record_*> of a table, is to add all Fields
and all Groups defined inside <Record_*> into the manifest.
Parameters
----------
xml_parent : Label or ElementTree Element
An XML element containing <Field_*> or <Group_Field_*> tags as children.
group_level : int, optional
The depth the current elements being added are nested from the <Record_*>. Defaults to 0,
which means direct children. (Only non-zero for children of Group fields.)
Returns
-------
None
"""
field_tag = 'Field_' + self._table_type
group_tag = 'Group_Field_' + self._table_type
# Work around ElementTree not having ability to findall based on multiple conditions
# while preserving order
elements_xml = [element for element in xml_parent
if element.tag == field_tag or
element.tag == group_tag]
# Iterate over all fields and groups
for element in elements_xml:
# Append Fields to Data
if element.tag == field_tag:
if self._table_type == 'Character':
field_type = Meta_FieldCharacter
elif self._table_type == 'Binary':
field_type = Meta_FieldBinary
elif self._table_type == 'Delimited':
field_type = Meta_FieldDelimited
else:
raise ValueError('Unknown table type: ' + self._table_type)
field = field_type.from_label(element)
self._append(field)
field_idx = self.index(field)
field.group_level = group_level
field.shape = tuple(self._get_item_shape(field_idx))
# Append Groups (and recursively sub-Fields and sub-Groups) to Data
else:
group = Meta_Group.from_label(element)
self._append(group)
group.group_level = group_level
self._add_fields_and_groups(element, group_level + 1)
def _add_uniformly_sampled(self, table_xml):
""" Add <Uniformly_Sampled> fields into the manifest.
Parameters
----------
table_xml : Label or ElementTree Element
Portion of label that defines the PDS4 table data structure.
Returns
-------
None
"""
for element in table_xml.findall('Uniformly_Sampled'):
field = Meta_FieldUniformlySampled.from_label(element)
self._append(field)
field_idx = self.index(field)
field.group_level = 0
field.shape = tuple(self._get_item_shape(field_idx))
def _create_group_names(self, group_level=0):
""" Creates a name for each Meta_Group in the manifest that was not given one in the label.
PDS4 Standards make group field names optional. This method creates names where necessary, with the
format 'GROUP_n', where n is an integer from 0 to infinity (depending on the number of groups).
In groups that are nested inside other groups, n will start from 0 regardless of the name of the
parent group.
Parameters
----------
group_level : int, optional
The depth the current group being named is nested from the <Record_*>. Defaults to 0,
which means direct child.
Returns
-------
None
"""
group_indicies = [i for i, item in enumerate(self._struct)
if item.is_group()
if item.group_level == group_level]
if not group_indicies:
return
prev_parent = -2
counter = 0
for group_idx in group_indicies:
group = self._struct[group_idx]
parent_idx = self.get_parent_by_idx(group_idx, return_idx=True)
if parent_idx == prev_parent:
counter += 1
else:
counter = 0
if 'name' not in group:
group['name'] = 'GROUP_' + six.text_type(counter)
prev_parent = parent_idx
self._create_group_names(group_level + 1)
def _create_full_locations(self):
""" Sets the correct full locations for all Meta_Fields and Meta_Groups in the manifest.
A full location is a ``list`` of ``dict``s, where each ``dict`` has a single key and a single
value. All together these keys and values indicate the full path to the table element from the
beginning of the <Record_*>. Available keys are: 'field', 'group', and 'overlap'. A field key
has a value corresponding to the name of a (non-group) field. A group key has a value corresponding
to the name of a group field. An overlap has a value corresponding to the overlap number of the
preceding field or group (zero-based index).
Returns
-------
None
Examples
--------
Suppose the following Example Label Outline::
Table_Binary: Observations
Field: order
Field: wavelength (1)
Group: unnamed
Field: pos_vector (2)
Group: unnamed
Group: unnamed
Field: pos_vector
Field: pos_vector (3)
The full locations of the fields labeled above in parenthesis are as follows:
(1) [{'field': 'wavelength'}]
(2) [{'group': 'GROUP_0'},
{'field': 'pos_vector'}]
(3) [{'group': 'GROUP_1'},
{'group': 'GROUP_0'},
{'field': 'pos_vector'},
{'overlap': '1'}]
"""
for i, item in enumerate(self._struct):
full_location = []
# Begin by adding item name to full location
idx = i
type = 'group' if item.is_group() else 'field'
full_location.insert(0, {type: item['name']})
# Find _struct indicies of items having the same name and group location as current item
identical_idxs = self._get_overlapping_item_idxs(i)
# Add overlap count for groups and fields that have the same name and same group location
# (otherwise these groups and fields would have exactly the same full name)
if len(identical_idxs) >= 2:
full_location.append({'overlap': identical_idxs.index(i)})
parent_idx = self.get_parent_by_idx(idx, return_idx=True)
# Add field parent group's full location
# (this should already contain locations of all prior groups)
if parent_idx != -1:
group = self._struct[parent_idx]
# Add parent group's location
group_full_location = [location.copy() for location in group.full_location]
group_full_location.extend(full_location)
full_location = group_full_location
item.full_location = full_location
def _get_overlapping_item_idxs(self, item_idx):
"""
Overlapping items are those Meta_Field's and Meta_Group's that have the same type (i.e., either
field or group), name, and group location as `table_manifest[item_idx]`. The simplest case is two
fields with the same name in a table without groups. To disambiguate these fields there is special
handling, but first we need to find them, which is the purpose of this method.
Parameters
----------
item_idx : int
The index (in this TableManifest) of the Meta_Field or Meta_Group whose overlapping items
to find.
Returns
-------
list[int]
Indexes (in this TableManifest) of the Meta_Field's or Meta_Group's that overlap with `item_idx`.
"""
item = self._struct[item_idx]
item_parent_idx = self.get_parent_by_idx(item_idx, return_idx=True)
overlapping_item_idxs = []
# Determine if we're looking for overlapping fields or overlapping groups
find_fields = True
if item.is_group():
find_fields = False
child_idxs = self.get_children_by_idx(item_parent_idx, direct_only=True, return_idx=True)
for idx in child_idxs:
if self._struct[idx]['name'] == item['name']:
if (find_fields and not self._struct[idx].is_group()) or (
not find_fields and self._struct[idx].is_group()):
overlapping_item_idxs.append(idx)
return overlapping_item_idxs
def _get_item_shape(self, item_idx):
""" Obtain dimensions of item given by *item_idx*.
Normally this would be used only on Meta_Field items, to obtain the dimensions of the array needed
for that field's data.
Parameters
----------
item_idx : int
The index (in this TableManifest) of the Meta_Field or Meta_Group whose shape to obtain.
Returns
-------
list[int]
Dimensions of the item. For fields not inside groups, this will be equivalent to the number of
records.
"""
item_shape = [self.num_records]
for parent_group in self.get_parents_by_idx(item_idx):
item_shape.insert(1, parent_group['repetitions'])
return item_shape
def _validate_table(self, record_xml):
"""
Performs basic sanity checks on the table and basic verification of some PDS4 standards
not currently tested by the schema and schematron.
Checks performed:
1) Ensure <fields> matches number of Field_* (including for nested fields)
2) Ensure <groups> matches number of Group_Field_* (including for nested groups)
3) Ensure the <format> of each field, if given, is valid according to the PDS4 standards
4) Ensure that <group_repetitions> mod <group_length> equals zero for all group fields
If any of the optional checks (1-3) fail, a warning is output. It is normally safe to continue.
If an unsafe check fails, an exception is raised.
Parameters
----------
record_xml : Label or ElementTree Element
The Record_* portion of the label for this table.
Returns
-------
None
Raises
------
PDS4StandardsException
Raised if group_repetitions do not evenly divide into group_length for all group fields.
"""
# Check that number of fields and groups claimed matches number of <Field_*> and <Group_Field_*>
num_rec_fields = int(record_xml.findtext('fields'))
num_rec_groups = int(record_xml.findtext('groups'))
groups = [group for group in self.groups() if group.group_level == 0]
fields = [field for field in self.fields(skip_uniformly_sampled=True) if field.group_level == 0]
fields_mismatch_warn = '<fields> value does not match number of <Field_{0}> in'.format(self._table_type)
groups_mismatch_warn = '<groups> value does not match number of <Group_Field_{0}> in'.format(self._table_type)
if len(fields) != num_rec_fields:
logger.warning('{0} Record_{1}.'.format(fields_mismatch_warn, self._table_type))
if len(groups) != num_rec_groups:
logger.warning('{0} Record_{1}.'.format(groups_mismatch_warn, self._table_type))
for i, item in enumerate(self._struct):
full_location_warn = "'{0}' (full location: '{1}')".format(item['name'], item.full_name(' -> '))
if item.is_group():
# Check number of fields and groups matching in nested groups
children = self.get_children_by_idx(i, direct_only=True)
groups = [child for child in children if child.is_group()]
fields = [child for child in children if not child.is_group()]
# For fixed-width tables, check that group_length is evenly divisible by group_repetitions
if self._table_type != 'Delimited':
if len(fields) != item['fields']:
logger.warning(fields_mismatch_warn + ' group ' + full_location_warn)
if len(groups) != item['groups']:
logger.warning(groups_mismatch_warn + ' group ' + full_location_warn)
if item['length'] % item['repetitions'] != 0:
raise PDS4StandardsException("Group length '{0}' must be evenly divisible by the "
"number of repetitions '{1}' for group {2}"
.format(item['length'], item['repetitions'],
full_location_warn))
elif 'format' in item:
# Ensure field_format requirements
valid_field_format = re.compile(r'^%([\+,-])?([0-9]+)(\.([0-9]+))?([doxfeEs])$')
if not valid_field_format.match(item['format']):
logger.warning("field_format '{0}' does not conform to PDS4 standards for field {1} "
.format(item['format'], full_location_warn))
else:
is_numeric = {'ASCII_Real': True,
'ASCII_Integer': True,
'ASCII_NonNegative_Integer': True,
'ASCII_Numeric_Base2': True,
'ASCII_Numeric_Base8': True,
'ASCII_Numeric_Base16': True}.get(item['data_type'], False)
sub_exp = valid_field_format.search(item['format']).groups()
plus_minus = sub_exp[0]
width = sub_exp[1]
precision = sub_exp[3]
specifier = sub_exp[4]
if plus_minus is not None:
if plus_minus == '+' and not is_numeric:
logger.warning("field_format '{0}' may not have '+' for a numeric data_type '{1}' for "
"field {2}".format(item['format'], item['data_type'], full_location_warn))
elif plus_minus == '-' and is_numeric:
logger.warning("field_format '{0}' may not have '-' for a non-numeric data_type '{1}' for "
"field {2}".format(item['format'], item['data_type'], full_location_warn))
if self._table_type == 'Character' and int(width) != item['length']:
logger.warning("in Table_Character field_format '{0}' width must be equal to field_length "
"'{1}' for field {2}".format(item['format'], item['length'], full_location_warn))
if precision is not None and specifier == 's' and int(width) != int(precision):
logger.warning("field_format '{0}' precision must be equal to width '{1}' for a string format "
"for field {2}".format(item['format'], width, full_location_warn))
class Meta_FieldBit(Meta_Field):
""" Stores meta data about a single <Field_Bit>. """
@classmethod
def from_label(cls, field_bit_xml):
raise NotImplementedError