from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals
import copy
import numpy as np
from .data_types import pds_to_numpy_name
from ..utils.compat import OrderedDict
from ..extern import six
# NumPy ufuncs whose results ``__array_wrap__`` (in the PDS array classes below) post-processes
# specially: the intent is for them to yield plain ``np.ndarray`` or ``np.ma.MaskedArray`` results
# as opposed to `PDS_ndarray` or `PDS_marray`.
_comparison_functions = set((
    # Element-wise comparisons
    np.greater, np.greater_equal,
    np.less, np.less_equal,
    np.not_equal, np.equal,
    # Element-wise predicates and sign tests
    np.isfinite, np.isinf, np.isnan,
    np.sign, np.signbit,
))
class PDS_array(object):
    """ A factory and helper class to work with PDS_ndarray and PDS_marray.

    Intended such that `PDS_ndarray` and `PDS_marray` never need to be separately imported or called, and
    rather that all initialization and type checking should go through this helper class.
    """

    def __new__(cls, data, meta_data=None, masked=None, **options):
        """ Convert the input into a PDS array.

        Parameters
        ----------
        data : array_like
            Input data, of any dimension or content.
        meta_data : Meta_ArrayStructure or Meta_Field, optional
            Input meta-data.
        masked : bool or None, optional
            If True, forces a PDS masked array as output. If False, forces a PDS non-masked array as output.
            Defaults to None, which determines output array-type based on input.
        options : dict, optional
            Arguments to pass directly into the NumPy array initializer.

        Returns
        -------
        PDS_ndarray or PDS_marray
            A ``PDS_ndarray`` is returned if the input data is not masked, otherwise a ``PDS_marray`` will
            be returned; unless ``masked`` is a bool. Both array types will contain a view (rather than a copy)
            of the original data if the input is an ``np.ndarray`` or its subtype.

        Raises
        ------
        TypeError
            If *masked* is not a bool and *data* is not an ``np.ndarray`` (or subtype), list or tuple.
        """

        # An explicit ``masked=True`` always wins; otherwise masked input stays masked unless the
        # caller explicitly asked for ``masked=False``.
        use_masked = (masked is True) or (isinstance(data, np.ma.MaskedArray) and (masked is not False))

        # An explicit ``masked=False`` always wins; otherwise any recognized array-like that was not
        # already routed to the masked type becomes a regular array.
        use_unmasked = (masked is False) or (isinstance(data, (np.ndarray, list, tuple)) and (not use_masked))

        if use_masked:
            return PDS_marray(data, meta_data=meta_data, **options)

        elif use_unmasked:
            return PDS_ndarray(data, meta_data=meta_data, **options)

        raise TypeError('Unknown data kind.')

    @classmethod
    def get_array(cls, masked):
        """ Obtain a PDS array type.

        Parameters
        ----------
        masked : bool
            If True, a PDS array class subclassing ``np.ma.MaskedArray`` is returned. Otherwise a PDS
            array class subclassing the regular ``np.ndarray`` is returned.

        Returns
        -------
        PDS_ndarray or PDS_marray
            A PDS array class. See *masked*.
        """

        if masked:
            return cls.get_marray()

        return cls.get_ndarray()

    @staticmethod
    def get_ndarray():
        """
        Returns
        -------
        PDS_ndarray
            A PDS array class based on ``np.ndarray``.
        """
        return PDS_ndarray

    @staticmethod
    def get_marray():
        """
        Returns
        -------
        PDS_marray
            A PDS array class based on ``np.ma.MaskedArray``.
        """
        return PDS_marray

    @classmethod
    def isinstance(cls, input):
        """
        Parameters
        ----------
        input : any

        Returns
        -------
        bool
            True if *input* is an instance of PDS_ndarray or PDS_marray. False otherwise.
        """
        # The ``isinstance`` called here is the builtin: names bound in a class body are not visible
        # from inside its methods, so this method does not shadow it.
        return isinstance(input, (cls.get_ndarray(), cls.get_marray()))
class PDS_ndarray(np.ndarray):
    """ PDS ndarray, enabling some record array functionality and having a meta_data attribute.

    Subclasses ndarrays such that we can provide meta data for an individual array or table field.
    Inherits all Attributes from ``np.ndarray``.

    Parameters
    ----------
    data : array_like
        Data for the array.
    meta_data : Meta_ArrayStructure or Meta_Field, optional
        Meta-data for the array.
    options : dict, optional
        NumPy keywords to pass to the ``np.ndarray`` initializer.

    Attributes
    ----------
    meta_data : Meta_ArrayStructure, Meta_Field or OrderedDict
        Meta-data for the array. Defaults to an empty ``OrderedDict`` if no meta-data was given on
        initialization (and *data* carried none) or has been set.
    """

    def __new__(cls, data, meta_data=None, **options):

        obj = np.asanyarray(data, **options).view(cls)

        # Without explicit meta data, inherit whatever the input already carries
        if meta_data is None:
            meta_data = getattr(data, 'meta_data', OrderedDict())

        obj.meta_data = meta_data

        return obj

    def __getitem__(self, idx):
        """
        Parameters
        ----------
        idx : str, slice, array_like
            Standard ``np.ndarray`` indexes: including field name, list of field names, record number or
            slice, or an array-like of record numbers.

        Returns
        -------
        PDS_ndarray, np.void, np.record, or any scalar
            Item(s) in the array for key. If the index is selecting a field(s) or multiple records
            then the meta_data will be preserved for those fields or records.
        """

        obj = super(PDS_ndarray, self).__getitem__(idx)

        # For structured arrays, retrieve the correct meta_data portion if we are not obtaining all of the
        # fields
        if isinstance(obj, np.ndarray):
            obj = obj.view(PDS_ndarray)
            obj.meta_data = self._meta_data_resolve(idx)

        return obj

    def __reduce__(self):
        """ Subclassed to ensure pickling preserves the ``meta_data`` attribute. """

        default_state = super(PDS_ndarray, self).__reduce__()

        # Append meta data as the final element of the state tuple; ``__setstate__`` strips it back off
        new_state = default_state[2] + (self.meta_data,)

        return default_state[0], default_state[1], new_state

    def __setstate__(self, state):
        """ Subclassed to ensure pickling preserves the ``meta_data`` attribute. """

        # The last element is the meta data appended by ``__reduce__``; the rest is NumPy's own state
        self.meta_data = state[-1]
        super(PDS_ndarray, self).__setstate__(state[0:-1])

    def __repr__(self):
        """ Subclassed to ensure that scalars take-on their normal dtype, instead of being a 0-d array. """

        # For scalars convert to correct NumPy type and then use regular repr. This ensures we do
        # not get back a type of this array with a single value when using functions that return just one
        # value (e.g. ``np.min`` or ``np.max``), but rather just the value, which preserves regular NumPy
        # behavior.
        if self.ndim == 0:
            return repr(self.item())

        return super(PDS_ndarray, self).__repr__()

    def __array_finalize__(self, obj):
        """
        Subclassed to ensure that creation and views correctly set and preserve the ``meta_data``
        attribute.
        """

        if obj is None:
            return

        self.meta_data = getattr(obj, 'meta_data', OrderedDict())

    def __array_wrap__(self, out_arr, context=None):
        """
        Based on AstroPy ``Column.__array_wrap__`` implementation. __array_wrap__ is
        called at the end of every ufunc.

        "Normally, we want a PDS_ndarray object back and do not have to do anything
        special. But there are two exceptions:

        1) If the output shape is different (e.g. for reduction ufuncs
           like sum() or mean()), a PDS_array makes little sense, so we return
           the output viewed as the array content (ndarray or MaskedArray).
           For this case, we use "[()]" to select everything, and to ensure we
           convert a zero rank array to a scalar. (For some reason np.sum()
           returns a zero rank scalar array while np.mean() returns a scalar;
           So the [()] is needed for this case.

        2) When the output is created by any function that returns a boolean
           we also want to consistently return an array rather than a PDS_ndarray"
        """

        out_arr = super(PDS_ndarray, self).__array_wrap__(out_arr, context)

        # NOTE(review): ``out_arr[()]`` is routed through ``__getitem__`` above, which re-views any
        # ndarray result as a PDS_ndarray -- confirm whether a plain ``np.ndarray`` was intended here.
        if (self.shape != out_arr.shape or
                (isinstance(out_arr, PDS_ndarray) and
                 (context is not None and context[0] in _comparison_functions))):
            return out_arr[()]
        else:
            return out_arr

    def copy(self, order='C'):
        """ Copy the array.

        Parameters
        ----------
        order : {'C', 'F', 'A', 'K'}, optional
            Controls the memory layout of the copy. 'C' means C-order, 'F' means F-order, 'A' means 'F' if
            a is Fortran contiguous, 'C' otherwise. 'K' means match the layout of a as closely as possible.

        Returns
        -------
        PDS_ndarray
            An array with both the data and meta data copied.
        """

        # Fall back to the no-argument form if the underlying ``copy`` rejects the ``order`` keyword
        try:
            obj = super(PDS_ndarray, self).copy(order=order)
        except TypeError:
            obj = super(PDS_ndarray, self).copy()

        # Deep copy so that the new array does not share (mutable) meta data with this one
        obj.meta_data = copy.deepcopy(getattr(self, 'meta_data', OrderedDict()))

        return obj

    def field(self, key, val=None):
        """ Get or set data for a single field.

        Parameters
        ----------
        key : int or str
            Key to select the field on. Either the name of the field, or its index.
        val : any, optional
            If given, sets the field specified by *key* to have value of *val*.

        Returns
        -------
        any or None
            A view of the selected field, if val is None. Otherwise returns None.
        """

        # Resolve field name from field index. NumPy integer scalars are accepted as well as Python
        # ints, since otherwise they would fall through and select a record rather than a field.
        if isinstance(key, (int, np.integer)):
            key = self.dtype.names[key]

        # Obtain field
        obj = self.__getitem__(key)

        # Either set field values or return the field
        if val is not None:
            self.set_field(data=val, meta_data=obj.meta_data, name=key)
        else:
            return obj

    def set_field(self, data, meta_data, name=None):
        """ Set data and meta data for a single field.

        Parameters
        ----------
        data : any
            Data to set for the field.
        meta_data : Meta_Field
            Meta data to set for the field. If *name* is None, then the field name to set *data* for will
            be pulled from this attribute.
        name : str, optional
            The name of the field to set data for.

        Returns
        -------
        None
        """

        if (name is None) and (meta_data is not None):
            name = pds_to_numpy_name(meta_data.full_name())

        self[name] = data
        self.meta_data[name] = meta_data

    def _meta_data_resolve(self, key):
        """
        Parameters
        ----------
        key : str, slice, array_like
            Standard ``np.ndarray`` indexes, including field name, list of field names, record number or
            slice, or an array-like of record numbers.

        Returns
        -------
        any
            Meta data for the *key*. An empty ``OrderedDict`` for any key kind not handled below.
        """

        # For a slice, we must be requesting records, and therefore all fields, and therefore all meta data
        if isinstance(key, slice):
            return self.meta_data

        # For multi-valued keys
        if isinstance(key, (np.ndarray, tuple, list)):

            # Cast multi-valued keys to an ndarray so we can get its type. Ragged keys (e.g. the tuple
            # ``(slice(None), [0, 2])`` produced by ``arr[:, [0, 2]]``) cannot be cast on newer NumPy
            # versions, which raise ValueError instead of building an object array; such keys are
            # never field names, so they select records and keep all meta data.
            try:
                key = np.asarray(key)
            except ValueError:
                return self.meta_data

            # For character multi-valued keys, we must be requesting multiple fields
            if np.issubdtype(key.dtype, np.character):

                meta_data = OrderedDict()

                for _key in key:
                    if _key in self.meta_data:
                        meta_data[_key] = self.meta_data.get(_key)

                return meta_data

            # For non-character multiple-valued fields, we must be requesting specific records and
            # therefore all fields, and therefore all meta data
            return self.meta_data

        # For a string key, we are requesting a single field and therefore just that field's meta data
        if isinstance(key, six.string_types):
            return self.meta_data.get(key)

        return OrderedDict()
class PDS_marray(np.ma.MaskedArray, PDS_ndarray):
    """ PDS masked array, enabling some record array functionality and having a meta_data attribute.

    Subclasses np.ma.MaskedArray such that we can provide meta data for an individual array or table field.
    Inherits all Attributes from ``np.ma.MaskedArray``.

    Parameters
    ----------
    data : array_like
        Data for the array.
    meta_data : Meta_ArrayStructure or Meta_Field, optional
        Meta-data for the array.
    options : dict, optional
        NumPy keywords to pass to the ``np.ma.MaskedArray`` initializer.

    Attributes
    ----------
    meta_data : Meta_ArrayStructure, Meta_Field or OrderedDict
        Meta-data for the array. Defaults to an empty ``OrderedDict`` if no meta-data was given on
        initialization (and *data* carried none) or has been set.
    """

    def __new__(cls, data, meta_data=None, **options):

        obj = np.ma.MaskedArray.__new__(cls, data=data, **options)

        # Without explicit meta data, inherit whatever the input already carries
        if meta_data is None:
            meta_data = getattr(data, 'meta_data', OrderedDict())

        obj.meta_data = meta_data

        return obj

    def __getitem__(self, idx):
        """
        Parameters
        ----------
        idx : str, slice, array_like
            Standard ``np.ndarray`` indexes: including field name, list of field names, record number or
            slice, or an array-like of record numbers.

        Returns
        -------
        PDS_marray, np.ma.mvoid, any scalar
            Item(s) in the array for key. If the index is selecting a field(s) or multiple records
            then the meta_data will be preserved for those fields or records.
        """

        obj = super(PDS_marray, self).__getitem__(idx)

        # For structured arrays, retrieve the correct meta_data portion if we are not obtaining all of the
        # fields
        if isinstance(obj, np.ndarray) and not isinstance(obj, np.ma.mvoid) and \
                not isinstance(obj, np.ma.core.MaskedConstant):

            meta_data = self._meta_data_resolve(idx)

            obj = obj.view(PDS_marray)
            obj.meta_data = meta_data

            # We update _optinfo, because otherwise selecting a single field from multiple fields, and then
            # selecting a few records for that single field will give all meta-data rather than for a single
            # field (via recovering it from _optinfo in ``np.ma.MaskedArray._update_from``)
            if 'meta_data' in obj._optinfo:
                obj._optinfo['meta_data'] = meta_data

        return obj

    def __reduce__(self):
        """ Subclassed to ensure pickling preserves the ``meta_data`` attribute. """

        default_state = super(PDS_marray, self).__reduce__()

        # Append meta data as the final element of the state tuple; ``__setstate__`` strips it back off
        new_state = default_state[2] + (self.meta_data,)

        return default_state[0], default_state[1], new_state

    def __setstate__(self, state):
        """ Subclassed to ensure pickling preserves the ``meta_data`` attribute. """

        # The last element is the meta data appended by ``__reduce__``; the rest is NumPy's own state
        self.meta_data = state[-1]
        super(PDS_marray, self).__setstate__(state[0:-1])

    def __repr__(self):
        """ Subclassed to ensure that scalars take-on their normal dtype, instead of being a 0-d array,
        and to adjust returned value to properly reflect the class name. """

        # For scalars convert to correct NumPy type and then use regular repr. This ensures we do
        # not get back a type of this array with a single value when using functions that return just one
        # value (e.g. ``np.min`` or ``np.max``), but rather just the value, which preserves regular NumPy
        # behavior.
        if self.ndim == 0:
            return repr(self.item())

        # Avoid outputting masked_PDS_marray[...] or masked_PDS_ndarray, instead use just PDS_marray[...]
        repr_str = super(PDS_marray, self).__repr__()

        try:
            idx = repr_str.index('(')
            repr_str = self.__class__.__name__ + repr_str[idx:]
        except ValueError:
            # No opening parenthesis to anchor on; leave NumPy's repr untouched
            pass

        return repr_str

    def __array_finalize__(self, obj):
        """
        Subclassed to ensure that creation and views correctly set and preserve the ``meta_data``
        attribute.
        """

        if obj is None:
            return

        self.meta_data = getattr(obj, 'meta_data', OrderedDict())
        np.ma.MaskedArray.__array_finalize__(self, obj)

    def __array_wrap__(self, out_arr, context=None):
        """
        Based on AstroPy ``Column.__array_wrap__`` implementation. __array_wrap__ is
        called at the end of every ufunc.

        "Normally, we want a PDS_marray object back and do not have to do anything
        special. But there are two exceptions:

        1) If the output shape is different (e.g. for reduction ufuncs
           like sum() or mean()), a PDS_array makes little sense, so we return
           the output viewed as the array content (ndarray or MaskedArray).
           For this case, we use "[()]" to select everything, and to ensure we
           convert a zero rank array to a scalar. (For some reason np.sum()
           returns a zero rank scalar array while np.mean() returns a scalar;
           So the [()] is needed for this case.

        2) When the output is created by any function that returns a boolean
           we also want to consistently return an array rather than a PDS_marray."
        """

        out_arr = super(PDS_marray, self).__array_wrap__(out_arr, context)

        # NOTE(review): ``out_arr[()]`` is routed through ``__getitem__`` above, which re-views most
        # ndarray results as a PDS_marray -- confirm whether a plain masked array was intended here.
        if (self.shape != out_arr.shape or
                (isinstance(out_arr, PDS_marray) and
                 (context is not None and context[0] in _comparison_functions))):
            return out_arr[()]
        else:
            return out_arr

    def _update_from(self, obj):
        """ Subclassed to ensure the ``meta_data`` attribute is not lost under some conditions.

        Upon certain operations that create a new masked array, NumPy uses ``_update_from`` to set
        properties of the new array to match an old one.
        """

        if hasattr(obj, 'meta_data'):
            self.meta_data = obj.meta_data

        super(PDS_marray, self)._update_from(obj)

    def view(self, dtype=None, type=None, fill_value=None):
        """ Return a view of the PDS_marray data.

        Subclassed to fix a NumPy bug that breaks setting fill_value when subselecting a field from a
        structured array.

        Parameters
        ----------
        dtype : data-type or ndarray sub-class, optional
            Passed through to ``np.ma.MaskedArray.view``.
        type : type, optional
            Passed through to ``np.ma.MaskedArray.view``.
        fill_value : scalar, optional
            Passed through to ``np.ma.MaskedArray.view`` where supported.

        Returns
        -------
        any
            The view produced by ``np.ma.MaskedArray.view``, with this array's fill value carried over
            when the view is still a masked array, its data type was not changed and no new
            *fill_value* was requested.
        """

        try:
            obj = super(PDS_marray, self).view(dtype=dtype, type=type, fill_value=fill_value)
        except TypeError:
            # NumPy < v1.8 did not have a fill value attribute for ``view``
            return super(PDS_marray, self).view(dtype=dtype, type=type)

        # ``dtype`` may actually carry an array class rather than a data type. Builtin ``issubclass``
        # raises TypeError for non-class arguments, which is treated as "not a masked array class";
        # this mirrors ``np.issubclass_``, which no longer exists in NumPy 2.0.
        dtype_is_masked_class = False
        if (dtype is not None) and (type is None):
            try:
                dtype_is_masked_class = issubclass(dtype, np.ma.MaskedArray)
            except TypeError:
                dtype_is_masked_class = False

        # Fix bug in NumPy < v1.10, which resets fill value on ``view`` if mask is not nomask. Only
        # masked views carry a fill value; a plain ``np.ndarray`` view cannot accept the attribute.
        keeps_fill_value = (dtype is None) or dtype_is_masked_class
        if keeps_fill_value and (fill_value is None) and isinstance(obj, np.ma.MaskedArray):
            obj._fill_value = self._fill_value

        return obj

    def filled(self, fill_value=None):
        """ Return a copy of self, with masked values filled with a given value.

        Parameters
        ----------
        fill_value : scalar, optional
            The value to use for invalid entries. If None, the ``fill_value`` attribute
            of the array is used instead. Defaults to None.

        Returns
        -------
        PDS_ndarray
            A copy of ``self`` with invalid entries replaced by *fill_value*.
        """

        obj = super(PDS_marray, self).filled(fill_value)
        obj = obj.view(PDS_ndarray)
        obj.meta_data = copy.deepcopy(getattr(self, 'meta_data', OrderedDict()))

        return obj

    def compressed(self):
        """ Return a copy of all the non-masked data as a 1-D array.

        Returns
        -------
        PDS_ndarray
            A new array holding the non-masked data is returned.
        """

        obj = super(PDS_marray, self).compressed()
        obj = obj.view(PDS_ndarray)
        obj.meta_data = copy.deepcopy(getattr(self, 'meta_data', OrderedDict()))

        return obj

    def copy(self, order='C'):
        """ Copy the array.

        Parameters
        ----------
        order : {'C', 'F', 'A', 'K'}, optional
            Controls the memory layout of the copy. 'C' means C-order, 'F' means F-order, 'A' means 'F' if
            a is Fortran contiguous, 'C' otherwise. 'K' means match the layout of a as closely as possible.

        Returns
        -------
        PDS_marray
            An array with both the data and meta data copied.
        """

        # Fall back to the no-argument form if the underlying ``copy`` rejects the ``order`` keyword
        try:
            obj = super(PDS_marray, self).copy(order=order)
        except TypeError:
            obj = super(PDS_marray, self).copy()

        # Deep copy so that the new array does not share (mutable) meta data with this one
        obj.meta_data = copy.deepcopy(getattr(self, 'meta_data', OrderedDict()))

        return obj

    @property
    def fill_value(self):
        """ The filling value of the masked array; setting it is routed through ``set_fill_value``. """
        return super(PDS_marray, self).fill_value

    @fill_value.setter
    def fill_value(self, value):
        self.set_fill_value(value)

    def set_fill_value(self, value=None):
        """ Set the filling value.

        Parameters
        ----------
        value : scalar, optional
            A value used to fill the invalid entries of the masked array.

        Returns
        -------
        None
        """

        # Partial fix for NumPy bug 9748
        # Multi-dimensional fields in structured masked arrays raise exceptions upon attempt
        # to set fill_value under some NumPy versions. This fix properly sets fill value, however
        # the fill value is not saved upstream in the PDS_marray the multi-dimensional field it
        # was subselected from until NumPy implements the fix in the ticket.
        if np.isscalar(self._fill_value) and (self.ndim > 1):
            self._fill_value = np.array(self._fill_value)

        super(PDS_marray, self).set_fill_value(value)

    def set_field(self, data, meta_data, name=None):
        """ Set data and meta data for a single field.

        Parameters
        ----------
        data : any
            Data to set for the field.
        meta_data : Meta_Field
            Meta data to set for the field. If *name* is None, then the field name to set *data* for will
            be pulled from this attribute.
        name : str, optional
            The name of the field to set data for.

        Returns
        -------
        None
        """

        if (name is None) and (meta_data is not None):
            name = pds_to_numpy_name(meta_data.full_name())

        # Carry the incoming masked data's fill value over to the field before setting the data
        if isinstance(data, np.ma.MaskedArray):

            # NumPy does not properly set fill value in record arrays for multi-dimensional fields
            if isinstance(self.fill_value[name], np.ndarray):
                self.fill_value[name][:] = data.fill_value
            else:
                self[name].set_fill_value(data.fill_value)

        super(PDS_marray, self).set_field(data, meta_data, name=name)