# Source code for pds4_tools.reader.read_arrays

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np

from .array_objects import ArrayStructure, Meta_ArrayStructure
from .data import PDS_array
from .data_types import (data_type_convert_array, pds_to_numpy_type, apply_scaling_and_value_offset,
                         mask_special_constants)

from ..utils.logging import logger_init
from ..extern import six

# Initialize the logger
logger = logger_init()

#################################


def _read_array_byte_data(array_structure, as_string=True, memmap=False):
    """ Reads the byte data from the data file for a PDS4 Array.

    Determines, from the structure's meta data, the relevant start and stop bytes in the data file prior to
    reading.

    Parameters
    ----------
    array_structure : ArrayStructure
        The PDS4 Array data structure for which the byte data needs to be read. Its ``parent_filename``
        and ``meta_data`` attributes are used; the latter must provide the array's offset, data type
        and axis arrays.
    as_string : bool, optional
        If True, the byte data is returned as a byte string (either ``str`` in Python 2, or ``bytes`` in
        Python 3). If False, the byte data is an ndarray of dtype int8. Defaults to True.
    memmap: bool, optional
        If True, the byte data is memory mapped when *as_string* is False. The file is mapped
        copy-on-write (``mode='c'``), so changes to the returned data are not written back to disk.
        Defaults to False.

    Returns
    -------
    str, bytes, np.ndarray or np.memmap
        The byte data for the array. Either ndarray or memmap of each byte with a dtype of int8,
        or a byte string, depending on the input parameters.

    Raises
    ------
    IOError
        Raised if the data file could not be opened or read.
    """

    data_filename = array_structure.parent_filename
    meta_data = array_structure.meta_data

    # The array occupies (number of elements) x (bytes per element) bytes, starting at its offset
    num_elements = np.prod([axis_array['elements'] for axis_array in meta_data.get_axis_arrays()])
    data_type = meta_data.data_type()
    element_size = pds_to_numpy_type(data_type).itemsize

    start_byte = meta_data['offset']
    stop_byte = start_byte + num_elements * element_size

    # Data is read one byte at a time (as int8), so the element count equals the byte count
    num_int8_elements = stop_byte - start_byte

    # Read byte data from file
    try:

        if memmap:

            data = np.memmap(data_filename, offset=start_byte, mode='c', dtype='int8',
                             shape=num_int8_elements)

        else:

            with open(data_filename, 'rb') as file_handler:
                file_handler.seek(start_byte)

                data = np.fromfile(file_handler, dtype='int8', count=num_int8_elements)

    except IOError as e:
        # ``six.raise_from`` raises by itself; the leading ``raise`` only makes it obvious to readers
        # (and static analysis) that control never continues past this point.
        raise six.raise_from(IOError("Unable to read data from file '" + data_filename +
                                     "' found in label - {0}".format(e)), None)

    # Convert to a byte string if requested
    if as_string:

        # ``tostring`` is deprecated since NumPy 1.19 and is not available in recent releases;
        # ``tobytes`` returns the same bytes. The fallback keeps very old NumPy versions working.
        if hasattr(data, 'tobytes'):
            data = data.tobytes()
        else:
            data = data.tostring()

    return data


def _apply_bitmask(data, bit_mask_string, special_constants=None):
    """ Apply bitmask to *data*, modifying it in-place.

    Elements that are already masked in *data* (if it is a masked array), and elements that
    `mask_special_constants` masks when *special_constants* is given, are left untouched.

    Parameters
    ----------
    data : np.ndarray
        Integer data (ndarray subclasses, including masked arrays, are accepted), byteswapped to be
        correct for endianness of current system if necessary.
    bit_mask_string : str or unicode
        String of 1's and 0's, same length as number of bits in each *data* datum
    special_constants : optional
        Special constants meta data, passed unchanged to `mask_special_constants`. If None, the bit mask
        is applied without looking for special constants. Defaults to None.

    Returns
    -------
    None
    """

    # Skip needlessly applying bit_mask if it's all 1's
    if '0' not in bit_mask_string:
        return

    # Convert bit mask to binary (python assumes the input is a string describing the integer in MSB format,
    # which is what the PDS4 standard specifies.)
    bit_mask = int(bit_mask_string, 2)

    # Work on the underlying buffer, such that the update is in-place for plain and masked arrays alike
    raw_data = np.ma.getdata(data)

    # Express the bit mask in the data's own integer type. For signed types a mask with its top bit
    # set is re-interpreted as the (negative) two's complement value with the same bit pattern; as a
    # plain Python integer it would not fit the type, which NumPy 2 refuses with an OverflowError.
    if raw_data.dtype.kind in ('i', 'u'):

        num_bits = raw_data.dtype.itemsize * 8
        bit_mask &= (1 << num_bits) - 1

        if raw_data.dtype.kind == 'i' and bit_mask >= (1 << (num_bits - 1)):
            bit_mask -= 1 << num_bits

        bit_mask = raw_data.dtype.type(bit_mask)

    # Determine which elements must not be touched: those already masked in the input, and any
    # Special_Constants values. ``getmask`` and ``mask_or`` return ``nomask`` when nothing is masked,
    # which avoids ever comparing against (or indexing with) a scalar mask.
    skip = np.ma.getmask(data)
    if special_constants is not None:

        masked_data = mask_special_constants(data, special_constants=special_constants)
        skip = np.ma.mask_or(skip, np.ma.getmask(masked_data))

        del masked_data

    # Apply bit mask to all remaining data in a single vectorized operation
    if skip is np.ma.nomask:
        raw_data &= bit_mask
    else:
        raw_data[~skip] &= bit_mask


def new_array(input, no_scale=False, no_bitmask=False, masked=None, copy=True, **structure_kwargs):
    """ Create an `ArrayStructure` from PDS-compliant data or meta data.

    Notes
    -----
    The data attribute will not be a view of the original *input* (if it is a data array), but rather
    a new array. However, the *input* passed into this method may still be modified in-place to save
    memory, see *copy*. A method to get a view of the original data, if conditions are satisfied, is to
    also pass *input* as a kwarg of the name ``structure_data``.

    Parameters
    ----------
    input : PDS_ndarray, PDS_marray or Meta_ArrayStructure
        Either an array containing the data, which must also have a valid PDS4 meta_data attribute
        describing itself, or an instance of valid Meta_ArrayStructure. If input is data, the base
        data type will be taken from its actual dtype, rather than from the meta data it must still
        contain.
    no_scale : bool, optional
        If False, and input is an array of data, then the data will be scaled according to the
        scaling_factor and value_offset meta data. If the *input* is meta data only, then the output
        data type will be large enough to store the scaled values. If True, no scaling or data type
        conversions will be done. Defaults to False.
    no_bitmask : bool, optional
        If False, and input is an array of data, then the bitmask indicated in the meta data will be
        applied. If True, the bitmask will not be used. Defaults to False.
    masked : bool or None, optional
        If True, and input is an array of data, then the data will retain any masked values and in
        addition have numeric Special_Constants values masked. If False, any masked values in the input
        array will be unmasked and data assignments will not preserve masked values. If None, masked
        values in the input will be retained only if any are present. Defaults to None.
    copy: bool, optional
        If True, a copy of *input* is made, ensuring that it does not get modified during processing.
        If False, then the input may change if it is an array of data. In either case, the output data
        will not be a view. Defaults to True.
    structure_kwargs : dict, optional
        Keywords that are passed directly to the `ArrayStructure` constructor.

    Returns
    -------
    ArrayStructure
        An object representing the PDS4 array structure. The data attribute will contain an array that
        can store *input* values (or does store it, if input is an array of data). Other attributes may
        be specified via *structure_kwargs*.

    Raises
    ------
    RuntimeError
        Raised if *input* is neither a PDS_ndarray or PDS_marray, nor a Meta_ArrayStructure.
    """

    # Determine and validate that input is a Meta_ArrayStructure, PDS_ndarray or PDS_marray
    input_is_array = PDS_array.isinstance(input)
    input_is_meta_array = isinstance(input, Meta_ArrayStructure)

    if (not input_is_array) and (not input_is_meta_array):
        raise RuntimeError('Inputs must all be one of Meta_ArrayStructure, PDS_ndarray or PDS_marray.')

    # Obtain basic meta data
    if input_is_array:
        array = input
        meta_data = input.meta_data
    else:
        array = None
        meta_data = input

    special_constants = meta_data.get('Special_Constants')
    element_array = meta_data['Element_Array']
    scale_kwargs = {} if no_scale else {'scaling_factor': element_array.get('scaling_factor'),
                                        'value_offset': element_array.get('value_offset')}

    # Obtain dtype (ensuring to scale it for future application of scaling and offset if necessary)
    dtype = pds_to_numpy_type(meta_data.data_type(), data=array, **scale_kwargs)

    # Obtain shape
    array_shape = meta_data.dimensions()

    # Decide what type of data array we will be using (i.e., masked or otherwise)
    if masked is None:
        masked = np.ma.is_masked(input)

    array_type = PDS_array.get_array(masked=masked)

    # Create the ArrayStructure
    array_structure = ArrayStructure(**structure_kwargs)

    if array_structure.data_loaded:

        # Ensure data array is of requested type if it was already supplied
        array_structure.data = array_structure.data.view(array_type)

    else:

        # Create the structured data array, and assign a view of it as a PDS_array type
        array_structure.data = np.empty(array_shape, dtype=dtype).view(array_type)

    # For cases where input is PDS_array, we transfer their data into the new array
    if input_is_array:

        array = input.copy() if copy else input

        # Apply the bit mask to extracted_data if necessary
        bit_mask = (meta_data.get('Object_Statistics') or {}).get('bit_mask')
        if (not no_bitmask) and (bit_mask is not None):

            # Left-pad with zeros so the mask has one character per bit of each datum
            bit_mask_string = six.text_type(bit_mask).zfill(array.dtype.itemsize * 8)
            _apply_bitmask(array, bit_mask_string, special_constants=special_constants)

        # Adjust data values to account for 'scaling_factor' and 'value_offset' (in-place if possible)
        # (Note that this may change the data type to prevent overflow and thus increase memory usage.)
        if not no_scale:
            array = apply_scaling_and_value_offset(array, special_constants=special_constants,
                                                   **scale_kwargs)

        # Mask Special_Constants in output if requested
        if masked:
            array = mask_special_constants(array, special_constants=special_constants)

        # Reshape array as necessary
        if len(array_shape) > 1:
            array = array.reshape(array_shape)

        # Assign data, and ensure data array is of requested type if it was already supplied
        array_structure.data = array.view(array_type)

        # Set correct fill value if our data is masked (necessary only on NumPy < v1.13)
        if masked and isinstance(array, np.ma.MaskedArray):
            array_structure.data.set_fill_value(array.fill_value)

    return array_structure
def read_array_data(array_structure, no_scale, masked, memmap=False):
    """ Reads and properly formats the data for a single PDS4 array structure, modifies
    *array_structure* to contain the extracted data for said array.

    Parameters
    ----------
    array_structure : ArrayStructure
        The PDS4 Array data structure to which the data should be added.
    no_scale : bool
        Returned data will not be adjusted according to the offset and scaling factor.
    masked : bool
        Returned data will have numeric Special_Constants masked.
    memmap : bool, optional
        If True, extracted data is memory mapped. Only guaranteed for unscaled data or for *no_scale*;
        otherwise returned data may be a copy. Defaults to False.

    Returns
    -------
    None
    """

    # Obtain basic meta data
    meta_data = array_structure.meta_data
    data_type = meta_data.data_type()

    # Read the data in, and transform it to the necessary data type
    extracted_data = _read_array_byte_data(array_structure, as_string=False, memmap=memmap)
    extracted_data = data_type_convert_array(data_type, extracted_data)

    # Merge data and meta_data into a PDS_ndarray
    extracted_data = PDS_array(extracted_data, meta_data)

    # Finish processing (scale and applying bit mask), then set obtained data. The freshly read data
    # is not referenced anywhere else, so it is passed with copy=False and may be modified in-place.
    array_structure.data = new_array(extracted_data, no_scale=no_scale, no_bitmask=False,
                                     masked=masked, copy=False).data
def read_array(full_label, array_label, data_filename, lazy_load=False, no_scale=False):
    """ Create the `ArrayStructure`, containing label, data and meta data for a PDS4 Array from a file.

    Used for all forms of PDS4 Arrays (e.g., Array, Array_2D_Image, Array_3D_Spectrum, etc).

    Parameters
    ----------
    full_label : Label
        The entire label for a PDS4 product, from which *array_label* originated.
    array_label : Label
        Portion of label that defines the PDS4 array data structure.
    data_filename : str or unicode
        Filename, including the full path, of the data file that contains the data for this array.
    lazy_load : bool, optional
        If True, does not read-in the data of this array until the first attempt to access it.
        Defaults to False.
    no_scale : bool, optional
        If True, returned data will not be adjusted according to the offset and scaling factor.
        Defaults to False.

    Returns
    -------
    ArrayStructure
        An object representing the array; contains its label, data and meta data

    Raises
    ------
    TypeError
        Raised if called on a non-array according to *array_label*.
    """

    # Refuse to proceed if the data structure is not actually an Array (the tag of every array
    # structure is expected to contain 'Array')
    if 'Array' not in array_label.tag:
        raise TypeError('Attempted to read_array() on a non-array: ' + array_label.tag)

    # Create the data structure for this array
    array_structure = ArrayStructure.from_file(data_filename, array_label, full_label,
                                               lazy_load=lazy_load, no_scale=no_scale)

    return array_structure