# Source code for pds4_tools.utils.helpers

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import functools
import numpy as np

from ..utils.compat import OrderedDict
from ..extern import six

#################################



[docs]
def cast_int_float_string(value):
    """ Convert a string to the most specific numeric type it represents.

    An ``int`` conversion is attempted first, then a ``float`` conversion. If both raise
    ``ValueError`` the input is handed back untouched.

    Parameters
    ----------
    value : str or unicode
        Value to try casting to an int and float.

    Returns
    -------
    int, float, str or unicode
        Cast *value*, or *value* itself when it is neither int nor float compatible.
    """

    # Order matters: int is tried before float so that '5' becomes 5 rather than 5.0
    for caster in (int, float):

        try:
            return caster(value)

        except ValueError:
            continue

    return value




[docs]
def is_array_like(value):
    """ Determine whether a value is array-like.

    A value is considered array-like when it implements __len__ (such as ``list``, ``tuple``,
    ``array.array``, ``np.ndarray``, etc), unless it is a ``str``, ``unicode`` or ``bytes``
    string, or a NumPy masked constant.

    Parameters
    ----------
    value
        Any kind of value.

    Returns
    -------
    bool
        True if *value* is array-like, false otherwise.
    """

    # Without __len__ the value cannot be array-like
    if not hasattr(value, '__len__'):
        return False

    # Types that implement __len__ but are deliberately excluded (six is used because str,
    # unicode, and bytes can all represent strings, depending on the Python version)
    excluded_types = (six.binary_type, six.text_type, np.ma.core.MaskedConstant)

    return not isinstance(value, excluded_types)




[docs]
def finite_min_max(array_like):
    """ Find the smallest and largest finite values in an array.

    Entries that are NaN or +/-Inf are excluded before the minimum and maximum are taken.

    Parameters
    ----------
    array_like : array_like
        A numeric array of some kind, possibly containing NaN or Inf values.

    Returns
    -------
    tuple
        Two-valued tuple containing the finite minimum and maximum of *array_like*.
    """

    # asanyarray (rather than asarray) lets ndarray subclasses pass through unchanged
    data = np.asanyarray(array_like)

    # Boolean mask selecting only the finite entries
    finite_mask = np.isfinite(data)
    finite_only = data[finite_mask]

    lowest = finite_only.min()
    highest = finite_only.max()

    return lowest, highest




[docs]
def dict_extract(nested_dict, key):
    """ Recursively searches nested dictionaries for a key.

    Every value in *nested_dict* that is itself a dictionary is searched in turn. Values that
    are array-like (but not dictionaries) are iterated, and each of their elements is searched
    as a potential dictionary.

    Adapted from http://stackoverflow.com/a/29652561.

    Notes
    -----
    This code is generally efficient. However, if you pass it a dictionary that has a huge array
    nested within it, it will not be performant because every element of that array is visited
    in search of a dictionary (this is by design; non-dict array-like's are searched on the
    assumption that they will be small wherever using this function makes sense).

    Parameters
    ----------
    nested_dict : dict or OrderedDict
        A dictionary potentially containing an arbitrary number of other dictionaries.
    key : str or unicode
        The key to search for in *nested_dict*.

    Returns
    -------
    generator
        Found values for *key* in any dictionary inside *nested_dict*.

    Examples
    --------

    >>> d = { "id" : "abcde",
              "key1" : "blah",
              "key2" : "blah blah",
              "nestedlist" : [
                "blah blah",
                { "id" : "qwerty",
                "key1": "blah"} ]
            }
    >>> result = dict_extract(d, 'id')

    >>> print(list(result))
    ['abcde', 'qwerty']
    """

    # Anything lacking an ``items`` method is not treated as a dictionary and yields nothing
    if not hasattr(nested_dict, 'items'):
        return

    for name, child in nested_dict.items():

        # A match at this level is reported before descending any further
        if name == key:
            yield child

        # Work out what (if anything) needs to be searched below this value
        if isinstance(child, dict):
            candidates = (child, )
        elif is_array_like(child):
            candidates = child
        else:
            candidates = ()

        for candidate in candidates:
            for found in dict_extract(candidate, key):
                yield found




[docs]
def xml_to_dict(xml_element, skip_attributes=False, cast_values=False, cast_ignore=(), tag_modify=()):
    """ Transforms XML to an ``OrderedDict``.

    Takes an XML ``ElementTree`` Element or a `Label` and creates an equivalent ``OrderedDict``.
    Keys of the dictionary represent tag names and values represent the text values of the elements.
    In case of a (sub)element having child elements, values will be another ``OrderedDict``, inside
    which the text of the element has key '_text'. In case of (sub)elements having child elements with
    the same key, the value for the key will be a ``list``. In case of (sub)elements with attributes,
    the value will be an ``OrderedDict``, inside which the key for each attribute starts with '@' and
    the text of the element has key '_text'. For text elements, the text value is not preserved (and
    a '_text' key is not created) if it contains only whitespace (including spaces, tabs and newlines);
    otherwise whitespaces are preserved. Text that casts to a numeric zero (when *cast_values* is True)
    is preserved like any other non-whitespace text.

    Preserves order of elements in most cases. The exception is when an element has 2 or more sets
    of children, where each set has the same key names (i.e., there are at least 4 children, and 2 of
    those children have one key, and 2 have another key) and the order of the children with the
    non-matching keys is intertwined, in such a case the order of the intertwined keys will not be preserved.

    Adapted from http://stackoverflow.com/a/10076823.

    Parameters
    ----------
    xml_element : ``ElementTree`` Element or Label
        XML representation which will be turned into a dictionary.
    skip_attributes : bool, optional
        If True, skips adding attributes from XML. Defaults to False.
    cast_values : bool, optional
        If True, float and int compatible values of element text and attribute values will be cast as such
        in the output dictionary. Defaults to False.
    cast_ignore : tuple[str or unicode], optional
        If given, then a tuple of element tags and/or attribute names. If *cast_values* is True, then
        for elements and attributes matching exactly the values in this tuple, values will not be cast.
        Attribute names must be prepended by an '@'. If *tag_modify* is set, then tags and attribute names
        specified by *cast_ignore* should be the already tag modified versions. Empty by default.
    tag_modify : tuple, optional
        If given, then a 2-valued tuple with str or unicode values, or a tuple of 2-valued tuples. Any match,
        including partial, in element tag names and/or attributes names for each tag_modify[0] is replaced
        with tag_modify[1]. Empty by default.

    Returns
    -------
    OrderedDict
        Dictionary representation of the XML input.
    """

    # Modify tags if requested
    element_tag = xml_element.tag

    if tag_modify:

        # Normalize a single (old, new) pair into a tuple of pairs, so that the loops below (and
        # the recursive calls for children) can always treat *tag_modify* as a sequence of pairs
        if not is_array_like(tag_modify[0]):
            tag_modify = (tag_modify, )

        for tag in tag_modify:
            element_tag = element_tag.replace(tag[0], tag[1])

    # Placeholder value; replaced or filled in below by children, attributes and/or text.
    # NOTE(review): when *skip_attributes* is True, an element that has attributes but no children
    # and no text keeps this empty OrderedDict rather than None -- confirm that is intended.
    d = {element_tag: OrderedDict() if xml_element.attrib else None}
    children = list(xml_element)

    # Add children
    if children:

        convert_child = functools.partial(xml_to_dict,
                                          skip_attributes=skip_attributes, cast_values=cast_values,
                                          cast_ignore=cast_ignore, tag_modify=tag_modify)

        # Group converted children by tag, in order of first appearance of each tag
        grouped = OrderedDict()

        for child in children:
            for k, v in convert_child(child).items():
                grouped.setdefault(k, []).append(v)

        # A tag that occurs once maps directly to its value, repeated tags map to a list of values
        collapsed = OrderedDict()

        for k, values in grouped.items():
            collapsed[k] = values[0] if len(values) == 1 else values

        d = {element_tag: collapsed}

    has_attribs = xml_element.attrib and not skip_attributes

    # Add attributes
    if has_attribs:

        attrib = OrderedDict()

        for k, v in xml_element.attrib.items():

            # Tag modify for attribute names
            new_k = '@' + k
            if tag_modify:

                for tag in tag_modify:
                    new_k = new_k.replace(tag[0], tag[1])

            # Cast value for attribute values
            new_v = v
            if cast_values and (new_k not in cast_ignore):
                new_v = cast_int_float_string(new_v)

            attrib[new_k] = new_v

        d[element_tag].update(attrib)

    # Add text elements (whitespace-only text is treated as though there were no text)
    text = xml_element.text
    if (text is not None) and (text.strip()):

        if cast_values and (element_tag not in cast_ignore):
            text = cast_int_float_string(text)

        # The text was already established to be non-empty above. Its truthiness must not be
        # re-tested after casting, otherwise text such as '0' (cast to the falsy 0) would be dropped.
        if children or has_attribs:
            d[element_tag]['_text'] = text

        else:
            d[element_tag] = text

    return d