Source code for csdmpy.dependent_variable

"""Dependent variable object: attributes and methods."""
import json
import warnings
from copy import deepcopy

import numpy as np

from csdmpy.dependent_variable.external import ExternalDataset
from csdmpy.dependent_variable.internal import InternalDataset
from csdmpy.utils import _axis_label
from csdmpy.utils import _get_dictionary

__author__ = "Deepansh J. Srivastava"
__email__ = "srivastava.89@osu.edu"
__all__ = ["DependentVariable"]


[docs]class DependentVariable:
    r"""Create an instance of the DependentVariable class.

    The instance of this class represents a dependent variable, :math:`\mathbf{U}`.
    A dependent variable holds :math:`p`-component data values, where :math:`p>0`
    is an integer. For example, a scalar is single-component (:math:`p=1`),
    a vector may have up to `n`-components (:math:`p=n`),
    while a second rank symmetric tensor have six unique component (:math:`p=6`).

    **Creating a new dependent variable.**

    There are two ways of creating a new instance of a DependentVariable class.

    *From a python dictionary containing valid keywords.*

    .. doctest::

        >>> from csdmpy import DependentVariable
        >>> import numpy as np
        >>> numpy_array = np.arange(30).reshape(3, 10).astype(np.float32)

        >>> dependent_variable_dictionary = {
        ...     "type": "internal",
        ...     "components": numpy_array,
        ...     "name": "star",
        ...     "unit": "W s",
        ...     "quantity_name": "energy",
        ...     "quantity_type": "pixel_3",
        ... }
        >>> y = DependentVariable(dependent_variable_dictionary)

    Here, `dependent_variable_dictionary` is the python dictionary.

    *From valid keyword arguments.*

    .. doctest::

        >>> y = DependentVariable(
        ...     type="internal",
        ...     name="star",
        ...     unit="W s",
        ...     quantity_type="pixel_3",
        ...     components=numpy_array,
        ... )
    """

    __slots__ = ("subtype", "_type")

    _immutable_objects_ = ()

    def __init__(self, *args, **kwargs):
        """Initialize an instance of a DependentVariable class."""
        dictionary = {
            "type": "internal",
            "description": "",
            "name": "",
            "unit": "",
            "quantity_name": None,
            "component_labels": None,
            "encoding": "none",
            "numeric_type": None,
            "quantity_type": "scalar",
            "components": None,
            "components_url": None,
            "filename": __file__,
            "application": None,
            "sparse_sampling": {},
        }
        input_dict = _get_dictionary(*args, **kwargs)
        self.__validate_key_value__(input_dict)
        dictionary.update(input_dict)

        self._type = "internal" if dictionary["type"] == "internal" else "external"
        self.subtype = (
            InternalDataset(**dictionary)
            if dictionary["type"] == "internal"
            else ExternalDataset(**dictionary)
        )
        self.encoding = "base64"
        self.type = "internal"

    @staticmethod
    def __validate_key_value__(input_dict):
        if "type" not in input_dict:
            raise KeyError(
                "Missing a required `type` key from the DependentVariable object."
            )

        if input_dict["type"] not in ["internal", "external"]:
            t_1 = input_dict["type"]
            raise ValueError(
                f"The value, '{t_1}', is an invalid `type` for the DependentVariable "
                "objects. The allowed values are 'internal', 'external'."
            )

        def message(item, subtype):
            return (
                f"Missing a required `{item}` key from the DependentVariable "
                f"object of type, `{subtype}`."
            )

        if "quantity_type" not in input_dict:
            raise KeyError(message("quantity_type", input_dict["type"]))

        if input_dict["type"] == "external" and "encoding" in input_dict:
            raise KeyError(
                "The `encoding` key is invalid for DependentVariable objects with the "
                "`external` type."
            )

        if input_dict["type"] == "internal" and "components" not in input_dict:
            raise KeyError(message("components", "internal"))

        if input_dict["type"] == "external" and "components_url" not in input_dict:
            raise KeyError(message("components_url", "external"))

    def __repr__(self):
        msg = f"{self.components.__repr__()} {self.unit}".strip()
        return f"DependentVariable({msg}, quantity_type={self.quantity_type})"

    def __str__(self):
        msg = f"{self.components.__str__()} {self.unit}".strip()
        q_t = self.quantity_type
        n_t = self.numeric_type
        return f"DependentVariable(\n{msg}, quantity_type={q_t}, numeric_type={n_t})"

    def __eq__(self, other):
        if not isinstance(other, DependentVariable):
            return False
        return self.subtype == other.subtype

    # ======================================================================= #
    #                      DependentVariable  Attributes                      #
    # ======================================================================= #
    @property
    def application(self):
        """Application metadata of the DependentVariable object.

        .. doctest::

            >>> print(y.application)
            None

        The application attribute is where an application can place its own
        metadata as a python dictionary object containing the application specific
        metadata, using a reverse domain name notation string as the attribute
        key, for example,

        .. doctest::

            >>> y.application = {"com.example.myApp": {"myApp_key": "myApp_metadata"}}
            >>> print(y.application)
            {'com.example.myApp': {'myApp_key': 'myApp_metadata'}}

        Please refer to the Core Scientific Dataset Model article for details.

        Returns:
            A python dictionary containing dependent variable application metadata.
        """
        return self.subtype.application

    @application.setter
    def application(self, value):
        self.subtype.application = value

    @property
    def axis_label(self):
        """List of formatted string labels for each component of the dependent variable.

        This attribute is not a part of the original core scientific dataset
        model, however, it is a convenient supplementary attribute that provides
        formatted string ready for labeling the components of the dependent variable.
        The string at index `i` is formatted as `component_labels[i] / unit` if
        `component_labels[i]` is a non-empty string, otherwise, `quantity_name / unit`.
        Here, `quantity_name`, `component_labels`, and `unit`are the attributes of the
        :ref:`dv_api` instance. For example,

        Returns:
            A list of formatted component label strings.

        Raises:
            AttributeError: When assigned a value.
        """
        labels = []
        unit_ = str(self.unit)
        if unit_ == "":
            unit_ = None
        for label in self.component_labels:
            if label.strip() == "":
                label = self.quantity_name
            labels.append(_axis_label(label, unit_))
        return labels

    @property
    def component_labels(self):
        """List of labels corresponding to the components of the dependent variable.

        .. doctest::

            >>> y.component_labels
            ['', '', '']

        To update the `component_labels`, assign an array of strings with same
        number of elements as the number of components.

        .. doctest::

            >>> y.component_labels = ["channel 0", "channel 1", "channel 2"]

        The individual labels are accessed with proper indexing, for example,

        .. doctest::

            >>> y.component_labels[2]
            'channel 2'

        Returns:
            A list of component label strings.

        Raises:
            TypeError: When the assigned value is not an array of strings.
        """
        return self.subtype.component_labels

    @component_labels.setter
    def component_labels(self, value):
        self.subtype.component_labels = value

    @property
    def components(self):
        r"""Component array of the dependent variable.

        The value of this attribute, :math:`\mathbb{U}`, is a Numpy array of
        shape :math:`(p \times N_{d-1} \times ... N_1 \times N_0)` where
        :math:`p` is the number of components, and :math:`N_k` is the number
        of points from the :math:`k^\mathrm{th}` :ref:`dim_api` object.

        .. note::
            The shape of the components Numpy array,
            :math:`(p \times N_{d-1} \times ... N_1 \times N_0)`, is reverse the
            shape of the components array,
            :math:`(N_0 \times N_1 \times ... N_{d-1} \times p)`, from the CSD model.
            This is because CSD model utilizes a column-major order to shape the
            components array relative to the order of the dimension while Numpy
            utilizes a row-major order.

        The dimensionality of this Numpy array is :math:`d+1` where :math:`d`
        is the number of dimension objects. The zeroth axis with :math:`p` points
        is the number of components.

        This attribute can only be updated when the shape of the new array is
        the same as the shape of the components array.

        For example,

        .. doctest::

            >>> print(y.components.shape)
            (3, 10)
            >>> y.numeric_type
            'float32'

        is a three-component dependent variable with ten data values per
        component. The numeric type of the data values, in this example, is
        `float32`. To update the components array, assign an array of
        shape (3, 10) to the `components` attribute. In the following example,
        we assign a Numpy array,

        .. doctest::

            >>> y.components = np.linspace(0, 256, 30, dtype="u1").reshape(3, 10)
            >>> y.numeric_type
            'uint8'

        Notice, the value of the `numeric_type` attribute is automatically
        updated based on the `dtype` of the Numpy array. In this case, from a
        *float32* to *uint8*.
        In this other example,

            >>> try: # doctest: +SKIP
            ...     y.components = np.random.rand(1,10).astype('u1')
            ... except ValueError as e:
            ...     print(e)
            The shape of the `ndarray`, `(1, 10)`, is inconsistent with the
            shape of the components array, `(3, 10)`.

        a `ValueError` is raised because the shape of the input array (1, 10)
        is not consistent with the shape of the components array, (3, 10).

        Returns:
            A Numpy array of components.

        Raises:
            ValueError: When assigning an array whose shape is not consistent with
                        the shape of the components array.
        """
        return self.subtype.components

    @components.setter
    def components(self, value):
        self.subtype.components = value

    @property
    def components_url(self):
        """URL where the data components of the dependent variable are stored.

        This attribute is only informative and cannot be modified. Its value is a
        string containing the local or remote address of the file where the data values
        are stored. The attribute is only valid for dependent variable with type,
        `external`.

        Returns:
            A string containing the URL.

        Raises:
            AttributeError: When assigned a value.
        """
        return self.subtype.components_url

    @property
    def data_structure(self):
        """Json serialized string describing the DependentVariable class instance.

        This supplementary attribute is useful for a quick preview of the dependent
        variable object. For convenience, the values from the `components` attribute
        are truncated to the first and the last two numbers per component.
        The `encoding` keyword is also hidden from this view.

        Returns:
            A json serialized string of the dependent variable object.

        Raises:
            AttributeError: When modified.
        """
        return json.dumps(
            self._dict(for_display=True), ensure_ascii=False, sort_keys=False, indent=2
        )

    @property
    def description(self):
        """Brief description of the dependent variables.

        The default value is an empty string, ''.

        .. doctest::

            >>> print(y.description)
            A test image
            >>> y.description = "A test pixel_3 image"
            >>> print(y.description)
            A test pixel_3 image

        Returns:
            A string of UTF-8 allowed characters describing the dependent variable.

        Raises:
            TypeError: When the assigned value is not a string.
        """
        return self.subtype.description

    @description.setter
    def description(self, value):
        self.subtype.description = value

    @property
    def encoding(self):
        """The encoding method used in representing the dependent variable.

        The value of this attribute determines the method used when serializing or
        deserializing the data values to and from the file. Currently, there are three
        `valid` encoding methods:

        | ``raw``
        | ``base64``
        | ``none``

        A value, `raw`, means that the data values are serialized as binary data.
        The value, `base64`, implies that the data values are serialized as base64
        strings, while, the value `none` refers to text-based serialization.

        By default, the encoding attribute of all dependent variable object are set to
        `base64` after import. The user may update this attribute, at any time, with a
        string containing a *valid* encoding literal, for example,

        .. doctest::

            >>> y.encoding = "base64"

        The value of this attribute will be used in serializing the data to the file,
        when using the :meth:`~csdmpy.CSDM.save` method.

        Returns:
            A string with a `valid` encoding type.

        Raises:
            ValueError: If an invalid encoding value is assigned.
        """
        return self.subtype.encoding

    @encoding.setter
    def encoding(self, value):
        self.subtype.encoding = value

    @property
    def name(self):
        """Name of the dependent variable.

        .. doctest::

            >>> y.name
            'star'
            >>> y.name = "rock star"

        Returns:
            A string containing the name of the dependent variable.

        Raises:
            TypeError: When the assigned value is not a string.
        """
        return self.subtype.name

    @name.setter
    def name(self, value):
        self.subtype.name = value

    @property
    def numeric_type(self):
        """The numeric type of the component values from the dependent variable.

        There are currently twelve *valid* numeric types in core scientific dataset
        model.

        ==============   ============   ============   ============
        ``uint8``        ``int8``       ``float32``    ``complex64``
        ``uint16``       ``int16``      ``float64``    ``complex128``
        ``uint32``       ``int32``
        ``uint64``       ``int64``
        ==============   ============   ============   ============

        Besides, csdmpy also accepts any valid `type` object, such as int, float,
        np.complex64, as long as the type is consistent with the above twelve entries.

        When assigning a valid value, this attribute updates the `dtype` of the Numpy
        array from the corresponding :attr:`~csdmpy.DependentVariable.components`
        attribute.

        .. doctest::

            >>> y.numeric_type
            'float32'

            >>> print(y.components)
            [[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9.]
             [10. 11. 12. 13. 14. 15. 16. 17. 18. 19.]
             [20. 21. 22. 23. 24. 25. 26. 27. 28. 29.]]

            >>> y.numeric_type = "complex64"
            >>> print(y.components[:, :5])
            [[ 0.+0.j  1.+0.j  2.+0.j  3.+0.j  4.+0.j]
             [10.+0.j 11.+0.j 12.+0.j 13.+0.j 14.+0.j]
             [20.+0.j 21.+0.j 22.+0.j 23.+0.j 24.+0.j]]

            >>> y.numeric_type = float  # python type object
            >>> print(y.components[:, :5])
            [[ 0.  1.  2.  3.  4.]
             [10. 11. 12. 13. 14.]
             [20. 21. 22. 23. 24.]]

        Returns:
            A string with a `valid` numeric type.

        Raises:
            ValueError: If an invalid numeric type value is assigned.
        """
        return self.subtype.numeric_type.value

    @numeric_type.setter
    def numeric_type(self, value):
        self.subtype.numeric_type = value

    @property
    def quantity_name(self):
        """Quantity name of physical quantities associated with the dependent variable.

        .. doctest::

            >>> y.quantity_name
            'energy'

        Returns:
            A string with the quantity name associated with the dependent variable
            physical quantities .

        Raises:
            NotImplementedError: When assigning a value.
        """
        return str(self.subtype.quantity_name)

    @quantity_name.setter
    def quantity_name(self, value=""):
        self.subtype.quantity_name = value

    @property
    def quantity_type(self):
        """Quantity type of the dependent variable.

        There are currently six *valid* quantity types,

        | ``scalar``
        | ``vector_n``
        | ``pixel_n``
        | ``matrix_n_m``
        | ``symmetric_matrix_n``

        where `n` and `m` are integers. The value of the attribute is modified with a
        string containing a *valid* quantity type.

        .. doctest::

            >>> y.quantity_type
            'pixel_3'
            >>> y.quantity_type = "vector_3"

        Returns:
            A string with a `valid` quantity type.

        Raises:
            ValueError: If an invalid value is assigned.
        """
        return self.subtype.quantity_type.value

    @quantity_type.setter
    def quantity_type(self, value):
        self.subtype.quantity_type = value

    @property
    def type(self):
        """The dependent variable subtype.

        There are two *valid* subtypes of DependentVariable class with the following
        enumeration literals,

        | ``internal``
        | ``external``

        corresponding to Internal and External sub class. By default, all instances of
        the DependentVariable class are assigned as  `internal` upon import. The user
        may update the value of this attribute, at any time, with a string containing a
        valid `type` literal, for example,

        .. doctest::

            >>> print(y.type)
            internal

            >>> y.type = "external"

        When `type` is external, the data values from the corresponding dependent
        variable are serialized to an external file within the same directory as the
        `.csdfe` file.

        Returns:
             A string with a `valid` dependent variable subtype.

        Raises:
            ValueError: When an invalid value is assigned.
        """
        return self._type

    @type.setter
    def type(self, value):
        if value in ["internal", "external"]:
            self._type = value
            return
        raise ValueError(
            f"The value, `{value}`, is invalid for the `type` attribute of the "
            "DependentVariable object. The allowed values are 'internal' and "
            "'external'."
        )

    @property
    def unit(self):
        """Unit associated with the dependent variable.

        .. note::
            The attribute cannot be modified. To convert the unit, use the
            :meth:`~csdmpy.DependentVariable.to` method of
            the class instance.

        Returns:
            A `Unit` object from astropy.unit package.

        Raises:
            AttributeError: When assigned a value.
        """
        return self.subtype.unit

    @unit.setter
    def unit(self, value):
        raise AttributeError(
            "The `unit` attribute cannot be modified. Use the ``to`` method "
            "of the instance for the unit conversion."
        )

    # ======================================================================= #
    #                                  Methods                                #
    # ======================================================================= #

[docs]    def to(self, unit):
        """Convert the unit of the dependent variable to the `unit`.

        Args:
            unit: A string containing a unit with the same dimensionality as the
                  components of the dependent variable.

        .. doctest::

            >>> print(y.components[0, 5])
            5.0
            >>> y.to("mJ")
            >>> y.unit
            Unit("mJ")
            >>> print(y.components[0, 5])
            5000.0

        .. note::
                This method is a wrapper of the `to` method from the `Quantity <http://
                docs.astropy.org/en/stable/api/\astropy.units.Quantity.html#astropy.
                units.Quantity.to>`_ class.

        """
        factor = (1.0 * self.unit).to(unit)
        self.subtype._components = self.subtype._components * factor.value
        self.subtype._unit = factor.unit

[docs]    def to_dict(self):
        """Alias to the `dict()` method of the class."""
        return self.dict()

[docs]    def dict(self):
        """Return DependentVariable object as a python dictionary.

        Example:
            >>> y.dict() # doctest: +SKIP
            {'type': 'internal', 'description': 'A test image', 'name': 'star',
            'unit': 's * W', 'quantity_name': 'energy', 'encoding': 'none',
            'numeric_type': 'float32', 'quantity_type': 'pixel_3',
            'components': [[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0],
            [10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0],
            [20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0]]}
        """
        return self.subtype.dict()

    def _dict(self, filename=None, dataset_index=None, for_display=False):
        """Return DependentVariable object as a python dictionary."""
        return self.subtype.dict(filename, dataset_index, for_display)

[docs]    def copy(self):
        """Return a copy of the DependentVariable object."""
        return deepcopy(self)

    def _reshape(self, shape):
        r"""Reshapes the components array.

        The array is reshaped to :math:`(p \times N_{d-1} \times ... N_1 \times N_0)`
        where :math:`p` is the number of components and :math:`N_k` is the number of
        points along the :math:`k^\mathrm{th}` dimension.
        """
        # for item in self.dependent_variables:
        item = self.subtype
        sub_shape = (item.quantity_type.p,) + tuple(shape)
        if item.components.shape == sub_shape:
            return

        dtype = item.numeric_type.dtype

        grid_points = np.asarray(sub_shape).prod()
        components_size = item._components.size

        if grid_points != components_size and item._sparse_sampling == {}:
            warnings.warn(
                "The number of elements in the components array, "
                f"{components_size}, is not consistent with the total "
                f"number of grid points, {grid_points}."
            )
        if item._sparse_sampling == {}:
            item._components = np.asarray(
                item._components[:, :grid_points].reshape(sub_shape), dtype=dtype
            )
        else:
            item._components = fill_sparse_space(item, sub_shape, dtype)

    def copy_metadata(self, obj, copy=False):
        """Copy DependentVariable metadata"""

        self.type = obj.type
        self.subtype._description = obj.subtype._description
        self.subtype._name = obj.subtype._name
        self.subtype._unit = obj.subtype._unit
        self.subtype._quantity_name = obj.subtype._quantity_name
        self.subtype._component_labels = obj.subtype._component_labels
        self.subtype._encoding = obj.subtype._encoding
        # self.subtype._numeric_type = obj.subtype._numeric_type
        # self.subtype._quantity_type = obj.subtype._quantity_type
        self.subtype._application = obj.subtype._application


def fill_sparse_space(item, shape, dtype):
    """Fill sparse grid using numpy broadcasting."""
    components = np.zeros(shape, dtype=dtype)
    sparse_dimensions_indexes = item._sparse_sampling._sparse_dimensions_indexes
    sgs = item._sparse_sampling._sparse_grid_vertexes.size
    grid_vertexes = item._sparse_sampling._sparse_grid_vertexes.reshape(
        int(sgs / len(sparse_dimensions_indexes)), len(sparse_dimensions_indexes)
    ).T

    vertexes = [slice(None) for i in range(len(shape))]
    for i, sparse_index in enumerate(sparse_dimensions_indexes):
        vertexes[sparse_index] = grid_vertexes[i]

    vertexes = tuple(vertexes[::-1])
    _new_shape = components[vertexes].shape

    components[vertexes] = item.components.reshape(_new_shape)
    return components


[docs]def as_dependent_variable(array, **kwargs):
    """Generate and return a DependentVariable object from a 1D or 2D numpy array.

    Args:
        array: A 1D or 2D numpy array.
        kwargs: Additional keyword arguments from the DependentVariable class.

    Example:
        >>> array = np.arange(1e4).astype(np.complex128)
        >>> dim_object = cp.as_dependent_variable(array)
        >>> print(dim_object)
        DependentVariable(
        [[0.000e+00+0.j 1.000e+00+0.j 2.000e+00+0.j ... 9.997e+03+0.j
          9.998e+03+0.j 9.999e+03+0.j]], quantity_type=scalar, numeric_type=complex128)
    """
    if not isinstance(array, (list, np.ndarray)):
        raise ValueError(
            f"Cannot convert {array.__class__.__name__} to a DependentVariable object."
        )
    if isinstance(array, list):
        array = np.asarray(array)
    if array.ndim < 1:
        raise ValueError(
            f"Cannot convert a {array.ndim} dimensional array to a DependentVariable "
            "object."
        )
    if "quantity_type" not in kwargs.keys():
        kwargs["quantity_type"] = "scalar"

    return DependentVariable(type="internal", components=array, **kwargs)