# Source code for csdmpy.dependent_variable

"""Dependent variable object: attributes and methods."""
import json
import warnings
from copy import deepcopy

import numpy as np

from csdmpy.dependent_variable.external import ExternalDataset
from csdmpy.dependent_variable.internal import InternalDataset
from csdmpy.utils import _axis_label  # lgtm [py/import-own-module]
from csdmpy.utils import _get_dictionary  # lgtm [py/import-own-module]

# Module authorship metadata.
__author__ = "Deepansh J. Srivastava"
__email__ = "srivastava.89@osu.edu"
# Public API of this module: the only name exported by a star-import.
__all__ = ["DependentVariable"]


class DependentVariable:
    r"""Create an instance of the DependentVariable class.

    The instance of this class represents a dependent variable,
    :math:`\mathbf{U}`. A dependent variable holds :math:`p`-component data
    values, where :math:`p>0` is an integer. For example, a scalar is
    single-component (:math:`p=1`), a vector may have up to `n`-components
    (:math:`p=n`), while a second rank symmetric tensor has six unique
    components (:math:`p=6`).

    **Creating a new dependent variable.**

    There are two ways of creating a new instance of a DependentVariable class.

    *From a python dictionary containing valid keywords.*

    .. doctest::

        >>> from csdmpy import DependentVariable
        >>> import numpy as np
        >>> numpy_array = np.arange(30).reshape(3, 10).astype(np.float32)

        >>> dependent_variable_dictionary = {
        ...     "type": "internal",
        ...     "components": numpy_array,
        ...     "name": "star",
        ...     "unit": "W s",
        ...     "quantity_name": "energy",
        ...     "quantity_type": "pixel_3",
        ... }
        >>> y = DependentVariable(dependent_variable_dictionary)

    Here, `dependent_variable_dictionary` is the python dictionary.

    *From valid keyword arguments.*

    .. doctest::

        >>> y = DependentVariable(
        ...     type="internal",
        ...     name="star",
        ...     unit="W s",
        ...     quantity_type="pixel_3",
        ...     components=numpy_array,
        ... )
    """

    # `subtype` holds the InternalDataset/ExternalDataset object that stores the
    # data and metadata; `_type` backs the `type` property.
    __slots__ = ("subtype", "_type")
    _immutable_objects_ = ()

    def __init__(self, *args, **kwargs):
        """Initialize an instance of a DependentVariable class.

        The arguments are forwarded to `_get_dictionary`, validated, and then
        merged over the default keyword values before the subtype object is
        created.

        Raises:
            KeyError: When a key required for the given `type` is missing, or when
                `encoding` is supplied for an `external` dependent variable.
            ValueError: When the value of `type` is neither 'internal' nor
                'external'.
        """
        dictionary = {
            "type": "internal",
            "description": "",
            "name": "",
            "unit": "",
            "quantity_name": None,
            "component_labels": None,
            "encoding": "none",
            "numeric_type": None,
            "quantity_type": "scalar",
            "components": None,
            "components_url": None,
            "filename": __file__,
            "application": None,
            "sparse_sampling": {},
        }

        input_dict = _get_dictionary(*args, **kwargs)
        self.__validate_key_value__(input_dict)
        dictionary.update(input_dict)

        is_internal = dictionary["type"] == "internal"
        self._type = "internal" if is_internal else "external"
        dataset_class = InternalDataset if is_internal else ExternalDataset
        self.subtype = dataset_class(**dictionary)

        # After import every dependent variable is reported as `internal` with
        # `base64` encoding (see the `type` and `encoding` attributes).
        self.encoding = "base64"
        self.type = "internal"

    @staticmethod
    def __validate_key_value__(input_dict):
        """Check that `input_dict` holds the keys required for its `type`.

        Raises:
            KeyError: When `type`, `quantity_type`, `components` (internal) or
                `components_url` (external) is missing, or when `encoding` is
                given for an external dependent variable.
            ValueError: When `type` is neither 'internal' nor 'external'.
        """
        if "type" not in input_dict:
            raise KeyError(
                "Missing a required `type` key from the DependentVariable object."
            )

        dv_type = input_dict["type"]
        if dv_type not in ["internal", "external"]:
            raise ValueError(
                f"The value, '{dv_type}', is an invalid `type` for the "
                "DependentVariable objects. The allowed values are 'internal', "
                "'external'."
            )

        def message(item, subtype):
            return (
                f"Missing a required `{item}` key from the DependentVariable "
                f"object of type, `{subtype}`."
            )

        if "quantity_type" not in input_dict:
            raise KeyError(message("quantity_type", dv_type))

        if dv_type == "external":
            if "encoding" in input_dict:
                raise KeyError(
                    "The `encoding` key is invalid for DependentVariable objects "
                    "with the `external` type."
                )
            if "components_url" not in input_dict:
                raise KeyError(message("components_url", "external"))
        elif "components" not in input_dict:
            raise KeyError(message("components", "internal"))

    def __repr__(self):
        msg = f"{self.components!r} {self.unit}".strip()
        return f"DependentVariable({msg}, quantity_type={self.quantity_type})"

    def __str__(self):
        msg = f"{self.components!s} {self.unit}".strip()
        q_t = self.quantity_type
        n_t = self.numeric_type
        return f"DependentVariable(\n{msg}, quantity_type={q_t}, numeric_type={n_t})"

    def __eq__(self, other):
        """Two dependent variables are equal when their subtype objects are equal."""
        if not isinstance(other, DependentVariable):
            return False
        return self.subtype == other.subtype

    # ======================================================================= #
    #                      DependentVariable Attributes                       #
    # ======================================================================= #
    @property
    def application(self):
        """Application metadata of the DependentVariable object.

        .. doctest::

            >>> print(y.application)
            None

        The application attribute is where an application can place its own
        metadata as a python dictionary object containing the application specific
        metadata, using a reverse domain name notation string as the attribute
        key, for example,

        .. doctest::

            >>> y.application = {"com.example.myApp": {"myApp_key": "myApp_metadata"}}
            >>> print(y.application)
            {'com.example.myApp': {'myApp_key': 'myApp_metadata'}}

        Please refer to the Core Scientific Dataset Model article for details.

        Returns:
            A python dictionary containing dependent variable application metadata.
        """
        return self.subtype.application

    @application.setter
    def application(self, value):
        self.subtype.application = value

    @property
    def axis_label(self):
        """List of formatted string labels for each component of the dependent variable.

        This attribute is not a part of the original core scientific dataset model,
        however, it is a convenient supplementary attribute that provides formatted
        string ready for labeling the components of the dependent variable.
        The string at index `i` is formatted as `component_labels[i] / unit` if
        `component_labels[i]` is a non-empty string, otherwise,
        `quantity_name / unit`. Here, `quantity_name`, `component_labels`, and
        `unit` are the attributes of the :ref:`dv_api` instance. For example,

        .. doctest::

            >>> y.axis_label
            ['energy / (s W)', 'energy / (s W)', 'energy / (s W)']

        Returns:
            A list of formatted component label strings.

        Raises:
            AttributeError: When assigned a value.
        """
        # An empty unit string is passed on as None.
        unit_ = str(self.unit) or None
        return [
            _axis_label(label if label.strip() != "" else self.quantity_name, unit_)
            for label in self.component_labels
        ]

    @property
    def component_labels(self):
        """List of labels corresponding to the components of the dependent variable.

        .. doctest::

            >>> y.component_labels
            ['', '', '']

        To update the `component_labels`, assign an array of strings with same
        number of elements as the number of components.

        .. doctest::

            >>> y.component_labels = ["channel 0", "channel 1", "channel 2"]

        The individual labels are accessed with proper indexing, for example,

        .. doctest::

            >>> y.component_labels[2]
            'channel 2'

        Returns:
            A list of component label strings.

        Raises:
            TypeError: When the assigned value is not an array of strings.
        """
        return self.subtype.component_labels

    @component_labels.setter
    def component_labels(self, value):
        self.subtype.component_labels = value

    @property
    def components(self):
        r"""Component array of the dependent variable.

        The value of this attribute, :math:`\mathbb{U}`, is a Numpy array of shape
        :math:`(p \times N_{d-1} \times ... N_1 \times N_0)` where :math:`p` is
        the number of components, and :math:`N_k` is the number of points from the
        :math:`k^\mathrm{th}` :ref:`dim_api` object.

        .. note::
            The shape of the components Numpy array,
            :math:`(p \times N_{d-1} \times ... N_1 \times N_0)`, is reverse the
            shape of the components array,
            :math:`(N_0 \times N_1 \times ... N_{d-1} \times p)`, from the CSD
            model. This is because CSD model utilizes a column-major order to
            shape the components array relative to the order of the dimension
            while Numpy utilizes a row-major order.

        The dimensionality of this Numpy array is :math:`d+1` where :math:`d` is
        the number of dimension objects. The zeroth axis with :math:`p` points is
        the number of components.

        This attribute can only be updated when the shape of the new array is the
        same as the shape of the components array.

        For example,

        .. doctest::

            >>> print(y.components.shape)
            (3, 10)
            >>> y.numeric_type
            'float32'

        is a three-component dependent variable with ten data values per
        component. The numeric type of the data values, in this example, is
        `float32`. To update the components array, assign an array of shape
        (3, 10) to the `components` attribute. In the following example, we assign
        a Numpy array,

        .. doctest::

            >>> y.components = np.linspace(0, 256, 30, dtype="u1").reshape(3, 10)
            >>> y.numeric_type
            'uint8'

        Notice, the value of the `numeric_type` attribute is automatically updated
        based on the `dtype` of the Numpy array. In this case, from a *float32* to
        *uint8*.
        In this other example,

            >>> try:  # doctest: +SKIP
            ...     y.components = np.random.rand(1,10).astype('u1')
            ... except ValueError as e:
            ...     print(e)
            The shape of the `ndarray`, `(1, 10)`, is inconsistent with the shape
            of the components array, `(3, 10)`.

        a `ValueError` is raised because the shape of the input array (1, 10) is
        not consistent with the shape of the components array, (3, 10).

        Returns:
            A Numpy array of components.

        Raises:
            ValueError: When assigning an array whose shape is not consistent with
                the shape of the components array.
        """
        return self.subtype.components

    @components.setter
    def components(self, value):
        self.subtype.components = value

    @property
    def components_url(self):
        """URL where the data components of the dependent variable are stored.

        This attribute is only informative and cannot be modified. Its value is a
        string containing the local or remote address of the file where the data
        values are stored. The attribute is only valid for dependent variable with
        type, `external`.

        Returns:
            A string containing the URL.

        Raises:
            AttributeError: When assigned a value.
        """
        return self.subtype.components_url

    @property
    def data_structure(self):
        """Json serialized string describing the DependentVariable class instance.

        This supplementary attribute is useful for a quick preview of the
        dependent variable object. For convenience, the values from the
        `components` attribute are truncated to the first and the last two numbers
        per component. The `encoding` keyword is also hidden from this view.

        .. doctest::

            >>> print(y.data_structure)
            {
              "type": "internal",
              "description": "A test image",
              "name": "star",
              "unit": "s * W",
              "quantity_name": "energy",
              "numeric_type": "float32",
              "quantity_type": "pixel_3",
              "components": [
                [
                  "0.0, 1.0, ..., 8.0, 9.0"
                ],
                [
                  "10.0, 11.0, ..., 18.0, 19.0"
                ],
                [
                  "20.0, 21.0, ..., 28.0, 29.0"
                ]
              ]
            }

        Returns:
            A json serialized string of the dependent variable object.

        Raises:
            AttributeError: When modified.
        """
        return json.dumps(
            self._dict(for_display=True), ensure_ascii=False, sort_keys=False, indent=2
        )

    @property
    def description(self):
        """Brief description of the dependent variables.

        The default value is an empty string, ''.

        .. doctest::

            >>> print(y.description)
            A test image

            >>> y.description = "A test pixel_3 image"
            >>> print(y.description)
            A test pixel_3 image

        Returns:
            A string of UTF-8 allowed characters describing the dependent variable.

        Raises:
            TypeError: When the assigned value is not a string.
        """
        return self.subtype.description

    @description.setter
    def description(self, value):
        self.subtype.description = value

    @property
    def encoding(self):
        """The encoding method used in representing the dependent variable.

        The value of this attribute determines the method used when serializing or
        deserializing the data values to and from the file. Currently, there are
        three `valid` encoding methods:

        | ``raw``
        | ``base64``
        | ``none``

        A value, `raw`, means that the data values are serialized as binary data.
        The value, `base64`, implies that the data values are serialized as base64
        strings, while, the value `none` refers to text-based serialization.

        By default, the encoding attribute of all dependent variable objects is
        set to `base64` after import. The user may update this attribute, at any
        time, with a string containing a *valid* encoding literal, for example,

        .. doctest::

            >>> y.encoding = "base64"

        The value of this attribute will be used in serializing the data to the
        file, when using the :meth:`~csdmpy.CSDM.save` method.

        Returns:
            A string with a `valid` encoding type.

        Raises:
            ValueError: If an invalid encoding value is assigned.
        """
        return self.subtype.encoding

    @encoding.setter
    def encoding(self, value):
        self.subtype.encoding = value

    @property
    def name(self):
        """Name of the dependent variable.

        .. doctest::

            >>> y.name
            'star'
            >>> y.name = "rock star"

        Returns:
            A string containing the name of the dependent variable.

        Raises:
            TypeError: When the assigned value is not a string.
        """
        return self.subtype.name

    @name.setter
    def name(self, value):
        self.subtype.name = value

    @property
    def numeric_type(self):
        """The numeric type of the component values from the dependent variable.

        There are currently twelve *valid* numeric types in core scientific
        dataset model.

        ==============   ============   ============   ============
        ``uint8``        ``int8``       ``float32``    ``complex64``
        ``uint16``       ``int16``      ``float64``    ``complex128``
        ``uint32``       ``int32``
        ``uint64``       ``int64``
        ==============   ============   ============   ============

        Besides, csdmpy also accepts any valid `type` object, such as int, float,
        np.complex64, as long as the type is consistent with the above twelve
        entries.

        When assigning a valid value, this attribute updates the `dtype` of the
        Numpy array from the corresponding
        :attr:`~csdmpy.DependentVariable.components` attribute.

        .. doctest::

            >>> y.numeric_type
            'float32'

            >>> print(y.components)
            [[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9.]
             [10. 11. 12. 13. 14. 15. 16. 17. 18. 19.]
             [20. 21. 22. 23. 24. 25. 26. 27. 28. 29.]]

            >>> y.numeric_type = "complex64"
            >>> print(y.components[:, :5])
            [[ 0.+0.j  1.+0.j  2.+0.j  3.+0.j  4.+0.j]
             [10.+0.j 11.+0.j 12.+0.j 13.+0.j 14.+0.j]
             [20.+0.j 21.+0.j 22.+0.j 23.+0.j 24.+0.j]]

            >>> y.numeric_type = float  # python type object
            >>> print(y.components[:, :5])
            [[ 0.  1.  2.  3.  4.]
             [10. 11. 12. 13. 14.]
             [20. 21. 22. 23. 24.]]

        Returns:
            A string with a `valid` numeric type.

        Raises:
            ValueError: If an invalid numeric type value is assigned.
        """
        return self.subtype.numeric_type.value

    @numeric_type.setter
    def numeric_type(self, value):
        self.subtype.numeric_type = value

    @property
    def quantity_name(self):
        """Quantity name of physical quantities associated with the dependent variable.

        .. doctest::

            >>> y.quantity_name
            'energy'

        Returns:
            A string with the quantity name associated with the dependent variable
            physical quantities.

        Raises:
            NotImplementedError: When assigning a value.
        """
        return str(self.subtype.quantity_name)

    @quantity_name.setter
    def quantity_name(self, value=""):
        self.subtype.quantity_name = value

    @property
    def quantity_type(self):
        """Quantity type of the dependent variable.

        There are currently five *valid* quantity types,

        | ``scalar``
        | ``vector_n``
        | ``pixel_n``
        | ``matrix_n_m``
        | ``symmetric_matrix_n``

        where `n` and `m` are integers. The value of the attribute is modified
        with a string containing a *valid* quantity type.

        .. doctest::

            >>> y.quantity_type
            'pixel_3'
            >>> y.quantity_type = "vector_3"

        Returns:
            A string with a `valid` quantity type.

        Raises:
            ValueError: If an invalid value is assigned.
        """
        return self.subtype.quantity_type.value

    @quantity_type.setter
    def quantity_type(self, value):
        self.subtype.quantity_type = value

    @property
    def type(self):
        """The dependent variable subtype.

        There are two *valid* subtypes of DependentVariable class with the
        following enumeration literals,

        | ``internal``
        | ``external``

        corresponding to Internal and External sub class. By default, all
        instances of the DependentVariable class are assigned as `internal` upon
        import. The user may update the value of this attribute, at any time, with
        a string containing a valid `type` literal, for example,

        .. doctest::

            >>> print(y.type)
            internal
            >>> y.type = "external"

        When `type` is external, the data values from the corresponding dependent
        variable are serialized to an external file within the same directory as
        the `.csdfe` file.

        Returns:
            A string with a `valid` dependent variable subtype.

        Raises:
            ValueError: When an invalid value is assigned.
        """
        return self._type

    @type.setter
    def type(self, value):
        if value not in ["internal", "external"]:
            raise ValueError(
                f"The value, `{value}`, is invalid for the `type` attribute of the "
                "DependentVariable object. The allowed values are 'internal' and "
                "'external'."
            )
        self._type = value

    @property
    def unit(self):
        """Unit associated with the dependent variable.

        .. note::
            The attribute cannot be modified. To convert the unit, use the
            :meth:`~csdmpy.DependentVariable.to` method of the class instance.

        .. doctest::

            >>> y.unit
            Unit("s W")

        Returns:
            A `Unit` object from astropy.unit package.

        Raises:
            AttributeError: When assigned a value.
        """
        return self.subtype.unit

    @unit.setter
    def unit(self, value):
        raise AttributeError(
            "The `unit` attribute cannot be modified. Use the ``to`` method "
            "of the instance for the unit conversion."
        )

    # ======================================================================= #
    #                                  Methods                                #
    # ======================================================================= #
    def to(self, unit):
        """Convert the unit of the dependent variable to the `unit`.

        The components are scaled in place by the conversion factor and the
        stored unit is replaced with the converted unit.

        Args:
            unit: A string containing a unit with the same dimensionality as the
                components of the dependent variable.

        .. doctest::

            >>> y.unit
            Unit("s W")
            >>> print(y.components[0, 5])
            5.0
            >>> y.to("mJ")
            >>> y.unit
            Unit("mJ")
            >>> print(y.components[0, 5])
            5000.0

        .. note::
            This method is a wrapper of the `to` method from the `Quantity
            <http://docs.astropy.org/en/stable/api/astropy.units.Quantity.html#astropy.units.Quantity.to>`_
            class.
        """
        # Converting a unit quantity (1.0 * unit) yields the scaling factor as
        # `.value` together with the new unit as `.unit`.
        converted = (1.0 * self.unit).to(unit)
        dataset = self.subtype
        dataset._components = dataset._components * converted.value
        dataset._unit = converted.unit

    def to_dict(self):
        """Alias to the `dict()` method of the class."""
        return self.dict()

    def dict(self):
        """Return DependentVariable object as a python dictionary.

        Example:
            >>> y.dict() # doctest: +SKIP
            {'type': 'internal', 'description': 'A test image', 'name': 'star',
            'unit': 's * W', 'quantity_name': 'energy', 'encoding': 'none',
            'numeric_type': 'float32', 'quantity_type': 'pixel_3',
            'components': [[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0],
            [10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0],
            [20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0]]}
        """
        return self.subtype.dict()

    def _dict(self, filename=None, dataset_index=None, for_display=False):
        """Return DependentVariable object as a python dictionary.

        The three arguments are forwarded positionally, in the given order, to the
        `dict` method of the subtype object.
        """
        return self.subtype.dict(filename, dataset_index, for_display)

    def copy(self):
        """Return a copy of the DependentVariable object."""
        return deepcopy(self)

    def _reshape(self, shape):
        r"""Reshapes the components array.

        The array is reshaped to
        :math:`(p \times N_{d-1} \times ... N_1 \times N_0)` where :math:`p` is
        the number of components and :math:`N_k` is the number of points along the
        :math:`k^\mathrm{th}` dimension.

        Args:
            shape: Iterable of grid sizes; the number of components is prepended
                to it to form the target shape.

        Warns:
            UserWarning: When the variable is not sparsely sampled and the number
                of stored elements differs from the number of grid points.
        """
        item = self.subtype
        target_shape = (item.quantity_type.p,) + tuple(shape)
        if item.components.shape == target_shape:
            return  # already in the requested shape

        dtype = item.numeric_type.dtype
        grid_points = np.asarray(target_shape).prod()
        components_size = item._components.size
        is_dense = item._sparse_sampling == {}

        if is_dense and grid_points != components_size:
            warnings.warn(
                "The number of elements in the components array, "
                f"{components_size}, is not consistent with the total "
                f"number of grid points, {grid_points}."
            )

        if is_dense:
            item._components = np.asarray(
                item._components[:, :grid_points].reshape(target_shape), dtype=dtype
            )
        else:
            # Sparsely sampled data is scattered onto a zero-filled full grid.
            item._components = fill_sparse_space(item, target_shape, dtype)

    def copy_metadata(self, obj, copy=False):
        """Copy DependentVariable metadata from `obj` onto this instance.

        The `type`, description, name, unit, quantity name, component labels,
        encoding and application metadata are transferred. The numeric type and
        the quantity type are deliberately left untouched.

        Args:
            obj: The DependentVariable object whose metadata is copied.
            copy: When True, every metadata value is deep-copied so that mutable
                values (for example the component labels or the application
                dictionary) are not shared between the two objects. When False
                (default), the values are assigned by reference.
        """
        duplicate = deepcopy if copy else (lambda value: value)

        self.type = obj.type
        target, source = self.subtype, obj.subtype
        for attr in (
            "_description",
            "_name",
            "_unit",
            "_quantity_name",
            "_component_labels",
            "_encoding",
            "_application",
        ):
            setattr(target, attr, duplicate(getattr(source, attr)))
def fill_sparse_space(item, shape, dtype):
    """Fill sparse grid using numpy broadcasting.

    A zero-filled array of the given `shape` and `dtype` is created, and the
    values from `item.components` are written at the positions listed in the
    sparse grid vertexes of `item._sparse_sampling`. All other entries stay zero.

    Args:
        item: Object exposing `components` and a `_sparse_sampling` attribute
            with `_sparse_dimensions_indexes` and `_sparse_grid_vertexes`.
        shape: Shape of the full (dense) components array.
        dtype: Numpy dtype of the returned array.

    Returns:
        The dense Numpy array holding the sparsely sampled values.
    """
    sampling = item._sparse_sampling
    sparse_axes = sampling._sparse_dimensions_indexes
    n_sparse = len(sparse_axes)

    # The flat vertex list is interleaved per grid point; after the reshape and
    # transpose, row `i` holds the indexes along the i-th sparse dimension.
    flat_vertexes = sampling._sparse_grid_vertexes
    per_axis = flat_vertexes.reshape(flat_vertexes.size // n_sparse, n_sparse).T

    selector = [slice(None)] * len(shape)
    for axis, indexes in zip(sparse_axes, per_axis):
        selector[axis] = indexes
    # The selector is built in dimension order and reversed before indexing the
    # Numpy array.
    selector = tuple(reversed(selector))

    filled = np.zeros(shape, dtype=dtype)
    filled[selector] = item.components.reshape(filled[selector].shape)
    return filled
def as_dependent_variable(array, **kwargs):
    """Generate and return a DependentVariable object from a list or numpy array.

    Args:
        array: A list or a numpy array with at least one dimension (typically a
            1D or 2D array). A list is first converted to a numpy array.
        kwargs: Additional keyword arguments from the DependentVariable class.
            When `quantity_type` is not given, it defaults to "scalar".

    Returns:
        A DependentVariable object of type `internal` holding `array` as its
        components.

    Raises:
        ValueError: When `array` is neither a list nor a numpy array, or when it
            is a zero-dimensional array.

    Example:
        >>> array = np.arange(1e4).astype(np.complex128)
        >>> dim_object = cp.as_dependent_variable(array)
        >>> print(dim_object)
        DependentVariable(
        [[0.000e+00+0.j 1.000e+00+0.j 2.000e+00+0.j ... 9.997e+03+0.j
          9.998e+03+0.j 9.999e+03+0.j]], quantity_type=scalar, numeric_type=complex128)
    """
    if isinstance(array, list):
        array = np.asarray(array)
    elif not isinstance(array, np.ndarray):
        raise ValueError(
            f"Cannot convert {array.__class__.__name__} to a DependentVariable object."
        )

    n_dims = array.ndim
    if n_dims < 1:
        raise ValueError(
            f"Cannot convert a {n_dims} dimensional array to a DependentVariable "
            "object."
        )

    kwargs.setdefault("quantity_type", "scalar")
    return DependentVariable(type="internal", components=array, **kwargs)