# -*- coding: utf-8 -*-
"""Dependent variable object: attributes and methods."""
from __future__ import division
from __future__ import print_function
import json
from copy import deepcopy
from csdmpy.dependent_variables.external import ExternalDataset
from csdmpy.dependent_variables.internal import InternalDataset
from csdmpy.utils import _axis_label
from csdmpy.utils import _get_dictionary
__author__ = "Deepansh J. Srivastava"
__email__ = "srivastava.89@osu.edu"
__all__ = ["DependentVariable"]
[docs]class DependentVariable:
r"""
Create an instance of the DependentVariable class.
The instance of this class represents a dependent variable, :math:`\mathbf{U}`.
A dependent variable holds :math:`p`-component data values, where :math:`p>0`
is an integer. For example, a scalar is single-component (:math:`p=1`),
a vector may have up to `n`-components (:math:`p=n`),
while a second rank symmetric tensor have six unique component (:math:`p=6`).
**Creating a new dependent variable.**
There are two ways of creating a new instance of a DependentVariable class.
*From a python dictionary containing valid keywords.*
.. doctest::
>>> from csdmpy import DependentVariable
>>> import numpy as np
>>> numpy_array = np.arange(30).reshape(3,10).astype(np.float32)
>>> dependent_variable_dictionary = {
... 'type': 'internal',
... 'components': numpy_array,
... 'name': 'star',
... 'unit': 'W s',
... 'quantity_name': 'energy',
... 'quantity_type': 'pixel_3'
... }
>>> y = DependentVariable(dependent_variable_dictionary)
Here, `dependent_variable_dictionary` is the python dictionary.
*From valid keyword arguments.*
.. doctest::
>>> y = DependentVariable(
... type='internal',
... name='star',
... unit='W s',
... quantity_type='pixel_3',
... components=numpy_array
... )
"""
__slots__ = ("subtype", "_type")
_immutable_objects_ = ()
def __init__(self, *args, **kwargs):
"""Initialize an instance of a DependentVariable class."""
dictionary = {
"type": "internal",
"description": "",
"name": "",
"unit": "",
"quantity_name": None,
"component_labels": None,
"encoding": "none",
"numeric_type": None,
"quantity_type": "scalar",
"components": None,
"components_url": None,
"filename": __file__,
"application": {},
"sparse_sampling": {
"dimensions": None,
"sparse_grid_vertexes": None,
"encoding": "none",
"numeric_type": "int64",
"application": {},
"description": "",
},
}
default_keys = dictionary.keys()
input_dict = _get_dictionary(*args, **kwargs)
input_keys = input_dict.keys()
if "type" not in input_keys:
raise KeyError("Missing a required `type` key from the dependent variable.")
if input_dict["type"] not in ["internal", "external"]:
raise ValueError(
(
"'{0}' is an invalid DependentVariable 'type'. The "
"allowed values are 'internal', 'external'."
).format(input_dict["type"])
)
if "quantity_type" not in input_keys:
raise KeyError(
"Missing a required `quantity_type` key from the dependent variable."
)
if input_dict["type"] == "external" and "encoding" in input_dict:
raise ValueError(
"The `encoding` key is invalid for dependent variables of subtype `external`."
)
def message(item, subtype):
return (
f"Missing a required `{item}` key from the dependent "
"variable of type `{subtype}`."
)
if input_dict["type"] == "internal" and "components" not in input_keys:
raise KeyError(message("components", "internal"))
if input_dict["type"] == "external":
if "components_url" not in input_keys:
raise KeyError(message("components_url", "external"))
if "filename" in kwargs.keys():
dictionary["filename"] = kwargs["filename"]
for key in input_keys:
if key in default_keys and key != "sparse_sampling":
dictionary[key] = input_dict[key]
if "sparse_sampling" in input_keys:
check_sparse_sampling_key_value(input_dict)
for key in input_dict["sparse_sampling"].keys():
dictionary["sparse_sampling"][key] = input_dict["sparse_sampling"][key]
else:
dictionary["sparse_sampling"] = {}
if dictionary["type"] == "internal":
self._type = "internal"
self.subtype = InternalDataset(**dictionary)
if dictionary["type"] == "external":
self._type = "external"
self.subtype = ExternalDataset(**dictionary)
def __repr__(self):
properties = ", ".join([f"{k}={v}" for k, v in self.to_dict().items()])
return f"DependentVariable({properties})"
def __str__(self):
properties = ", ".join([f"{k}={v}" for k, v in self.to_dict().items()])
return f"DependentVariable({properties})"
# ======================================================================= #
# DependentVariable Attributes #
# ======================================================================= #
@property
def application(self):
"""
Application metadata of the DependentVariable object.
.. doctest::
>>> print(y.application)
{}
The application attribute is where an application can place its own
metadata as a python dictionary object containing the application specific
metadata, using a reverse domain name notation string as the attribute
key, for example,
.. doctest::
>>> y.application = {
... "com.example.myApp" : {
... "myApp_key": "myApp_metadata"
... }
... }
>>> print(y.application)
{'com.example.myApp': {'myApp_key': 'myApp_metadata'}}
Please refer to the Core Scientific Dataset Model article for details.
Returns:
A python dictionary containing dependent variable application metadata.
"""
return self.subtype.application
@application.setter
def application(self, value):
self.subtype.application = value
@property
def axis_label(self):
r"""
List of formatted string labels for each component of the dependent variable.
This attribute is not a part of the original core scientific dataset
model, however, it is a convenient supplementary attribute that provides
formated string ready for labeling the components of the dependent variable.
The string at index `i` is formatted as `component_labels[i] / unit` if
`component_labels[i]` is a non-empty string, otherwise, `quantity_name / unit`.
Here, `quantity_name`, `component_labels`, and `unit`are the attributes of the
:ref:`dv_api` instance. For example,
.. doctest::
>>> y.axis_label
['energy / (s * W)', 'energy / (s * W)', 'energy / (s * W)']
Returns:
A list of formated component label strings.
Raises:
AttributeError: When assigned a value.
"""
labels = []
for label in self.component_labels:
if label.strip() == "":
label = self.quantity_name
labels.append(_axis_label(label, self.unit))
return labels
@property
def component_labels(self):
r"""
List of labels corresponding to the components of the dependent variable.
.. doctest::
>>> y.component_labels
['', '', '']
To update the `component_labels`, assign an array of strings with same
number of elements as the number of components.
.. doctest::
>>> y.component_labels = ['channel 0', 'channel 1', 'channel 2']
The individual labels are accessed with proper indexing, for example,
.. doctest::
>>> y.component_labels[2]
'channel 2'
Returns:
A list of component label strings.
Raises:
TypeError: When the assigned value is not an array of strings.
"""
return self.subtype.component_labels
@component_labels.setter
def component_labels(self, value):
self.subtype.component_labels = value
@property
def components(self):
r"""
Component array of the dependent variable.
The value of this attribute, :math:`\mathbb{U}`, is a Numpy array of
shape :math:`(p \times N_{d-1} \times ... N_1 \times N_0)` where
:math:`p` is the number of components, and :math:`N_k` is the number
of points from the :math:`k^\mathrm{th}` :ref:`dim_api` object.
.. note::
The shape of the components Numpy array,
:math:`(p \times N_{d-1} \times ... N_1 \times N_0)`, is reverse the
shape of the components array,
:math:`(N_0 \times N_1 \times ... N_{d-1} \times p)`, from the CSD model.
This is because CSD model utilizes a column-major order to shape the
components array relative to the order of the dimension while Numpy
utilizes a row-major order.
The dimensionality of this Numpy array is :math:`d+1` where :math:`d`
is the number of dimension objects. The zeroth axis with :math:`p` points
is the number of components.
This attribute can only be updated when the shape of the new array is
the same as the shape of the components array.
For example,
.. doctest::
>>> print(y.components.shape)
(3, 10)
>>> y.numeric_type
'float32'
is a three-component dependent variable with ten data values per
component. The numeric type of the data values, in this example, is
`float32`. To update the components array, assign an array of
shape (3, 10) to the `components` attribute. In the following example,
we assign a Numpy array,
.. doctest::
>>> y.components = np.linspace(0,256,30, dtype='u1').reshape(3,10)
>>> y.numeric_type
'uint8'
Notice, the value of the `numeric_type` attribute is automatically
updated based on the `dtype` of the Numpy array. In this case, from a
*float32* to *uint8*.
In this other example,
.. doctest::
>>> try:
... y.components = np.random.rand(1,10).astype('u1')
... except ValueError as e:
... print(e)
The shape of `ndarray`, `(1, 10)`, is inconsistent
with the shape of the components array, `(3, 10)`.
a `ValueError` is raised because the shape of the input array (1, 10)
is not consistent with the shape of the components array, (3, 10).
Returns:
A Numpy array of components.
Raises:
ValueError: When assigning an array whose shape is not consistent with
the shape of the components array.
"""
return self.subtype.components
@components.setter
def components(self, value):
self.subtype.components = value
@property
def components_url(self):
r"""
URL where the data components of the dependent variable are stored.
This attribute is only informative and cannot be modified. Its value is a
string containing the local or remote address of the file where the data values
are stored. The attribute is only valid for dependent variable with type,
`external`.
Returns:
A string containing the URL.
Raises:
AttributeError: When assigned a value.
"""
return self.subtype.components_url
@property
def data_structure(self):
r"""
Json serialized string describing the DependentVariable class instance.
This supplementary attribute is useful for a quick preview of the dependent
variable object. For convenience, the values from the `components` attribute
are truncated to the first and the last two numbers per component.
The `encoding` keyword is also hidden from this view.
.. doctest::
>>> print(y.data_structure)
{
"type": "internal",
"description": "A test image",
"name": "star",
"unit": "s * W",
"quantity_name": "energy",
"numeric_type": "float32",
"quantity_type": "pixel_3",
"components": [
[
"0.0, 1.0, ..., 8.0, 9.0"
],
[
"10.0, 11.0, ..., 18.0, 19.0"
],
[
"20.0, 21.0, ..., 28.0, 29.0"
]
]
}
Returns:
A json serialized string of the dependent variable object.
Raises:
AttributeError: When modified.
"""
return json.dumps(
self._to_dict(for_display=True),
ensure_ascii=False,
sort_keys=False,
indent=2,
)
@property
def description(self):
"""
Brief description of the dependent variables.
The default value is an empty string, ''.
.. doctest::
>>> print(y.description)
A test image
>>> y.description = 'A test pixel_3 image'
>>> print(y.description)
A test pixel_3 image
Returns:
A string of UTF-8 allowed characters describing the dependent variable.
Raises:
TypeError: When the assigned value is not a string.
"""
return self.subtype.description
@description.setter
def description(self, value):
self.subtype.description = value
@property
def encoding(self):
r"""
The encoding method used in representing the dependent variable.
The value of this attribute determines the method used when serializing or
deserializing the data values to and from the file. Currently, there are three
`valid` encoding methods:
| ``raw``
| ``base64``
| ``none``
A value, `raw`, means that the data values are serialized as binary data.
The value, `base64`, implies that the data values are serialized as base64
strings, while, the value `none` refers to text-based serialization.
By default, the encoding attribute of all dependent variable object are set to
`base64` after import. The user may update this attribute, at any time, with a
string containing a *valid* encoding literal, for example,
.. doctest::
>>> y.encoding = 'base64'
The value of this attribute will be used in serializing the data to the file,
when using the :meth:`~csdmpy.csdm.CSDM.save` method.
Returns:
A string with a `valid` encoding type.
Raises:
ValueError: If an invalid encoding value is assigned.
"""
return self.subtype.encoding
@encoding.setter
def encoding(self, value):
self.subtype.encoding = value
@property
def name(self):
r"""
Name of the dependent variable.
.. doctest::
>>> y.name
'star'
>>> y.name = 'rock star'
Returns:
A string containing the name of the dependent variable.
Raises:
TypeError: When the assigned value is not a string.
"""
return self.subtype.name
@name.setter
def name(self, value):
self.subtype.name = value
@property
def numeric_type(self):
r"""
The numeric type of the data values from the dependent variable.
There are currently twelve *valid* numeric types:
============== ============ ============ ============
``uint8`` ``int8`` ``float32`` ``complex64``
``uint16`` ``int16`` ``float64`` ``complex128``
``uint32`` ``int32``
``uint64`` ``int64``
============== ============ ============ ============
When assigning a valid value, this attribute updates the `dtype` of the
Numpy array from the corresponding
:attr:`~csdmpy.dependent_variables.DependentVariable.components`
attribute. We recommended the use of the numeric type attribute for
updating the `dtype` of the Numpy array. For example,
.. doctest::
>>> y.numeric_type
'float32'
>>> print(y.components)
[[ 0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]
[10. 11. 12. 13. 14. 15. 16. 17. 18. 19.]
[20. 21. 22. 23. 24. 25. 26. 27. 28. 29.]]
>>> y.numeric_type = 'complex64'
>>> print(y.components[:,:5])
[[ 0.+0.j 1.+0.j 2.+0.j 3.+0.j 4.+0.j]
[10.+0.j 11.+0.j 12.+0.j 13.+0.j 14.+0.j]
[20.+0.j 21.+0.j 22.+0.j 23.+0.j 24.+0.j]]
Returns:
A string with a `valid` numeric type.
Raises:
ValueError: If an invalid numeric type value is assigned.
"""
return self.subtype.numeric_type.value
@numeric_type.setter
def numeric_type(self, value):
self.subtype.numeric_type = value
@property
def quantity_name(self):
"""
Quantity name of the physical quantities associated with the dependent variable.
.. doctest::
>>> y.quantity_name
'energy'
Returns:
A string with the quantity name associated with the dependent variable
physical quantities .
Raises:
NotImplementedError: When assigning a value.
"""
return self.subtype.quantity_name
@quantity_name.setter
def quantity_name(self, value=""):
self.subtype.quantity_name = value
@property
def quantity_type(self):
r"""
Quantity type of the dependent variable.
There are currently six *valid* quantity types,
| ``scalar``
| ``vector_n``
| ``pixel_n``
| ``matrix_n_m``
| ``symmetric_matrix_n``
where `n` and `m` are integers. The value of the attribute is modified with a
string containing a *valid* quantity type.
.. doctest::
>>> y.quantity_type
'pixel_3'
>>> y.quantity_type = 'vector_3'
Returns:
A string with a `valid` quantity type.
Raises:
ValueError: If an invalid value is assigned.
"""
return self.subtype.quantity_type.value
@quantity_type.setter
def quantity_type(self, value):
self.subtype.quantity_type = value
@property
def type(self):
"""
The dependent variable subtype.
There are two *valid* subtypes of DependentVariable class with the following
enumeration literals,
| ``internal``
| ``external``
corresponding to Internal and External sub class. By default, all instances of
the DependentVariable class are assigned as `internal` upon import. The user
may update the value of this attribute, at any time, with a string containing a
valid `type` literal, for example,
.. doctest::
>>> print(y.type)
internal
>>> y.type = 'external'
When `type` is external, the data values from the corresponding dependent
variable are serialized to an external file within the same directory as the
`.csdfe` file.
Returns:
A string with a `valid` dependent variable subtype.
Raises:
ValueError: When an invalid value is assigned.
"""
return self._type
@type.setter
def type(self, value):
if value in ["internal", "external"]:
self._type = value
return
raise ValueError(
(
"{0} is not a valid value. The allowed values are "
"'internal' and 'external'.".format(value)
)
)
@property
def unit(self):
r"""
Unit associated with the dependent variable.
.. note::
The attribute cannot be modified. To convert the unit, use the
:meth:`~csdmpy.dependent_variables.DependentVariable.to` method of
the class instance.
.. doctest::
>>> y.unit
Unit("s W")
Returns:
A `Unit` object from astropy.unit package.
Raises:
AttributeError: When assigned a value.
"""
return self.subtype.unit
@unit.setter
def unit(self, value):
raise AttributeError(
"`unit` attribute cannot be modified. Use the ``to`` method of "
"the instance for the unit conversion."
)
# ======================================================================= #
# Methods #
# ======================================================================= #
[docs] def to(self, unit):
r"""
Convert the unit of the dependent variable to the `unit`.
Args:
unit: A string containing a unit with the same dimensionality as the
components of the dependent variable.
.. doctest::
>>> y.unit
Unit("s W")
>>> print(y.components[0,5])
5.0
>>> y.to('mJ')
>>> y.unit
Unit("mJ")
>>> print(y.components[0,5])
5000.0
.. note::
This method is a wrapper of the `to` method from the `Quantity <http://
docs.astropy.org/en/stable/api/\astropy.units.Quantity.html#astropy.
units.Quantity.to>`_ class.
"""
factor = (1.0 * self.unit).to(unit)
self.subtype._components = self.subtype._components * factor.value
self.subtype._unit = factor.unit
# factor = (1.0*self.unit).to(unit)
# self._components = self.components*factor.value
# self._unit = factor.unit
# self.subtype.set_attribute(
# '_components', self.components*factor.value
# )
# self.subtype.set_attribute('_unit', factor.unit)
[docs] def to_dict(self):
"""
Return DependentVariable object as a python dictionary.
Example:
>>> y.to_dict()
{'type': 'internal', 'description': 'A test image', 'name': 'star', 'unit': 's * W', 'quantity_name': 'energy', 'encoding': 'none', 'numeric_type': 'float32', 'quantity_type': 'pixel_3', 'components': [[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], [10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0], [20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0]]}
"""
return self.subtype.to_dict()
def _to_dict(
self, filename=None, dataset_index=None, for_display=False, version=None
):
"""Return DependentVariable object as a python dictionary."""
return self.subtype.to_dict(filename, dataset_index, for_display, version)
def copy(self):
"""Return a copy of the DependentVariable object."""
return deepcopy(self)
def check_sparse_sampling_key_value(input_dict):
def message2(item):
return (
f"Missing a required `{item}` key from the sparse "
"sampling object of the dependent variable."
)
sparse_keys = input_dict["sparse_sampling"].keys()
if "dimension_indexes" not in sparse_keys:
raise KeyError(message2("dimension_indexes"))
if "sparse_grid_vertexes" not in sparse_keys:
raise KeyError(message2("sparse_grid_vertexes"))
if "unsigned_integer_type" not in sparse_keys:
raise KeyError(message2("unsigned_integer_type"))
uint_value = input_dict["sparse_sampling"]["unsigned_integer_type"]
if uint_value not in ["uint8", "uint16", "uint32", "uint64"]:
raise ValueError(
f"{uint_value} is not a valid `unsigned_integer_type` "
"enumeration literal. "
f"The allowed values are `uint8`, `uint16`, `uint32`, `uint64`."
)