Source code for pennylane.data.base.attribute

# Copyright 2018-2023 Xanadu Quantum Technologies Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the base class for Dataset attribute types, and a class for
attribute metadata."""

import typing
import warnings
from abc import ABC, abstractmethod
from collections.abc import Mapping, MutableMapping, Sequence
from functools import lru_cache
from numbers import Number
from types import MappingProxyType
from typing import (
    Any,
    ClassVar,
    Generic,
    Iterator,
    Literal,
    Optional,
    Tuple,
    Type,
    TypeVar,
    Union,
    overload,
)

from pennylane.data.base import hdf5
from pennylane.data.base.hdf5 import HDF5, HDF5Any, HDF5Group
from pennylane.data.base.typing_util import UNSET, get_type, get_type_str

T = TypeVar("T")


class AttributeInfo(MutableMapping):
    """Contains metadata that may be assigned to a dataset
    attribute. Is stored in the HDF5 object's ``attrs`` dict.

    Attributes:
        attrs_bind: The HDF5 attrs dict that this instance is bound to,
            or any mutable mapping
        **kwargs: Extra metadata to include. Must be a string, number
            or numpy array
    """

    attrs_namespace: ClassVar[str] = "qml.data"
    attrs_bind: typing.MutableMapping[str, Any]

    @overload
    def __init__(  # overload to specify known keyword args
        self,
        attrs_bind: Optional[typing.MutableMapping[str, Any]] = None,
        *,
        doc: Optional[str] = None,
        py_type: Optional[str] = None,
        **kwargs: Any,
    ):
        pass

    @overload
    def __init__(self):  # need at least two overloads when using @overload
        pass

    def __init__(self, attrs_bind: Optional[typing.MutableMapping[str, Any]] = None, **kwargs: Any):
        object.__setattr__(self, "attrs_bind", attrs_bind if attrs_bind is not None else {})

        for k, v in kwargs.items():
            setattr(self, k, v)

    def save(self, info: "AttributeInfo") -> None:
        """Inserts the values set in this instance into ``info``."""
        for k, v in self.items():
            info[k] = v

    def load(self, info: "AttributeInfo"):
        """Inserts the values set in ``info`` into this instance."""
        info.save(self)

    @property
    def py_type(self) -> Optional[str]:
        """String representation of this attribute's python type."""
        return self.get("py_type")

    @py_type.setter
    def py_type(self, type_: Union[str, Type]):
        self["py_type"] = get_type_str(type_)

    @property
    def doc(self) -> Optional[str]:
        """Documentation for this attribute."""
        return self.get("doc")

    @doc.setter
    def doc(self, doc: str):
        self["doc"] = doc

    def __len__(self) -> int:
        return self.attrs_bind.get("qml.__data_len__", 0)

    def _update_len(self, inc: int):
        self.attrs_bind["qml.__data_len__"] = len(self) + inc

    def __setitem__(self, __name: str, __value: Any):
        key = self.bind_key(__name)
        if __value is None:
            self.attrs_bind.pop(key, None)
            return

        exists = key in self.attrs_bind
        self.attrs_bind[key] = __value
        if not exists:
            self._update_len(1)

    def __getitem__(self, __name: str) -> Any:
        return self.attrs_bind[self.bind_key(__name)]

    def __setattr__(self, __name: str, __value: Any) -> None:
        if __name in self.__class__.__dict__:
            object.__setattr__(self, __name, __value)
        else:
            self[__name] = __value

    def __getattr__(self, __name: str) -> Any:
        try:
            return self[__name]
        except KeyError:
            return None

    def __delitem__(self, __name: str) -> None:
        del self.attrs_bind[self.bind_key(__name)]
        self._update_len(-1)

    def __iter__(self) -> Iterator[str]:
        ns = f"{self.attrs_namespace}."
        return (
            key.split(ns, maxsplit=1)[1]
            for key in filter(lambda k: k.startswith(ns), self.attrs_bind)
        )

    def __repr__(self) -> str:
        return f"{type(self).__name__}({repr(dict(self))})"

    @classmethod
    @lru_cache()
    def bind_key(cls, __name: str) -> str:
        """Returns ``__name`` dot-prefixed with ``attrs_namespace``."""
        return ".".join((cls.attrs_namespace, __name))
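

# A minimal usage sketch (illustrative, not part of the original module):
# ``AttributeInfo`` can bind to any mutable mapping, such as a plain dict,
# and namespaces every key under ``attrs_namespace`` via ``bind_key()``:
#
#     >>> attrs = {}
#     >>> info = AttributeInfo(attrs, doc="A sample attribute")
#     >>> info.doc
#     'A sample attribute'
#     >>> AttributeInfo.bind_key("doc")
#     'qml.data.doc'
#     >>> attrs["qml.data.doc"]
#     'A sample attribute'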


# Type variable for value returned by ``DatasetAttribute.get_value()``
ValueType = TypeVar("ValueType")

# Type variable for ``value`` argument to ``DatasetAttribute.__init__()``
InitValueType = TypeVar("InitValueType")


class DatasetAttribute(ABC, Generic[HDF5, ValueType, InitValueType]):
    """
    The DatasetAttribute class provides an interface for converting Python objects
    to and from a HDF5 array or Group. It uses the registry pattern to maintain a
    mapping of type_id to DatasetAttribute, and Python types to compatible
    AttributeTypes.

    Attributes:
        type_id: Unique identifier for this DatasetAttribute class. Must be
            declared in subclasses.
    """

    type_id: ClassVar[str]

    @abstractmethod
    def hdf5_to_value(self, bind: HDF5) -> ValueType:
        """Parses bind into Python object."""

    @abstractmethod
    def value_to_hdf5(self, bind_parent: HDF5Group, key: str, value: InitValueType) -> HDF5:
        """Converts value into a HDF5 Array or Group under bind_parent[key]."""

    @overload
    def __init__(
        self,
        value: Union[InitValueType, Literal[UNSET]] = UNSET,
        info: Optional[AttributeInfo] = None,
        *,
        parent_and_key: Optional[Tuple[HDF5Group, str]] = None,
    ):
        """Initialize a new dataset attribute from ``value``.

        Args:
            value: Value that will be stored in dataset attribute.
            info: Metadata to attach to attribute.
            parent_and_key: A 2-tuple specifying the HDF5 group that will contain
                this attribute, and its key. If None, attribute will be stored
                in-memory.
        """

    @overload
    def __init__(self, *, bind: HDF5):
        """Load previously persisted dataset attribute from ``bind``. If ``bind``
        contains an attribute of a different type, or does not contain a dataset
        attribute, a ``TypeError`` will be raised.

        Args:
            bind: HDF5 object from which existing attribute will be loaded.
        """

    def __init__(
        self,
        value: Union[InitValueType, Literal[UNSET]] = UNSET,
        info: Optional[AttributeInfo] = None,
        *,
        bind: Optional[HDF5] = None,
        parent_and_key: Optional[Tuple[HDF5Group, str]] = None,
    ) -> None:
        """
        Initialize a new dataset attribute, or load from an existing hdf5 object.

        This constructor can be called two ways: value initialization or bind
        initialization.

        Value initialization creates the attribute with specified ``value`` in a
        new HDF5 object, with optional ``info`` attached. The attribute can be
        created in an existing HDF5 group by passing the ``parent_and_key``
        argument.

        Bind initialization loads an attribute that was previously persisted in
        HDF5 object ``bind``.

        Note that if ``bind`` is provided, all other arguments will be ignored.

        Args:
            value: Value to initialize attribute to
            info: Metadata to attach to attribute
            bind: HDF5 object from which existing attribute will be loaded
            parent_and_key: A 2-tuple specifying the HDF5 group that will contain
                this attribute, and its key.
        """
        if bind is not None:
            self._bind_init(bind)
        else:
            self._value_init(value, info, parent_and_key)

    def _bind_init(self, bind: HDF5) -> None:
        """Constructor for bind initialization. See __init__()."""
        self._bind = bind
        self._check_bind()

    def _value_init(
        self,
        value: Union[InitValueType, Literal[UNSET]],
        info: Optional[AttributeInfo],
        parent_and_key: Optional[Tuple[HDF5Group, str]],
    ):
        """Constructor for value initialization. See __init__()."""
        if parent_and_key is not None:
            parent, key = parent_and_key
        else:
            parent, key = hdf5.create_group(), "_"

        if value is UNSET:
            value = self.default_value()
            if value is UNSET:
                raise TypeError("__init__() missing 1 required positional argument: 'value'")

        self._bind = self._set_value(value, info, parent, key)
        self._check_bind()
        self.__post_init__(value)

    @property
    def info(self) -> AttributeInfo:
        """Returns the ``AttributeInfo`` for this attribute."""
        return AttributeInfo(self.bind.attrs)

    @property
    def bind(self) -> HDF5:
        """Returns the HDF5 object that contains this attribute's data."""
        return self._bind

    @classmethod
    def default_value(cls) -> Union[InitValueType, Literal[UNSET]]:
        """Returns a valid default value for this type, or ``UNSET`` if this type
        must be initialized with a value."""
        return UNSET

    @classmethod
    def py_type(cls, value_type: Type[InitValueType]) -> str:
        """Determines the ``py_type`` of an attribute during value initialization,
        if it was not provided in the ``info`` argument.

        This method returns ``f"{value_type.__module__}.{value_type.__name__}"``.
        """
        return get_type_str(value_type)

    @classmethod
    def consumes_types(cls) -> typing.Iterable[type]:
        """
        Returns an iterable of types for which this should be the default codec.
        If a value of one of these types is assigned to a Dataset without
        specifying a ``type_id``, this type will be used.
        """
        return ()

    def __post_init__(self, value: InitValueType) -> None:
        """Called after __init__(), only during value initialization. Can be
        implemented in subclasses that require additional initialization."""

    def get_value(self) -> ValueType:
        """Deserializes the mapped value from ``bind``."""
        return self.hdf5_to_value(self.bind)

    def copy_value(self) -> ValueType:
        """Deserializes the mapped value from ``bind``, and also performs a
        'deep-copy' of any nested values contained in ``bind``."""
        return self.get_value()

    def _set_value(
        self, value: InitValueType, info: Optional[AttributeInfo], parent: HDF5Group, key: str
    ) -> HDF5:
        """Converts ``value`` into HDF5 format and sets the attribute info."""
        if info is None:
            info = AttributeInfo()

        info["type_id"] = self.type_id
        if info.py_type is None:
            info.py_type = self.py_type(type(value))

        new_bind = self.value_to_hdf5(parent, key, value)
        new_info = AttributeInfo(new_bind.attrs)
        info.save(new_info)

        return new_bind

    def _set_parent(self, parent: HDF5Group, key: str):
        """Copies this attribute's data into ``parent``, under ``key``."""
        hdf5.copy(source=self.bind, dest=parent, key=key, on_conflict="overwrite")
        self._bind = parent[key]  # pylint: disable=attribute-defined-outside-init

    def _check_bind(self):
        """
        Checks that ``bind.attrs`` contains the type_id corresponding
        to this type.
        """
        existing_type_id = self.info.get("type_id")
        if existing_type_id is None:
            raise ValueError("'bind' does not contain a dataset attribute.")
        if existing_type_id != self.type_id:
            raise TypeError(f"'bind' is bound to another attribute type '{existing_type_id}'")

    def __copy__(self) -> "DatasetAttribute":
        impl_group = hdf5.create_group()
        hdf5.copy(self.bind, impl_group, "_")

        return type(self)(bind=impl_group["_"])

    def __deepcopy__(self, memo) -> "DatasetAttribute":
        return self.__copy__()

    def __eq__(self, __value: object) -> bool:
        return self.get_value() == __value

    def __repr__(self) -> str:
        return f"{type(self).__name__}({repr(self.get_value())})"

    def __str__(self) -> str:
        return str(self.get_value())

    __registry: typing.Mapping[str, Type["DatasetAttribute"]] = {}
    __type_consumer_registry: typing.Mapping[type, Type["DatasetAttribute"]] = {}

    registry: typing.Mapping[str, Type["DatasetAttribute"]] = MappingProxyType(__registry)
    """Maps type_ids to their DatasetAttribute classes."""

    type_consumer_registry: typing.Mapping[type, Type["DatasetAttribute"]] = MappingProxyType(
        __type_consumer_registry
    )
    """Maps types to their default DatasetAttribute"""

    def __init_subclass__(  # pylint: disable=arguments-differ
        cls, *, abstract: bool = False
    ) -> None:
        if abstract:
            return super().__init_subclass__()

        existing_type = DatasetAttribute.__registry.get(cls.type_id)
        if existing_type is not None:
            raise TypeError(
                f"DatasetAttribute with type_id '{cls.type_id}' already exists: {existing_type}"
            )

        DatasetAttribute.__registry[cls.type_id] = cls  # type: ignore

        for type_ in cls.consumes_types():
            existing_type = DatasetAttribute.type_consumer_registry.get(type_)
            if existing_type is not None:
                warnings.warn(
                    f"Conflicting default types: Both '{cls.__name__}' and '{existing_type.__name__}' "
                    f"consume type '{type_.__name__}'. '{type_.__name__}' "
                    f"will now be consumed by '{cls.__name__}'"
                )
            DatasetAttribute.__type_consumer_registry[type_] = cls

        return super().__init_subclass__()
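

# A hypothetical subclass sketch (not part of the original module) showing how
# the registry pattern above is used. The ``type_id`` and the h5py-style calls
# are assumptions for illustration; the attribute types shipped with PennyLane
# may differ.
#
#     class ExampleStringAttribute(DatasetAttribute):
#         """Stores a Python ``str`` in an HDF5 dataset."""
#
#         type_id = "example_string"
#
#         @classmethod
#         def consumes_types(cls):
#             # Makes this class the default codec for ``str`` values.
#             return (str,)
#
#         def value_to_hdf5(self, bind_parent, key, value):
#             # h5py stores Python strings as variable-length UTF-8.
#             bind_parent[key] = value
#             return bind_parent[key]
#
#         def hdf5_to_value(self, bind):
#             # Decode the stored bytes back into ``str``.
#             return bind.asstr()[()]
#
# Merely declaring the subclass registers it: ``__init_subclass__`` records it
# in ``DatasetAttribute.registry`` under its ``type_id`` and, for each type in
# ``consumes_types()``, in ``type_consumer_registry``. Value initialization
# (``ExampleStringAttribute("hello")``) serializes through ``value_to_hdf5``,
# while bind initialization (``ExampleStringAttribute(bind=existing)``) checks
# the stored ``type_id`` before loading.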


def attribute(
    val: T, doc: Optional[str] = None, **kwargs: Any
) -> DatasetAttribute[HDF5Any, T, Any]:
    """Creates a dataset attribute that contains both a value and associated metadata.

    Args:
        val (any): the dataset attribute value
        doc (str): the docstring that describes the attribute
        **kwargs: additional keyword arguments that provide metadata describing
            the attribute

    Returns:
        DatasetAttribute: an attribute object

    .. seealso:: :class:`~.Dataset`

    **Example**

    >>> hamiltonian = qml.Hamiltonian([1., 1.], [qml.Z(0), qml.Z(1)])
    >>> eigvals, eigvecs = np.linalg.eigh(qml.matrix(hamiltonian))
    >>> dataset = qml.data.Dataset(hamiltonian = qml.data.attribute(
    ...     hamiltonian,
    ...     doc="The hamiltonian of the system"))
    >>> dataset.eigen = qml.data.attribute(
    ...     {"eigvals": eigvals, "eigvecs": eigvecs},
    ...     doc="Eigenvalues and eigenvectors of the hamiltonian")

    This metadata can then be accessed using the :meth:`~.Dataset.attr_info` mapping:

    >>> dataset.attr_info["eigen"]["doc"]
    'Eigenvalues and eigenvectors of the hamiltonian'
    """
    return match_obj_type(val)(val, AttributeInfo(doc=doc, py_type=type(val), **kwargs))


def get_attribute_type(h5_obj: HDF5) -> Type[DatasetAttribute[HDF5, Any, Any]]:
    """
    Returns the ``DatasetAttribute`` of the dataset attribute contained
    in ``h5_obj``.
    """
    type_id = h5_obj.attrs[AttributeInfo.bind_key("type_id")]

    return DatasetAttribute.registry[type_id]


def match_obj_type(
    type_or_obj: Union[ValueType, Type[ValueType]]
) -> Type[DatasetAttribute[HDF5Any, ValueType, ValueType]]:
    """
    Returns a ``DatasetAttribute`` that can accept an object of type
    ``type_or_obj`` as a value.

    Args:
        type_or_obj: A type or an object

    Returns:
        DatasetAttribute that can accept ``type_or_obj`` (or an object of that
        type) as a value.

    Raises:
        TypeError: if no DatasetAttribute can accept an object of that type
    """
    type_ = get_type(type_or_obj)
    if hasattr(type_, "type_id"):
        return DatasetAttribute.registry[type_.type_id]

    ret = DatasetAttribute.registry["array"]

    if type_ in DatasetAttribute.type_consumer_registry:
        ret = DatasetAttribute.type_consumer_registry[type_]
    elif issubclass(type_, Number):
        ret = DatasetAttribute.registry["scalar"]
    elif hasattr(type_, "__array__"):
        ret = DatasetAttribute.registry["array"]
    elif issubclass(type_, Sequence):
        ret = DatasetAttribute.registry["list"]
    elif issubclass(type_, Mapping):
        ret = DatasetAttribute.registry["dict"]

    return ret
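

# Dispatch examples (illustrative, not part of the original module). These
# assume the built-in "scalar", "array", "list" and "dict" attribute types
# shipped with ``pennylane.data`` have been imported and registered, and that
# ``numpy`` is imported as ``np``:
#
#     >>> match_obj_type(3.14)          # Number        -> the "scalar" type
#     >>> match_obj_type(np.eye(2))     # has __array__ -> the "array" type
#     >>> match_obj_type(["a", "b"])    # Sequence      -> the "list" type
#     >>> match_obj_type({"a": 1})      # Mapping       -> the "dict" type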