# Copyright 2018-2023 Xanadu Quantum Technologies Inc.# Licensed under the Apache License, Version 2.0 (the "License");# you may not use this file except in compliance with the License.# You may obtain a copy of the License at# http://www.apache.org/licenses/LICENSE-2.0# Unless required by applicable law or agreed to in writing, software# distributed under the License is distributed on an "AS IS" BASIS,# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.# See the License for the specific language governing permissions and# limitations under the License."""Contains the :class:`~pennylane.data.Dataset` base class, and `qml.data.Attribute` classfor declaratively defining dataset classes."""fromcollections.abcimportIterable,MappingfromdataclassesimportdataclassfrompathlibimportPathfromtypesimportMappingProxyTypefromtypingimportAny,ClassVar,Generic,Literal,Optional,Type,TypeVar,Union,cast,get_origin# pylint doesn't think this existsfromtyping_extensionsimportdataclass_transform# pylint: disable=no-name-in-modulefrompennylane.data.baseimporthdf5frompennylane.data.base.attributeimportAttributeInfo,DatasetAttributefrompennylane.data.base.hdf5importHDF5Any,HDF5Group,h5pyfrompennylane.data.base.mapperimportMapperMixin,match_obj_typefrompennylane.data.base.typing_utilimportUNSET,T@dataclassclassField(Generic[T]):""" The Field class is used to declaratively define the attributes of a Dataset subclass, in a similar way to dataclasses. This class should not be used directly, use the ``field()`` function instead. Attributes: attribute_type: The ``DatasetAttribute`` class for this attribute info: Attribute info """attribute_type:Type[DatasetAttribute[HDF5Any,T,Any]]info:AttributeInfo
def field(  # pylint: disable=too-many-arguments, unused-argument
    attribute_type: Union[Type[DatasetAttribute[HDF5Any, T, Any]], Literal[UNSET]] = UNSET,
    doc: Optional[str] = None,
    py_type: Optional[Any] = None,
    **kwargs,
) -> Any:
    """Used to define fields on a declarative Dataset.

    Args:
        attribute_type: ``DatasetAttribute`` class for this attribute. If not
            provided, type may be derived from the type annotation on the class.
        doc: Documentation for the attribute
        py_type: Type annotation or string describing this object's type. If not
            provided, the annotation on the class will be used
        kwargs: Extra arguments to ``AttributeInfo``

    Returns:
        Field:

    .. seealso:: :class:`~.Dataset`, :func:`~.data.attribute`

    **Example**

    The datasets declarative API allows us to create subclasses
    of :class:`Dataset` that define the required attributes, or 'fields', and
    their associated type and documentation:

    .. code-block:: python

        class QuantumOscillator(qml.data.Dataset, data_name="quantum_oscillator",
                                identifiers=["mass", "force_constant"]):
            \"""Dataset describing a quantum oscillator.\"""

            mass: float = qml.data.field(doc = "The mass of the particle")
            force_constant: float = qml.data.field(doc = "The force constant of the oscillator")
            hamiltonian: qml.Hamiltonian = qml.data.field(doc = "The hamiltonian of the particle")
            energy_levels: np.ndarray = qml.data.field(doc = "The first 1000 energy levels of the system")

    The ``data_name`` keyword argument specifies a category or descriptive name
    for the dataset type, and the ``identifiers`` keyword argument specifies
    fields that function as parameters, i.e., they determine the behaviour
    of the system.

    When a ``QuantumOscillator`` dataset is created, its attributes will have
    the documentation from the field definition:

    >>> dataset = QuantumOscillator(
    ...     mass=1,
    ...     force_constant=0.5,
    ...     hamiltonian=qml.X(0),
    ...     energy_levels=np.array([0.1, 0.2])
    ... )
    >>> dataset.attr_info["mass"]["doc"]
    'The mass of the particle'
    """
    # Bundle the documentation metadata first, then pair it with the
    # (possibly UNSET) attribute class in a Field descriptor.
    info = AttributeInfo(doc=doc, py_type=py_type, **kwargs)
    attribute_cls = cast(Type[DatasetAttribute[HDF5Any, T, T]], attribute_type)

    return Field(attribute_cls, info)
class _InitArg:  # pylint: disable=too-few-public-methods
    """Sentinel value returned by ``_init_arg()``."""


def _init_arg(  # pylint: disable=unused-argument
    default: Any, alias: Optional[str] = None, kw_only: bool = False
) -> Any:
    """This function exists only for the benefit of the type checker. It is used
    to annotate attributes on ``Dataset`` that are not part of the data model, but
    should appear in the generated ``__init__`` method.
    """
    # Always returns the sentinel; Dataset.__init_subclass__ skips any class
    # attribute whose value is _InitArg when building ``fields``.
    return _InitArg


@dataclass_transform(
    order_default=False,
    eq_default=False,
    kw_only_default=True,
    field_specifiers=(field, _init_arg),
)
class _DatasetTransform:  # pylint: disable=too-few-public-methods
    """This base class that tells the type system that ``Dataset`` behaves like
    a dataclass.

    See: https://peps.python.org/pep-0681/
    """


Self = TypeVar("Self", bound="Dataset")
class Dataset(MapperMixin, _DatasetTransform):
    """
    Base class for Datasets.
    """

    __data_name__: ClassVar[str]
    __identifiers__: ClassVar[tuple[str, ...]]

    fields: ClassVar[Mapping[str, Field]]
    """
    A mapping of attribute names to their ``Attribute`` information. Note that
    this contains attributes declared on the class, not attributes added to
    an instance. Use ``attrs`` to view all attributes on an instance.
    """

    # Not part of the data model: these only exist so the generated __init__
    # signature (via _DatasetTransform / _init_arg) exposes ``bind`` and
    # ``data_name`` to the type checker.
    bind_: Optional[HDF5Group] = _init_arg(default=None, alias="bind", kw_only=False)
    data_name_: Optional[str] = _init_arg(default=None, alias="data_name")

    def __init__(
        self,
        bind: Optional[HDF5Group] = None,
        *,
        data_name: Optional[str] = None,
        identifiers: Optional[tuple[str, ...]] = None,
        **attrs: Any,
    ):
        """
        Load a dataset from a HDF5 Group or initialize a new Dataset.

        Args:
            bind: The HDF5 group that contains this dataset. If None, a new
                group will be created in memory. Any attributes that already exist
                in ``bind`` will be loaded into this dataset.
            data_name: String describing the type of data this datasets contains, e.g
                'qchem' for quantum chemistry. Defaults to the data name defined by
                the class, this is 'generic' for base datasets.
            identifiers: Tuple of names of attributes of this dataset that will serve
                as its parameters
            **attrs: Attributes to add to this dataset.
        """
        if isinstance(bind, (h5py.Group, h5py.File)):
            self._bind = bind
        else:
            # No group provided: back this dataset with a new in-memory group.
            self._bind = hdf5.create_group()

        self._init_bind(data_name, identifiers)

        # Set declared fields first, so they are stored using their declared
        # attribute type and metadata.
        for name in self.fields:
            try:
                attr_value = attrs.pop(name)
                setattr(self, name, attr_value)
            except KeyError:
                pass

        # Remaining keyword arguments become attributes with inferred types.
        for name, attr in attrs.items():
            setattr(self, name, attr)

    @classmethod
    def open(
        cls,
        filepath: Union[str, Path],
        mode: Literal["w", "w-", "a", "r", "copy"] = "r",
    ) -> "Dataset":
        """Open existing dataset or create a new one at ``filepath``.

        Args:
            filepath: Path to dataset file
            mode: File handling mode. Possible values are "w-" (create, fail if file
                exists), "w" (create, overwrite existing), "a" (append existing,
                create if doesn't exist), "r" (read existing, must exist), and "copy",
                which loads the dataset into memory and detaches it from the underlying
                file. Default is "r".
        Returns:
            Dataset object from file
        """
        filepath = Path(filepath).expanduser()

        if mode == "copy":
            # Copy the file's contents into an in-memory group so the returned
            # dataset does not keep the file open.
            with h5py.File(filepath, "r") as f_to_copy:
                f = hdf5.create_group()
                hdf5.copy_all(f_to_copy, f)
        else:
            f = h5py.File(filepath, mode)

        return cls(f)

    def close(self) -> None:
        """Close the underlying dataset file. The dataset will
        become inaccessible."""
        self.bind.close()

    @property
    def data_name(self) -> str:
        """Returns the data name (category) of this dataset."""
        return self.info.get("data_name", self.__data_name__)

    @property
    def identifiers(self) -> Mapping[str, str]:  # pylint: disable=function-redefined
        """Returns this dataset's parameters."""
        # "params" is checked as a fallback key for the identifier list; only
        # identifiers actually present in the bind group are returned.
        return {
            attr_name: getattr(self, attr_name)
            for attr_name in self.info.get("identifiers", self.info.get("params", []))
            if attr_name in self.bind
        }

    @property
    def info(self) -> AttributeInfo:
        """Return metadata associated with this dataset."""
        return AttributeInfo(self.bind.attrs)

    @property
    def bind(self) -> HDF5Group:  # pylint: disable=function-redefined
        """Return the HDF5 group that contains this dataset."""
        return self._bind

    @property
    def attrs(self) -> Mapping[str, DatasetAttribute]:
        """Returns all attributes of this Dataset."""
        return self._mapper.view()

    @property
    def attr_info(self) -> Mapping[str, AttributeInfo]:
        """Returns a mapping of the ``AttributeInfo`` for each of this dataset's attributes."""
        return MappingProxyType(
            {
                attr_name: AttributeInfo(self.bind[attr_name].attrs)
                for attr_name in self.list_attributes()
            }
        )

    def list_attributes(self) -> list[str]:
        """Returns a list of this dataset's attributes."""
        return list(self.attrs.keys())

    def read(
        self,
        source: Union[str, Path, "Dataset"],
        attributes: Optional[Iterable[str]] = None,
        *,
        overwrite: bool = False,
    ) -> None:
        """Load dataset from HDF5 file at filepath.

        Args:
            source: Dataset, or path to HDF5 file containing dataset, from which
                to read attributes
            attributes: Optional list of attributes to copy. If None, all attributes
                will be copied.
            overwrite: Whether to overwrite attributes that already exist in this
                dataset.
        """
        if not isinstance(source, Dataset):
            source = Path(source).expanduser()
            source = Dataset.open(source, mode="r")

        # Reading from ``source`` is implemented as ``source`` writing into self.
        source.write(self, attributes=attributes, overwrite=overwrite)

        source.close()

    def write(
        self,
        dest: Union[str, Path, "Dataset"],
        mode: Literal["w", "w-", "a"] = "a",
        attributes: Optional[Iterable[str]] = None,
        *,
        overwrite: bool = False,
    ) -> None:
        """Write dataset to HDF5 file at filepath.

        Args:
            dest: HDF5 file, or path to HDF5 file containing dataset, to write
                attributes to
            mode: File handling mode, if ``source`` is a file system path. Possible
                values are "w-" (create, fail if file exists), "w" (create, overwrite
                existing), and "a" (append existing, create if doesn't exist). Default
                is "a".
            attributes: Optional list of attributes to copy. If None, all attributes
                will be copied. Note that identifiers will always be copied.
            overwrite: Whether to overwrite attributes that already exist in this
                dataset.
        """
        attributes = attributes if attributes is not None else ()
        on_conflict = "overwrite" if overwrite else "ignore"

        if not isinstance(dest, Dataset):
            dest = Path(dest).expanduser()
            dest = Dataset.open(dest, mode=mode)
            dest.info.update(self.info)

        hdf5.copy_all(self.bind, dest.bind, *attributes, on_conflict=on_conflict)

        # Identifiers are always copied, even when ``attributes`` restricts the
        # copy, so the destination dataset remains self-describing.
        missing_identifiers = [
            identifier for identifier in self.identifiers if not hasattr(dest, identifier)
        ]
        if missing_identifiers:
            hdf5.copy_all(self.bind, dest.bind, *missing_identifiers)

    def _init_bind(
        self, data_name: Optional[str] = None, identifiers: Optional[tuple[str, ...]] = None
    ):
        # Stamp dataset metadata onto the bind group, but only when the
        # underlying file is writable, and never overwrite existing values.
        if self.bind.file.mode == "r+":
            if "type_id" not in self.info:
                self.info["type_id"] = self.type_id
            if "data_name" not in self.info:
                self.info["data_name"] = data_name or self.__data_name__
            if "identifiers" not in self.info:
                self.info["identifiers"] = identifiers or self.__identifiers__

    def __setattr__(self, __name: str, __value: Union[Any, DatasetAttribute]) -> None:
        # Private names and class-level members bypass the HDF5 attribute
        # mapping and are stored as regular instance attributes.
        if __name.startswith("_") or __name in type(self).__dict__:
            object.__setattr__(self, __name, __value)
            return

        if __name in self.fields:
            # Declared field: store with its declared attribute type and info.
            field_ = self.fields[__name]
            self._mapper.set_item(__name, __value, field_.info, field_.attribute_type)
        else:
            self._mapper[__name] = __value

    def __getattr__(self, __name: str) -> Any:
        try:
            return self._mapper[__name].get_value()
        except KeyError as exc:
            if __name in self.fields:
                # Declared field that has not been assigned a value yet.
                return UNSET

            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{__name}'"
            ) from exc

    def __delattr__(self, __name: str) -> None:
        try:
            del self._mapper[__name]
        except KeyError as exc:
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{__name}'"
            ) from exc

    def __repr__(self) -> str:
        # Show at most two attribute names, followed by "..." if truncated.
        attrs_str = [repr(attr) for attr in self.list_attributes()]
        if len(attrs_str) > 2:
            attrs_str = attrs_str[:2]
            attrs_str.append("...")

        attrs_str = "[" + ", ".join(attrs_str) + "]"
        repr_items = ", ".join(
            f"{name}: {value}"
            for name, value in {**self.identifiers, "attributes": attrs_str}.items()
        )

        return f"<{type(self).__name__} = {repr_items}>"

    def __init_subclass__(
        cls, *, data_name: Optional[str] = None, identifiers: Optional[tuple[str, ...]] = None
    ) -> None:
        """Initializes the ``fields`` dict of a Dataset subclass using
        the declared ``Attributes`` and their type annotations."""
        super().__init_subclass__()

        fields = {}
        if data_name:
            cls.__data_name__ = data_name
        if identifiers:
            cls.__identifiers__ = identifiers

        # get field info from annotated class attributes, e.g:
        #   name: int = field(...)
        for name, annotated_type in cls.__annotations__.items():
            if get_origin(annotated_type) is ClassVar:
                continue

            try:
                field_ = getattr(cls, name)
                # Remove the class attribute so instance access goes through
                # __getattr__ / the mapper instead.
                delattr(cls, name)
            except AttributeError:
                # field only has type annotation
                field_ = field()

            if field_ is _InitArg:
                # __init__-only argument (e.g. ``bind_``), not a data field.
                continue

            field_.info.py_type = annotated_type
            if field_.attribute_type is UNSET:
                field_.attribute_type = match_obj_type(annotated_type)

            fields[name] = field_

        cls.fields = MappingProxyType(fields)

    def __dir__(self):
        return self.list_attributes()

    __data_name__ = "generic"
    __identifiers__ = tuple()

    type_id = "dataset"
    """Type identifier for this dataset. Used internally to load datasets from other
    datasets."""
# The base Dataset class declares no fields of its own.
Dataset.fields = MappingProxyType({})


class _DatasetAttributeType(DatasetAttribute[HDF5Group, Dataset, Dataset]):
    """Attribute type for loading and saving datasets as attributes of
    datasets, or elements of collection types."""

    type_id = "dataset"

    def hdf5_to_value(self, bind: HDF5Group) -> Dataset:
        # Wrap the existing group in-place; no data is copied.
        return Dataset(bind)

    def value_to_hdf5(self, bind_parent: HDF5Group, key: str, value: Dataset) -> HDF5Group:
        # Copy the dataset's backing group under ``key`` in the parent group
        # and return the newly created child group.
        hdf5.copy(value.bind, bind_parent, key)

        return bind_parent[key]