# Copyright 2018-2023 Xanadu Quantum Technologies Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Contains functions for querying available datasets and downloading
them.
"""

import sys
import urllib.parse
from concurrent import futures
from functools import lru_cache
from pathlib import Path
from time import sleep
from typing import Any, Iterable, Mapping, Optional, Union

from requests import get, head

from pennylane.data.base import Dataset
from pennylane.data.base.hdf5 import open_hdf5_s3
from pennylane.data.data_manager import progress

from .graphql import (
    get_dataset_urls,
    _get_parameter_tree,
    list_data_names,
    list_attributes,
)
from .foldermap import DataPath, FolderMapView, ParamArg
from .params import DEFAULT, FULL, format_params, provide_defaults

S3_URL = "https://datasets.cloud.pennylane.ai/datasets/h5"
FOLDERMAP_URL = f"{S3_URL}/foldermap.json"
DATA_STRUCT_URL = f"{S3_URL}/data_struct.json"


@lru_cache(maxsize=1)
def _get_foldermap():
    """Fetch the foldermap from S3.

    The result is cached, so the request is only made on the first call.

    Returns:
        FolderMapView: View over the decoded ``foldermap.json`` document

    Raises:
        requests.HTTPError: If the server answers with an error status
    """
    response = get(FOLDERMAP_URL, timeout=5.0)
    response.raise_for_status()

    return FolderMapView(response.json())


@lru_cache(maxsize=1)
def _get_data_struct():
    """Fetch the data struct from S3.

    The result is cached, so the request is only made on the first call.

    Returns:
        The decoded ``data_struct.json`` document

    Raises:
        requests.HTTPError: If the server answers with an error status
    """
    response = get(DATA_STRUCT_URL, timeout=5.0)
    response.raise_for_status()

    # The fetched document was previously discarded, so callers always got ``None``.
    return response.json()


def _download_partial(  # pylint: disable=too-many-arguments
    s3_url: str,
    dest: Path,
    attributes: Optional[Iterable[str]],
    overwrite: bool,
    block_size: int,
    pbar_task: Optional[progress.Task],
) -> None:
    """Download the requested attributes of the Dataset at ``s3_url`` into ``dest``.

    Args:
        s3_url: URL of the remote dataset
        dest: Destination dataset path
        attributes: Requested attributes to download. Passing ``None`` is equivalent
            to requesting all attributes of the remote dataset.
        overwrite: If True, overwrite attributes that already exist at ``dest``. Otherwise,
            only download attributes that do not exist at ``dest``.
        block_size: Number of bytes to fetch per read operation on the remote file
        pbar_task: Optional progress task. It is marked as complete, using the size
            of the file at ``dest``, once the transfer has finished.
    """
    # ``attributes`` is read twice below (to build the fetch set and by ``write``);
    # materialise it so that a one-shot iterable is not exhausted by the first pass.
    if attributes is not None:
        attributes = list(attributes)

    dest_dataset = Dataset.open(dest, mode="a")
    remote_dataset = None

    try:
        attributes_to_fetch = set()

        if attributes is not None:
            attributes_to_fetch.update(attributes)
        else:
            # The remote file has to be opened just to learn which attributes exist
            remote_dataset = Dataset(open_hdf5_s3(s3_url, block_size=block_size))
            attributes_to_fetch.update(remote_dataset.attrs)

        if not overwrite:
            attributes_to_fetch.difference_update(dest_dataset.attrs)

        if attributes_to_fetch:
            if remote_dataset is None:
                remote_dataset = Dataset(open_hdf5_s3(s3_url, block_size=block_size))
            remote_dataset.write(dest_dataset, "a", attributes, overwrite=overwrite)
    finally:
        # Always release both handles, including when the transfer fails half-way
        try:
            if remote_dataset is not None:
                remote_dataset.close()
        finally:
            dest_dataset.close()

    # Drop the references before inspecting the file on disk
    del remote_dataset
    del dest_dataset

    if pbar_task:
        file_size = dest.stat().st_size
        pbar_task.update(completed=file_size, total=file_size)


def _download_full(
    s3_url: str, dest: Path, block_size: int, pbar_task: Optional[progress.Task]
) -> None:
    """Download the full dataset file at ``s3_url`` to ``dest``.

    Args:
        s3_url: URL of the remote dataset file
        dest: Path of the local file to write
        block_size: Number of bytes to request per streamed chunk
        pbar_task: Optional progress task, advanced by the size of every chunk written

    Raises:
        requests.HTTPError: If the server answers with an error status
    """
    # The response is used as a context manager so the connection is always released
    with get(s3_url, timeout=5.0, stream=True) as resp:
        resp.raise_for_status()

        try:
            with open(dest, "wb") as f:
                for block in resp.iter_content(chunk_size=block_size):
                    f.write(block)
                    if pbar_task is not None:
                        pbar_task.update(advance=len(block))
        except BaseException:
            # A truncated file would be picked up as an existing local dataset by
            # ``_download_dataset`` on the next call, so do not leave it behind.
            try:
                dest.unlink()
            except OSError:
                # Best effort only: the original error is the one worth reporting
                pass
            raise


def _download_dataset(  # pylint: disable=too-many-arguments
    dataset_url: str,
    dest: Path,
    attributes: Optional[Iterable[str]],
    block_size: int,
    force: bool,
    pbar_task: Optional[progress.Task],
) -> None:
    """Downloads the dataset at ``dataset_url`` to ``dest``, optionally downloading
    only requested attributes. If ``attributes`` is not provided, every attribute
    will be requested.

    If any of the attributes of the remote dataset are already downloaded locally,
    they will not be overwritten unless ``force`` is True.

    If ``pbar_task`` is provided, it will be updated with the download progress.
    """
    # A plain file download is only possible when the whole dataset is wanted and
    # there is no local file whose existing attributes have to be respected.
    if attributes is not None or dest.exists():
        _download_partial(
            dataset_url,
            dest=dest,
            attributes=attributes,
            overwrite=force,
            block_size=block_size,
            pbar_task=pbar_task,
        )
    else:
        _download_full(dataset_url, dest=dest, block_size=block_size, pbar_task=pbar_task)


def _download_datasets(  # pylint: disable=too-many-arguments
    data_name: str,
    folder_path: Path,
    dataset_urls: list[str],
    dataset_ids: list[str],
    attributes: Optional[Iterable[str]],
    force: bool,
    block_size: int,
    num_threads: int,
    pbar: Optional[progress.Progress],
) -> list[Path]:
    """Downloads the datasets with given ``dataset_urls`` to ``folder_path``. If ``pbar``
    is provided, a progress task will be added for each requested dataset.

    Each dataset is written to ``folder_path / data_name / (dataset_id + ".h5")`` and
    the downloads are spread over at most ``num_threads`` worker threads.

    Returns:
        list[Path]: List of downloaded dataset paths

    Raises:
        Exception: The exception raised by a failed download is re-raised
    """
    file_names = [dataset_id + ".h5" for dataset_id in dataset_ids]
    dest_paths = [folder_path / data_name / file_name for file_name in file_names]

    if not dest_paths:
        # Nothing to download; a thread pool cannot be created with zero workers
        return dest_paths

    for parent_dir in {path.parent for path in dest_paths}:
        parent_dir.mkdir(parents=True, exist_ok=True)

    # The same attribute selection is handed to every download, so it must be
    # re-iterable rather than a one-shot iterator.
    if attributes is not None:
        attributes = list(attributes)

    if pbar is not None:
        if attributes is None:
            file_sizes = [
                int(head(url, timeout=5).headers["Content-Length"]) for url in dataset_urls
            ]
        else:
            # Can't get file sizes for partial downloads
            file_sizes = [None] * len(dataset_urls)

        pbar_tasks = [
            pbar.add_task(str(dest_path.relative_to(folder_path)), total=file_size)
            for dest_path, file_size in zip(dest_paths, file_sizes)
        ]
    else:
        pbar_tasks = [None] * len(dest_paths)

    with futures.ThreadPoolExecutor(min(num_threads, len(dest_paths))) as pool:
        # Submit every download before waiting on any of them. Rebuilding a
        # one-element list per dataset meant the futures were never awaited as a group.
        futs = [
            pool.submit(
                _download_dataset,
                url,
                dest_path,
                attributes=attributes,
                force=force,
                block_size=block_size,
                pbar_task=pbar_task,
            )
            for url, dest_path, pbar_task in zip(dataset_urls, dest_paths, pbar_tasks)
        ]
        for result in futures.wait(futs, return_when=futures.FIRST_EXCEPTION).done:
            if result.exception() is not None:
                raise result.exception()

    return dest_paths


def _validate_attributes(data_name: str, attributes: Iterable[str]) -> None:
    """Checks that ``attributes`` contains only valid attributes for the given
    ``data_name``. If any attributes do not exist, raise a ValueError.

    Raises:
        ValueError: If one or more entries of ``attributes`` are not among the
            attributes returned by ``list_attributes(data_name)``
    """
    valid_attributes = list_attributes(data_name)
    invalid_attributes = [attr for attr in attributes if attr not in valid_attributes]

    if not invalid_attributes:
        return

    if len(invalid_attributes) == 1:
        values_err = f"'{invalid_attributes[0]}' is an invalid attribute for '{data_name}'"
    else:
        values_err = f"{invalid_attributes} are invalid attributes for '{data_name}'"

    raise ValueError(f"{values_err}. Valid attributes are: {valid_attributes}")
def load(  # pylint: disable=too-many-arguments
    data_name: str,
    attributes: Optional[Iterable[str]] = None,
    folder_path: Path = Path("./datasets/"),
    force: bool = False,
    num_threads: int = 50,
    block_size: int = 8388608,
    progress_bar: Optional[bool] = None,
    **params: Union[ParamArg, str, list[str]],
):
    r"""Downloads the data if it is not already present in the directory and returns it as a list of
    :class:`~pennylane.data.Dataset` objects. For the full list of available datasets, please see
    the `datasets website <https://pennylane.ai/datasets>`_.

    Args:
        data_name (str)   : A string representing the type of data required such as `qchem`, `qspin`, etc.
        attributes (list[str]) : An optional list to specify individual data element that are required
        folder_path (str) : Path to the directory used for saving datasets. Defaults to './datasets'
        force (Bool)      : Bool representing whether data has to be downloaded even if it is still present
        num_threads (int) : The maximum number of threads to spawn while downloading files (1 thread per file)
        block_size (int)  : The number of bytes to fetch per read operation when fetching datasets from S3.
            Larger values may improve performance for large datasets, but will slow down small reads. Defaults
            to 8MB
        progress_bar (bool) : Whether to show a progress bar for downloads. Defaults to True if running
            in an interactive terminal, False otherwise.
        params (kwargs)   : Keyword arguments matching the parameters of the data type. Parameters
            that are not given fall back to the default values of the dataset (see the note below).

    Returns:
        list[:class:`~pennylane.data.Dataset`]

    Raises:
        ValueError: If ``attributes`` contains an attribute that is not valid for ``data_name``, or
            if no dataset exists for the requested configuration.

    .. seealso:: :func:`~.load_interactive`, :func:`~.list_attributes`, :func:`~.list_data_names`.

    **Example**

    The :func:`~pennylane.data.load` function returns a ``list`` with the desired data.

    >>> H2datasets = qml.data.load("qchem", molname="H2", basis="STO-3G", bondlength=1.1)
    >>> print(H2datasets)
    [<Dataset = molname: H2, basis: STO-3G, bondlength: 1.1, attributes: ['basis', 'basis_rot_groupings', ...]>]

    .. note::

        If not otherwise specified, ``qml.data.load`` will download the
        default parameter value specified by the dataset.

        The default values for attributes are as follows:

        - Molecules: ``basis`` is the smallest available basis, usually ``"STO-3G"``, and ``bondlength`` is the optimal bondlength for the molecule or an alternative if the optimal is not known.

        - Spin systems: ``periodicity`` is ``"open"``, ``lattice`` is ``"chain"``, and ``layout`` is ``1x4`` for ``chain`` systems and ``2x2`` for ``rectangular`` systems.

    We can load datasets for multiple parameter values by providing a list of values instead of a single value.
    To load all possible values, use the special value :const:`~pennylane.data.FULL` or the string 'full':

    >>> H2datasets = qml.data.load("qchem", molname="H2", basis="full", bondlength=[0.5, 1.1])
    >>> print(H2datasets)
    [<Dataset = molname: H2, basis: STO-3G, bondlength: 0.5, attributes: ['basis', 'basis_rot_groupings', ...]>,
     <Dataset = molname: H2, basis: STO-3G, bondlength: 1.1, attributes: ['basis', 'basis_rot_groupings', ...]>,
     <Dataset = molname: H2, basis: CC-PVDZ, bondlength: 0.5, attributes: ['basis', 'basis_rot_groupings', ...]>,
     <Dataset = molname: H2, basis: CC-PVDZ, bondlength: 1.1, attributes: ['basis', 'basis_rot_groupings', ...]>,
     <Dataset = molname: H2, basis: 6-31G, bondlength: 0.5, attributes: ['basis', 'basis_rot_groupings', ...]>,
     <Dataset = molname: H2, basis: 6-31G, bondlength: 1.1, attributes: ['basis', 'basis_rot_groupings', ...]>]

    When we only want to download portions of a large dataset, we can specify
    the desired properties  (referred to as 'attributes'). For example, we
    can download or load only the molecule and energy of a dataset as
    follows:

    >>> part = qml.data.load(
    ...     "qchem",
    ...     molname="H2",
    ...     basis="STO-3G",
    ...     bondlength=1.1,
    ...     attributes=["molecule", "fci_energy"])[0]
    >>> part.molecule
    <Molecule = H2, Charge: 0, Basis: STO-3G, Orbitals: 2, Electrons: 2>

    To determine what attributes are available, please see :func:`~.list_attributes`.

    The loaded data items are fully compatible with PennyLane. We can
    therefore use them directly in a PennyLane circuit as follows:

    >>> H2data = qml.data.load("qchem", molname="H2", basis="STO-3G", bondlength=1.1)[0]
    >>> dev = qml.device("default.qubit",wires=4)
    >>> @qml.qnode(dev)
    ... def circuit():
    ...     qml.BasisState(H2data.hf_state, wires = [0, 1, 2, 3])
    ...     for op in H2data.vqe_gates:
    ...         qml.apply(op)
    ...     return qml.expval(H2data.hamiltonian)
    >>> print(circuit())
    -1.0791430411076344
    """
    params = format_params(**params)

    if data_name == "other":
        # For "other" datasets the first given parameter value is used as the data name
        data_name = params[0]["values"][0]
        params = []

    # ``attributes`` is consumed by the validation below and again by every download.
    # Materialise it once so a generator is not already exhausted when downloading.
    if attributes is not None:
        attributes = list(attributes)

    if attributes:
        _validate_attributes(data_name, attributes)

    folder_path = Path(folder_path)

    params = provide_defaults(data_name, params)
    # Parameters set to FULL are dropped from the query altogether
    params = [param for param in params if ("values", ParamArg.FULL) not in list(param.items())]

    dataset_ids_and_urls = get_dataset_urls(data_name, params)
    if not dataset_ids_and_urls:
        raise ValueError(
            "No datasets exist for the provided configuration.\n"
            "Please check the available datasets by using the ``qml.data.list_datasets()`` function."
        )

    dataset_urls = [dataset_url for _, dataset_url in dataset_ids_and_urls]
    dataset_ids = [dataset_id for dataset_id, _ in dataset_ids_and_urls]

    if progress_bar is None:
        progress_bar = sys.stdout.isatty()

    # Options shared by both the progress-bar and the silent code path
    download_options = {
        "force": force,
        "block_size": block_size,
        "num_threads": num_threads,
    }

    if progress_bar:
        with progress.Progress() as pbar:
            download_paths = _download_datasets(
                data_name,
                folder_path,
                dataset_urls,
                dataset_ids,
                attributes,
                pbar=pbar,
                **download_options,
            )
    else:
        download_paths = _download_datasets(
            data_name,
            folder_path,
            dataset_urls,
            dataset_ids,
            attributes,
            pbar=None,
            **download_options,
        )

    return [Dataset.open(path, "a") for path in download_paths]
def list_datasets() -> dict:
    r"""Returns a dictionary of the available datasets.

    Returns:
        dict: Nested dictionary representing the directory structure of the hosted datasets.

    .. seealso:: :func:`~.load_interactive`, :func:`~.list_attributes`, :func:`~.load`.

    **Example:**

    Note that the results of calling this function may differ from this example as more datasets
    are added. For updates on available data see the `datasets website <https://pennylane.ai/datasets>`_.

    >>> available_data = qml.data.list_datasets()
    >>> available_data.keys()
    dict_keys(["qspin", "qchem"])
    >>> available_data["qchem"].keys()
    dict_keys(["H2", "LiH", ...])
    >>> available_data['qchem']['H2'].keys()
    dict_keys(["CC-PVDZ", "6-31G", "STO-3G"])
    >>> print(available_data['qchem']['H2']['STO-3G'])
    ["0.5", "0.54", "0.62", "0.66", ...]

    Note that this example limits the results of the function calls for
    clarity and that as more data becomes available, the results of these
    function calls will change.
    """

    def _collapse(level):
        """Return a copy of ``level`` in which the innermost mapping (the one whose
        values are no longer mappings) is replaced by a sorted list of its keys."""
        first_child = next(iter(level.values()))

        if isinstance(first_child, Mapping):
            # Still an intermediate level: keep the keys and descend into each entry
            return {key: _collapse(level[key]) for key in level.keys()}

        return sorted(level.keys())

    return _collapse(_get_foldermap())
def_interactive_request_data_name(data_names):"""Prompt the user to select a data name."""print("Please select the data name from the following:")fori,optioninenumerate(data_names):print(f"{i+1}: {option}")choice=input("Choice of data name: ").strip()ifchoicenotindata_names:raiseValueError(f"Must select a single data name from {data_names}")returnchoicedef_interactive_request_attributes(attribute_options):"""Prompt the user to select a list of attributes."""print('Please select a list of attributes from the following available attributes or "full" for all attributes.')fori,optioninenumerate(attribute_options):print(f"{i+1}: {option}")choice_input=input("Comma-separated list of attributes: ")choices=[str(choice).strip()forchoiceinchoice_input.strip("[]").split(",")]if"full"inchoices:returnattribute_optionsifnot(choicesandset(choices).issubset(set(attribute_options))):raiseValueError(f"Must select a list of attributes from {attribute_options}")returnchoicesdef_interactive_requests(parameters,parameter_tree):"""Prompts the user to select parameters for datasets one at a time."""branch=parameter_treeforparaminparameters:iflen(branch["next"])==1:branch=next(iter(branch["next"].values()))continueprint(f"Available options for {param}:")fori,optioninenumerate(branch["next"].keys()):print(f"{i+1}: {option}")user_value=input(f"Please select a {param}:").strip()try:branch=branch["next"][user_value]exceptKeyErrorase:raiseValueError(f"Must enter a valid {param}:")fromereturnbranch
def load_interactive():
    r"""Download a dataset using an interactive load prompt.

    Returns:
        :class:`~pennylane.data.Dataset`: The first dataset returned by :func:`~.load`,
        or ``None`` if the download is not confirmed.

    .. seealso:: :func:`~.load`, :func:`~.list_attributes`, :func:`~.list_data_names`.

    **Example**

    .. code-block :: pycon

        >>> qml.data.load_interactive()
        Please select the data name from the following:
        1: qspin
        2: qchem
        3: other
        Choice of data name: qspin
        Available options for sysname:
        ...
        Please select a sysname: ...
        Available options for periodicity:
        ...
        Please select a periodicity: ...
        Available options for lattice:
        ...
        Please select a lattice: ...
        Available options for layout:
        ...
        Please select a layout: ...
        Please select a list of attributes from the following available attributes or "full" for all attributes.
        ...
        Comma-separated list of attributes: parameters, ground_states
        Force download files? (Default is no) [y/N]: N
        Folder to download to? (Default is pwd, will download to /datasets subdirectory): /Users/jovyan/Downloads

        Please confirm your choices:
        attributes: ['parameters', 'ground_states']
        force: False
        dest folder: /Users/jovyan/Downloads/datasets
        dataset: ...
        Would you like to continue? (Default is yes) [Y/n]:
    """
    data_names = list_data_names()
    data_name = _interactive_request_data_name(data_names)

    parameters, attribute_options, parameter_tree = _get_parameter_tree(data_name)

    # NOTE(review): the result of the parameter prompts is only echoed in the
    # confirmation below; the selected parameter values are not forwarded to
    # ``load``. Confirm whether ``load`` is meant to receive them.
    dataset_id = _interactive_requests(parameters, parameter_tree)
    attributes = _interactive_request_attributes(attribute_options)
    force = input("Force download files? (Default is no) [y/N]: ").strip() in ["y", "Y"]

    # The prompt and the confirmation both promise a "datasets" subdirectory, so
    # that is the folder handed to ``load`` (previously the bare folder was used).
    dest_folder = (
        Path(
            input(
                "Folder to download to? (Default is pwd, will download to /datasets subdirectory): "
            )
        )
        / "datasets"
    )

    print("\nPlease confirm your choices:")
    print("attributes:", attributes)
    print("force:", force)
    print("dest folder:", dest_folder)
    print("dataset:", dataset_id)

    approve = input("Would you like to continue? (Default is yes) [Y/n]: ").strip()
    if approve not in ["Y", "", "y"]:
        print("Aborting and not downloading!")
        return None

    return load(
        data_name,
        attributes=attributes,
        folder_path=dest_folder,
        force=force,
    )[0]