Program Listing for File MeasurementsGPU.hpp¶
↰ Return to documentation for file (pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPU.hpp
)
// Copyright 2018-2023 Xanadu Quantum Technologies Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <complex>
#include <cuda.h>
#include <cusparse.h>
#include <custatevec.h> // custatevecApplyMatrix
#include <random>
#include <type_traits>
#include <unordered_map>
#include <vector>
#include "MeasurementsBase.hpp"
#include "Observables.hpp"
#include "ObservablesGPU.hpp"
#include "StateVectorCudaManaged.hpp"
#include "cuStateVecError.hpp"
#include "cuStateVec_helpers.hpp"
#include "cuda_helpers.hpp"
namespace {
using namespace Pennylane;
using namespace Pennylane::Measures;
using namespace Pennylane::Observables;
using namespace Pennylane::LightningGPU::Observables;
namespace cuUtil = Pennylane::LightningGPU::Util;
using Pennylane::LightningGPU::StateVectorCudaManaged;
using namespace Pennylane::Util;
} // namespace
namespace Pennylane::LightningGPU::Measures {
template <class StateVectorT>
class Measurements final
: public MeasurementsBase<StateVectorT, Measurements<StateVectorT>> {
private:
using PrecisionT = typename StateVectorT::PrecisionT;
using ComplexT = typename StateVectorT::ComplexT;
using BaseType = MeasurementsBase<StateVectorT, Measurements<StateVectorT>>;
using CFP_t = decltype(cuUtil::getCudaType(PrecisionT{}));
cudaDataType_t data_type_;
GateCache<PrecisionT> gate_cache_;
public:
explicit Measurements(StateVectorT &statevector)
: BaseType{statevector},
gate_cache_(true, statevector.getDataBuffer().getDevTag()) {
if constexpr (std::is_same_v<CFP_t, cuDoubleComplex> ||
std::is_same_v<CFP_t, double2>) {
data_type_ = CUDA_C_64F;
} else {
data_type_ = CUDA_C_32F;
}
};
auto probs(const std::vector<std::size_t> &wires)
-> std::vector<PrecisionT> {
PL_ABORT_IF_NOT(std::is_sorted(wires.cbegin(), wires.cend()) ||
std::is_sorted(wires.rbegin(), wires.rend()),
"LightningGPU does not currently support out-of-order "
"wire indices with probability calculations");
// Data return type fixed as double in custatevec function call
std::vector<double> probabilities(Pennylane::Util::exp2(wires.size()));
// this should be built upon by the wires not participating
int maskLen =
0; // static_cast<int>(BaseType::getNumQubits() - wires.size());
int *maskBitString = nullptr; //
int *maskOrdering = nullptr;
cudaDataType_t data_type;
if constexpr (std::is_same_v<CFP_t, cuDoubleComplex> ||
std::is_same_v<CFP_t, double2>) {
data_type = CUDA_C_64F;
} else {
data_type = CUDA_C_32F;
}
std::vector<int> wires_int(wires.size());
// Transform indices between PL & cuQuantum ordering
std::transform(wires.begin(), wires.end(), wires_int.begin(),
[&](std::size_t x) {
return static_cast<int>(
this->_statevector.getNumQubits() - 1 - x);
});
PL_CUSTATEVEC_IS_SUCCESS(custatevecAbs2SumArray(
/* custatevecHandle_t */ this->_statevector.getCusvHandle(),
/* const void* */ this->_statevector.getData(),
/* cudaDataType_t */ data_type,
/* const uint32_t */ this->_statevector.getNumQubits(),
/* double* */ probabilities.data(),
/* const int32_t* */ wires_int.data(),
/* const uint32_t */ wires_int.size(),
/* const int32_t* */ maskBitString,
/* const int32_t* */ maskOrdering,
/* const uint32_t */ maskLen));
if constexpr (std::is_same_v<CFP_t, cuDoubleComplex> ||
std::is_same_v<CFP_t, double2>) {
return probabilities;
} else {
std::vector<PrecisionT> probs(Pennylane::Util::exp2(wires.size()));
std::transform(
probabilities.begin(), probabilities.end(), probs.begin(),
[&](double x) { return static_cast<PrecisionT>(x); });
return probs;
}
}
auto probs() -> std::vector<PrecisionT> {
std::vector<std::size_t> wires;
for (size_t i = 0; i < this->_statevector.getNumQubits(); i++) {
wires.push_back(i);
}
return this->probs(wires);
}
std::vector<PrecisionT> probs(const Observable<StateVectorT> &obs,
std::size_t num_shots = 0) {
return BaseType::probs(obs, num_shots);
}
std::vector<PrecisionT> probs(size_t num_shots) {
return BaseType::probs(num_shots);
}
std::vector<PrecisionT> probs(const std::vector<std::size_t> &wires,
std::size_t num_shots) {
PL_ABORT_IF_NOT(std::is_sorted(wires.cbegin(), wires.cend()),
"LightningGPU does not currently support out-of-order "
"wire indices with probability calculations");
return BaseType::probs(wires, num_shots);
}
auto generate_samples(size_t num_samples) -> std::vector<std::size_t> {
std::vector<double> rand_nums(num_samples);
custatevecSamplerDescriptor_t sampler;
const std::size_t num_qubits = this->_statevector.getNumQubits();
const int bitStringLen = this->_statevector.getNumQubits();
std::vector<int> bitOrdering(num_qubits);
std::iota(std::begin(bitOrdering), std::end(bitOrdering),
0); // Fill with 0, 1, ...,
cudaDataType_t data_type;
if constexpr (std::is_same_v<CFP_t, cuDoubleComplex> ||
std::is_same_v<CFP_t, double2>) {
data_type = CUDA_C_64F;
} else {
data_type = CUDA_C_32F;
}
this->setRandomSeed();
std::uniform_real_distribution<PrecisionT> dis(0.0, 1.0);
for (size_t n = 0; n < num_samples; n++) {
rand_nums[n] = dis(this->rng);
}
std::vector<std::size_t> samples(num_samples * num_qubits, 0);
std::unordered_map<size_t, std::size_t> cache;
std::vector<custatevecIndex_t> bitStrings(num_samples);
void *extraWorkspace = nullptr;
std::size_t extraWorkspaceSizeInBytes = 0;
// create sampler and check the size of external workspace
PL_CUSTATEVEC_IS_SUCCESS(custatevecSamplerCreate(
this->_statevector.getCusvHandle(), this->_statevector.getData(),
data_type, num_qubits, &sampler, num_samples,
&extraWorkspaceSizeInBytes));
// allocate external workspace if necessary
if (extraWorkspaceSizeInBytes > 0)
PL_CUDA_IS_SUCCESS(
cudaMalloc(&extraWorkspace, extraWorkspaceSizeInBytes));
// sample preprocess
PL_CUSTATEVEC_IS_SUCCESS(custatevecSamplerPreprocess(
this->_statevector.getCusvHandle(), sampler, extraWorkspace,
extraWorkspaceSizeInBytes));
// sample bit strings
PL_CUSTATEVEC_IS_SUCCESS(custatevecSamplerSample(
this->_statevector.getCusvHandle(), sampler, bitStrings.data(),
bitOrdering.data(), bitStringLen, rand_nums.data(), num_samples,
CUSTATEVEC_SAMPLER_OUTPUT_ASCENDING_ORDER));
// destroy descriptor and handle
PL_CUSTATEVEC_IS_SUCCESS(custatevecSamplerDestroy(sampler));
// Pick samples
for (size_t i = 0; i < num_samples; i++) {
auto idx = bitStrings[i];
// If cached, retrieve sample from cache
if (cache.count(idx) != 0) {
std::size_t cache_id = cache[idx];
auto it_temp = samples.begin() + cache_id * num_qubits;
std::copy(it_temp, it_temp + num_qubits,
samples.begin() + i * num_qubits);
}
// If not cached, compute
else {
for (size_t j = 0; j < num_qubits; j++) {
samples[i * num_qubits + (num_qubits - 1 - j)] =
(idx >> j) & 1U;
}
cache[idx] = i;
}
}
if (extraWorkspaceSizeInBytes > 0)
PL_CUDA_IS_SUCCESS(cudaFree(extraWorkspace));
return samples;
}
template <class index_type>
auto expval(const index_type *csrOffsets_ptr, const int64_t csrOffsets_size,
const index_type *columns_ptr,
const std::complex<PrecisionT> *values_ptr,
const int64_t numNNZ) -> PrecisionT {
const std::size_t nIndexBits = this->_statevector.getNumQubits();
const std::size_t length = std::size_t{1} << nIndexBits;
auto device_id =
this->_statevector.getDataBuffer().getDevTag().getDeviceID();
auto stream_id =
this->_statevector.getDataBuffer().getDevTag().getStreamID();
std::unique_ptr<DataBuffer<CFP_t>> d_sv_prime =
std::make_unique<DataBuffer<CFP_t>>(length, device_id, stream_id,
true);
cuUtil::SparseMV_cuSparse<index_type, PrecisionT, CFP_t>(
csrOffsets_ptr, csrOffsets_size, columns_ptr, values_ptr, numNNZ,
this->_statevector.getData(), d_sv_prime->getData(), device_id,
stream_id, this->_statevector.getCusparseHandle());
auto expect =
innerProdC_CUDA(this->_statevector.getData(), d_sv_prime->getData(),
this->_statevector.getLength(), device_id,
stream_id, this->_statevector.getCublasCaller())
.x;
return expect;
}
auto expval(const std::string &operation,
const std::vector<std::size_t> &wires) -> PrecisionT {
std::vector<PrecisionT> params = {0.0};
std::vector<ComplexT> gate_matrix = {};
return this->expval_(operation, wires, params, gate_matrix);
}
template <typename op_type>
auto expval(const std::vector<op_type> &operations_list,
const std::vector<std::vector<std::size_t>> &wires_list)
-> std::vector<PrecisionT> {
PL_ABORT_IF(
(operations_list.size() != wires_list.size()),
"The lengths of the list of operations and wires do not match.");
std::vector<PrecisionT> expected_value_list;
for (size_t index = 0; index < operations_list.size(); index++) {
expected_value_list.emplace_back(
expval(operations_list[index], wires_list[index]));
}
return expected_value_list;
}
auto expval(const Observable<StateVectorT> &ob) -> PrecisionT {
StateVectorT ob_sv(this->_statevector);
ob.applyInPlace(ob_sv);
auto device_id = ob_sv.getDataBuffer().getDevTag().getDeviceID();
auto stream_id = ob_sv.getDataBuffer().getDevTag().getStreamID();
auto expect =
innerProdC_CUDA(this->_statevector.getData(), ob_sv.getData(),
this->_statevector.getLength(), device_id,
stream_id, this->_statevector.getCublasCaller())
.x;
return static_cast<PrecisionT>(expect);
}
auto expval(const Observable<StateVectorT> &obs,
const std::size_t &num_shots,
const std::vector<std::size_t> &shot_range) -> PrecisionT {
return BaseType::expval(obs, num_shots, shot_range);
}
auto expval(const std::vector<ComplexT> &matrix,
const std::vector<std::size_t> &wires) -> PrecisionT {
return this->expval_(wires, matrix);
}
auto expval(const std::vector<std::string> &pauli_words,
const std::vector<std::vector<std::size_t>> &target_wires,
const std::complex<PrecisionT> *coeffs) -> PrecisionT {
uint32_t nIndexBits =
static_cast<uint32_t>(this->_statevector.getNumQubits());
cudaDataType_t data_type;
if constexpr (std::is_same_v<CFP_t, cuDoubleComplex> ||
std::is_same_v<CFP_t, double2>) {
data_type = CUDA_C_64F;
} else {
data_type = CUDA_C_32F;
}
// Note: due to API design, cuStateVec assumes this is always a double.
// Push NVIDIA to move this to behind API for future releases, and
// support 32/64 bits.
std::vector<double> expect(pauli_words.size());
std::vector<std::vector<custatevecPauli_t>> pauliOps;
std::vector<custatevecPauli_t *> pauliOps_ptr;
for (auto &p_word : pauli_words) {
pauliOps.push_back(cuUtil::pauliStringToEnum(p_word));
pauliOps_ptr.push_back((*pauliOps.rbegin()).data());
}
std::vector<std::vector<int32_t>> basisBits;
std::vector<int32_t *> basisBits_ptr;
std::vector<uint32_t> n_basisBits;
for (auto &wires : target_wires) {
std::vector<int32_t> wiresInt(wires.size());
std::transform(wires.begin(), wires.end(), wiresInt.begin(),
[&](std::size_t x) {
return static_cast<int>(
this->_statevector.getNumQubits() - 1 - x);
});
basisBits.push_back(wiresInt);
basisBits_ptr.push_back((*basisBits.rbegin()).data());
n_basisBits.push_back(wiresInt.size());
}
// compute expectation
PL_CUSTATEVEC_IS_SUCCESS(custatevecComputeExpectationsOnPauliBasis(
/* custatevecHandle_t */ this->_statevector.getCusvHandle(),
/* const void* */ this->_statevector.getData(),
/* cudaDataType_t */ data_type,
/* const uint32_t */ nIndexBits,
/* double* */ expect.data(),
/* const custatevecPauli_t ** */
const_cast<const custatevecPauli_t **>(pauliOps_ptr.data()),
/* const uint32_t */ static_cast<uint32_t>(pauliOps.size()),
/* const int32_t ** */
const_cast<const int32_t **>(basisBits_ptr.data()),
/* const uint32_t */ n_basisBits.data()));
std::complex<PrecisionT> result{0, 0};
if constexpr (std::is_same_v<PrecisionT, double>) {
for (std::size_t idx = 0; idx < expect.size(); idx++) {
result += expect[idx] * coeffs[idx];
}
return std::real(result);
} else {
std::vector<PrecisionT> expect_cast(expect.size());
std::transform(expect.begin(), expect.end(), expect_cast.begin(),
[](double x) { return static_cast<float>(x); });
for (std::size_t idx = 0; idx < expect_cast.size(); idx++) {
result += expect_cast[idx] * coeffs[idx];
}
return std::real(result);
}
}
auto var(const Observable<StateVectorT> &ob) -> PrecisionT {
StateVectorT ob_sv(this->_statevector.getData(),
this->_statevector.getLength());
ob.applyInPlace(ob_sv);
auto device_id = ob_sv.getDataBuffer().getDevTag().getDeviceID();
auto stream_id = ob_sv.getDataBuffer().getDevTag().getStreamID();
const PrecisionT mean_square =
innerProdC_CUDA(ob_sv.getData(), ob_sv.getData(), ob_sv.getLength(),
device_id, stream_id, ob_sv.getCublasCaller())
.x;
const PrecisionT squared_mean = static_cast<PrecisionT>(std::pow(
innerProdC_CUDA(this->_statevector.getData(), ob_sv.getData(),
this->_statevector.getLength(), device_id,
stream_id, this->_statevector.getCublasCaller())
.x,
2));
return (mean_square - squared_mean);
}
auto var(const std::string &operation,
const std::vector<std::size_t> &wires) -> PrecisionT {
StateVectorT ob_sv(this->_statevector.getData(),
this->_statevector.getLength());
ob_sv.applyOperation(operation, wires);
auto device_id = ob_sv.getDataBuffer().getDevTag().getDeviceID();
auto stream_id = ob_sv.getDataBuffer().getDevTag().getStreamID();
const PrecisionT mean_square =
innerProdC_CUDA(ob_sv.getData(), ob_sv.getData(), ob_sv.getLength(),
device_id, stream_id, ob_sv.getCublasCaller())
.x;
const PrecisionT squared_mean = static_cast<PrecisionT>(std::pow(
innerProdC_CUDA(this->_statevector.getData(), ob_sv.getData(),
this->_statevector.getLength(), device_id,
stream_id, this->_statevector.getCublasCaller())
.x,
2));
return (mean_square - squared_mean);
};
auto var(const std::vector<ComplexT> &matrix,
const std::vector<std::size_t> &wires) -> PrecisionT {
StateVectorT ob_sv(this->_statevector.getData(),
this->_statevector.getLength());
ob_sv.applyMatrix(matrix, wires);
auto device_id = ob_sv.getDataBuffer().getDevTag().getDeviceID();
auto stream_id = ob_sv.getDataBuffer().getDevTag().getStreamID();
const PrecisionT mean_square =
innerProdC_CUDA(ob_sv.getData(), ob_sv.getData(), ob_sv.getLength(),
device_id, stream_id, ob_sv.getCublasCaller())
.x;
const PrecisionT squared_mean = static_cast<PrecisionT>(std::pow(
innerProdC_CUDA(this->_statevector.getData(), ob_sv.getData(),
this->_statevector.getLength(), device_id,
stream_id, this->_statevector.getCublasCaller())
.x,
2));
return (mean_square - squared_mean);
};
template <typename op_type>
auto var(const std::vector<op_type> &operations_list,
const std::vector<std::vector<std::size_t>> &wires_list)
-> std::vector<PrecisionT> {
PL_ABORT_IF(
(operations_list.size() != wires_list.size()),
"The lengths of the list of operations and wires do not match.");
std::vector<PrecisionT> expected_value_list;
for (size_t index = 0; index < operations_list.size(); index++) {
expected_value_list.emplace_back(
var(operations_list[index], wires_list[index]));
}
return expected_value_list;
};
template <class index_type>
PrecisionT var(const index_type *csrOffsets_ptr,
const int64_t csrOffsets_size, const index_type *columns_ptr,
const std::complex<PrecisionT> *values_ptr,
const int64_t numNNZ) {
PL_ABORT_IF(
(this->_statevector.getLength() != (size_t(csrOffsets_size) - 1)),
"Statevector and Hamiltonian have incompatible sizes.");
StateVectorT ob_sv(this->_statevector.getData(),
this->_statevector.getLength());
auto device_id =
this->_statevector.getDataBuffer().getDevTag().getDeviceID();
auto stream_id =
this->_statevector.getDataBuffer().getDevTag().getStreamID();
cusparseHandle_t handle = this->_statevector.getCusparseHandle();
cuUtil::SparseMV_cuSparse<index_type, PrecisionT, CFP_t>(
csrOffsets_ptr, csrOffsets_size, columns_ptr, values_ptr, numNNZ,
this->_statevector.getData(), ob_sv.getData(), device_id, stream_id,
handle);
const PrecisionT mean_square =
innerProdC_CUDA(ob_sv.getData(), ob_sv.getData(), ob_sv.getLength(),
device_id, stream_id, ob_sv.getCublasCaller())
.x;
const PrecisionT squared_mean = static_cast<PrecisionT>(std::pow(
innerProdC_CUDA(this->_statevector.getData(), ob_sv.getData(),
this->_statevector.getLength(), device_id,
stream_id, this->_statevector.getCublasCaller())
.x,
2));
return (mean_square - squared_mean);
};
auto var(const Observable<StateVectorT> &obs, const std::size_t &num_shots)
-> PrecisionT {
return BaseType::var(obs, num_shots);
}
private:
auto expval_(const std::string &obsName,
const std::vector<std::size_t> &wires,
const std::vector<PrecisionT> ¶ms = {0.0},
const std::vector<std::complex<PrecisionT>> &gate_matrix = {})
-> PrecisionT {
auto &&par = (params.empty()) ? std::vector<PrecisionT>{0.0} : params;
auto &&local_wires = wires;
std::vector<CFP_t> matrix_cu(gate_matrix.size());
if (!gate_cache_.gateExists(obsName, par[0]) && gate_matrix.empty()) {
std::string message =
"Currently unsupported observable: " + obsName;
throw LightningException(message.c_str());
}
return this->getExpectationValueDeviceMatrix_(
gate_cache_.get_gate_device_ptr(obsName, par[0]), local_wires);
}
auto expval_(const std::vector<std::size_t> &wires,
const std::vector<std::complex<PrecisionT>> &gate_matrix)
-> PrecisionT {
std::vector<CFP_t> matrix_cu(gate_matrix.size());
for (std::size_t i = 0; i < gate_matrix.size(); i++) {
matrix_cu[i] =
cuUtil::complexToCu<std::complex<PrecisionT>>(gate_matrix[i]);
}
if (gate_matrix.empty()) {
std::string message = "Currently unsupported observable";
throw LightningException(message.c_str());
}
// Wire order reversed to match expected custatevec wire ordering for
// tensor observables.
auto &&local_wires =
std::vector<std::size_t>{wires.rbegin(), wires.rend()};
auto expect_val = this->getExpectationValueDeviceMatrix_(
matrix_cu.data(), local_wires);
return expect_val;
}
auto
getExpectationValueDeviceMatrix_(const CFP_t *matrix,
const std::vector<std::size_t> &tgts) {
void *extraWorkspace = nullptr;
std::size_t extraWorkspaceSizeInBytes = 0;
std::vector<int> tgtsInt(tgts.size());
std::transform(tgts.begin(), tgts.end(), tgtsInt.begin(),
[&](std::size_t x) {
return static_cast<int>(
this->_statevector.getNumQubits() - 1 - x);
});
std::size_t nIndexBits = this->_statevector.getNumQubits();
cudaDataType_t data_type;
cudaDataType_t expectationDataType =
CUDA_C_64F; // Requested by the custatevecComputeExpectation API
custatevecComputeType_t compute_type;
if constexpr (std::is_same_v<CFP_t, cuDoubleComplex> ||
std::is_same_v<CFP_t, double2>) {
data_type = CUDA_C_64F;
compute_type = CUSTATEVEC_COMPUTE_64F;
} else {
data_type = CUDA_C_32F;
compute_type = CUSTATEVEC_COMPUTE_32F;
}
// check the size of external workspace
PL_CUSTATEVEC_IS_SUCCESS(custatevecComputeExpectationGetWorkspaceSize(
/* custatevecHandle_t */ this->_statevector.getCusvHandle(),
/* cudaDataType_t */ data_type,
/* const uint32_t */ nIndexBits,
/* const void* */ matrix,
/* cudaDataType_t */ data_type,
/* custatevecMatrixLayout_t */ CUSTATEVEC_MATRIX_LAYOUT_ROW,
/* const uint32_t */ tgtsInt.size(),
/* custatevecComputeType_t */ compute_type,
/* std::size_t* */ &extraWorkspaceSizeInBytes));
// LCOV_EXCL_START
if (extraWorkspaceSizeInBytes > 0) {
PL_CUDA_IS_SUCCESS(
cudaMalloc(&extraWorkspace, extraWorkspaceSizeInBytes));
}
// LCOV_EXCL_STOP
cuDoubleComplex expect;
// compute expectation
PL_CUSTATEVEC_IS_SUCCESS(custatevecComputeExpectation(
/* custatevecHandle_t */ this->_statevector.getCusvHandle(),
/* const void* */ this->_statevector.getData(),
/* cudaDataType_t */ data_type,
/* const uint32_t */ nIndexBits,
/* void* */ &expect,
/* cudaDataType_t */ expectationDataType,
/* double* */ nullptr,
/* const void* */ matrix,
/* cudaDataType_t */ data_type,
/* custatevecMatrixLayout_t */ CUSTATEVEC_MATRIX_LAYOUT_ROW,
/* const int32_t* */ tgtsInt.data(),
/* const uint32_t */ tgtsInt.size(),
/* custatevecComputeType_t */ compute_type,
/* void* */ extraWorkspace,
/* std::size_t */ extraWorkspaceSizeInBytes));
// LCOV_EXCL_START
if (extraWorkspaceSizeInBytes)
PL_CUDA_IS_SUCCESS(cudaFree(extraWorkspace));
// LCOV_EXCL_STOP
return static_cast<PrecisionT>(expect.x);
}
}; // class Measurements
} // namespace Pennylane::LightningGPU::Measures
api/program_listing_file_pennylane_lightning_core_src_simulators_lightning_gpu_measurements_MeasurementsGPU.hpp
Download Python script
Download Notebook
View on GitHub