Program Listing for File MeasurementsGPU.hpp

Return to documentation for file (pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPU.hpp)

// Copyright 2018-2023 Xanadu Quantum Technologies Inc.

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//     http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <complex>
#include <cuda.h>
#include <cusparse.h>
#include <custatevec.h> // custatevecApplyMatrix
#include <random>
#include <type_traits>
#include <unordered_map>
#include <vector>

#include "MeasurementsBase.hpp"
#include "Observables.hpp"
#include "ObservablesGPU.hpp"
#include "StateVectorCudaManaged.hpp"
#include "cuStateVecError.hpp"
#include "cuStateVec_helpers.hpp"
#include "cuda_helpers.hpp"

namespace {
using namespace Pennylane;
using namespace Pennylane::Measures;
using namespace Pennylane::Observables;
using namespace Pennylane::LightningGPU::Observables;
namespace cuUtil = Pennylane::LightningGPU::Util;
using Pennylane::LightningGPU::StateVectorCudaManaged;
using namespace Pennylane::Util;
} // namespace

namespace Pennylane::LightningGPU::Measures {
template <class StateVectorT>
class Measurements final
    : public MeasurementsBase<StateVectorT, Measurements<StateVectorT>> {
  private:
    using PrecisionT = typename StateVectorT::PrecisionT;
    using ComplexT = typename StateVectorT::ComplexT;
    using BaseType = MeasurementsBase<StateVectorT, Measurements<StateVectorT>>;
    using CFP_t = decltype(cuUtil::getCudaType(PrecisionT{}));
    cudaDataType_t data_type_;

    GateCache<PrecisionT> gate_cache_;

  public:
    explicit Measurements(StateVectorT &statevector)
        : BaseType{statevector},
          gate_cache_(true, statevector.getDataBuffer().getDevTag()) {
        if constexpr (std::is_same_v<CFP_t, cuDoubleComplex> ||
                      std::is_same_v<CFP_t, double2>) {
            data_type_ = CUDA_C_64F;
        } else {
            data_type_ = CUDA_C_32F;
        }
    };

    auto probs(const std::vector<std::size_t> &wires)
        -> std::vector<PrecisionT> {
        PL_ABORT_IF_NOT(std::is_sorted(wires.cbegin(), wires.cend()) ||
                            std::is_sorted(wires.rbegin(), wires.rend()),
                        "LightningGPU does not currently support out-of-order "
                        "wire indices with probability calculations");

        // Data return type fixed as double in custatevec function call
        std::vector<double> probabilities(Pennylane::Util::exp2(wires.size()));
        // this should be built upon by the wires not participating
        int maskLen =
            0; // static_cast<int>(BaseType::getNumQubits() - wires.size());
        int *maskBitString = nullptr; //
        int *maskOrdering = nullptr;

        cudaDataType_t data_type;

        if constexpr (std::is_same_v<CFP_t, cuDoubleComplex> ||
                      std::is_same_v<CFP_t, double2>) {
            data_type = CUDA_C_64F;
        } else {
            data_type = CUDA_C_32F;
        }

        std::vector<int> wires_int(wires.size());

        // Transform indices between PL & cuQuantum ordering
        std::transform(wires.begin(), wires.end(), wires_int.begin(),
                       [&](std::size_t x) {
                           return static_cast<int>(
                               this->_statevector.getNumQubits() - 1 - x);
                       });

        PL_CUSTATEVEC_IS_SUCCESS(custatevecAbs2SumArray(
            /* custatevecHandle_t */ this->_statevector.getCusvHandle(),
            /* const void* */ this->_statevector.getData(),
            /* cudaDataType_t */ data_type,
            /* const uint32_t */ this->_statevector.getNumQubits(),
            /* double* */ probabilities.data(),
            /* const int32_t* */ wires_int.data(),
            /* const uint32_t */ wires_int.size(),
            /* const int32_t* */ maskBitString,
            /* const int32_t* */ maskOrdering,
            /* const uint32_t */ maskLen));

        if constexpr (std::is_same_v<CFP_t, cuDoubleComplex> ||
                      std::is_same_v<CFP_t, double2>) {
            return probabilities;
        } else {
            std::vector<PrecisionT> probs(Pennylane::Util::exp2(wires.size()));
            std::transform(
                probabilities.begin(), probabilities.end(), probs.begin(),
                [&](double x) { return static_cast<PrecisionT>(x); });
            return probs;
        }
    }

    auto probs() -> std::vector<PrecisionT> {
        std::vector<std::size_t> wires;
        for (size_t i = 0; i < this->_statevector.getNumQubits(); i++) {
            wires.push_back(i);
        }
        return this->probs(wires);
    }

    std::vector<PrecisionT> probs(const Observable<StateVectorT> &obs,
                                  std::size_t num_shots = 0) {
        return BaseType::probs(obs, num_shots);
    }

    std::vector<PrecisionT> probs(size_t num_shots) {
        return BaseType::probs(num_shots);
    }

    std::vector<PrecisionT> probs(const std::vector<std::size_t> &wires,
                                  std::size_t num_shots) {
        PL_ABORT_IF_NOT(std::is_sorted(wires.cbegin(), wires.cend()),
                        "LightningGPU does not currently support out-of-order "
                        "wire indices with probability calculations");

        return BaseType::probs(wires, num_shots);
    }

    auto generate_samples(size_t num_samples) -> std::vector<std::size_t> {
        std::vector<double> rand_nums(num_samples);
        custatevecSamplerDescriptor_t sampler;

        const std::size_t num_qubits = this->_statevector.getNumQubits();
        const int bitStringLen = this->_statevector.getNumQubits();

        std::vector<int> bitOrdering(num_qubits);
        std::iota(std::begin(bitOrdering), std::end(bitOrdering),
                  0); // Fill with 0, 1, ...,

        cudaDataType_t data_type;

        if constexpr (std::is_same_v<CFP_t, cuDoubleComplex> ||
                      std::is_same_v<CFP_t, double2>) {
            data_type = CUDA_C_64F;
        } else {
            data_type = CUDA_C_32F;
        }

        this->setRandomSeed();
        std::uniform_real_distribution<PrecisionT> dis(0.0, 1.0);
        for (size_t n = 0; n < num_samples; n++) {
            rand_nums[n] = dis(this->rng);
        }
        std::vector<std::size_t> samples(num_samples * num_qubits, 0);
        std::unordered_map<size_t, std::size_t> cache;
        std::vector<custatevecIndex_t> bitStrings(num_samples);

        void *extraWorkspace = nullptr;
        std::size_t extraWorkspaceSizeInBytes = 0;
        // create sampler and check the size of external workspace
        PL_CUSTATEVEC_IS_SUCCESS(custatevecSamplerCreate(
            this->_statevector.getCusvHandle(), this->_statevector.getData(),
            data_type, num_qubits, &sampler, num_samples,
            &extraWorkspaceSizeInBytes));

        // allocate external workspace if necessary
        if (extraWorkspaceSizeInBytes > 0)
            PL_CUDA_IS_SUCCESS(
                cudaMalloc(&extraWorkspace, extraWorkspaceSizeInBytes));

        // sample preprocess
        PL_CUSTATEVEC_IS_SUCCESS(custatevecSamplerPreprocess(
            this->_statevector.getCusvHandle(), sampler, extraWorkspace,
            extraWorkspaceSizeInBytes));

        // sample bit strings
        PL_CUSTATEVEC_IS_SUCCESS(custatevecSamplerSample(
            this->_statevector.getCusvHandle(), sampler, bitStrings.data(),
            bitOrdering.data(), bitStringLen, rand_nums.data(), num_samples,
            CUSTATEVEC_SAMPLER_OUTPUT_ASCENDING_ORDER));

        // destroy descriptor and handle
        PL_CUSTATEVEC_IS_SUCCESS(custatevecSamplerDestroy(sampler));

        // Pick samples
        for (size_t i = 0; i < num_samples; i++) {
            auto idx = bitStrings[i];
            // If cached, retrieve sample from cache
            if (cache.count(idx) != 0) {
                std::size_t cache_id = cache[idx];
                auto it_temp = samples.begin() + cache_id * num_qubits;
                std::copy(it_temp, it_temp + num_qubits,
                          samples.begin() + i * num_qubits);
            }
            // If not cached, compute
            else {
                for (size_t j = 0; j < num_qubits; j++) {
                    samples[i * num_qubits + (num_qubits - 1 - j)] =
                        (idx >> j) & 1U;
                }
                cache[idx] = i;
            }
        }

        if (extraWorkspaceSizeInBytes > 0)
            PL_CUDA_IS_SUCCESS(cudaFree(extraWorkspace));

        return samples;
    }

    template <class index_type>
    auto expval(const index_type *csrOffsets_ptr, const int64_t csrOffsets_size,
                const index_type *columns_ptr,
                const std::complex<PrecisionT> *values_ptr,
                const int64_t numNNZ) -> PrecisionT {
        const std::size_t nIndexBits = this->_statevector.getNumQubits();
        const std::size_t length = std::size_t{1} << nIndexBits;

        auto device_id =
            this->_statevector.getDataBuffer().getDevTag().getDeviceID();
        auto stream_id =
            this->_statevector.getDataBuffer().getDevTag().getStreamID();

        std::unique_ptr<DataBuffer<CFP_t>> d_sv_prime =
            std::make_unique<DataBuffer<CFP_t>>(length, device_id, stream_id,
                                                true);

        cuUtil::SparseMV_cuSparse<index_type, PrecisionT, CFP_t>(
            csrOffsets_ptr, csrOffsets_size, columns_ptr, values_ptr, numNNZ,
            this->_statevector.getData(), d_sv_prime->getData(), device_id,
            stream_id, this->_statevector.getCusparseHandle());

        auto expect =
            innerProdC_CUDA(this->_statevector.getData(), d_sv_prime->getData(),
                            this->_statevector.getLength(), device_id,
                            stream_id, this->_statevector.getCublasCaller())
                .x;

        return expect;
    }

    auto expval(const std::string &operation,
                const std::vector<std::size_t> &wires) -> PrecisionT {
        std::vector<PrecisionT> params = {0.0};
        std::vector<ComplexT> gate_matrix = {};
        return this->expval_(operation, wires, params, gate_matrix);
    }

    template <typename op_type>
    auto expval(const std::vector<op_type> &operations_list,
                const std::vector<std::vector<std::size_t>> &wires_list)
        -> std::vector<PrecisionT> {
        PL_ABORT_IF(
            (operations_list.size() != wires_list.size()),
            "The lengths of the list of operations and wires do not match.");
        std::vector<PrecisionT> expected_value_list;

        for (size_t index = 0; index < operations_list.size(); index++) {
            expected_value_list.emplace_back(
                expval(operations_list[index], wires_list[index]));
        }

        return expected_value_list;
    }

    auto expval(const Observable<StateVectorT> &ob) -> PrecisionT {
        StateVectorT ob_sv(this->_statevector);
        ob.applyInPlace(ob_sv);

        auto device_id = ob_sv.getDataBuffer().getDevTag().getDeviceID();
        auto stream_id = ob_sv.getDataBuffer().getDevTag().getStreamID();

        auto expect =
            innerProdC_CUDA(this->_statevector.getData(), ob_sv.getData(),
                            this->_statevector.getLength(), device_id,
                            stream_id, this->_statevector.getCublasCaller())
                .x;
        return static_cast<PrecisionT>(expect);
    }

    auto expval(const Observable<StateVectorT> &obs,
                const std::size_t &num_shots,
                const std::vector<std::size_t> &shot_range) -> PrecisionT {
        return BaseType::expval(obs, num_shots, shot_range);
    }

    auto expval(const std::vector<ComplexT> &matrix,
                const std::vector<std::size_t> &wires) -> PrecisionT {
        return this->expval_(wires, matrix);
    }

    auto expval(const std::vector<std::string> &pauli_words,
                const std::vector<std::vector<std::size_t>> &target_wires,
                const std::complex<PrecisionT> *coeffs) -> PrecisionT {
        uint32_t nIndexBits =
            static_cast<uint32_t>(this->_statevector.getNumQubits());
        cudaDataType_t data_type;

        if constexpr (std::is_same_v<CFP_t, cuDoubleComplex> ||
                      std::is_same_v<CFP_t, double2>) {
            data_type = CUDA_C_64F;
        } else {
            data_type = CUDA_C_32F;
        }

        // Note: due to API design, cuStateVec assumes this is always a double.
        // Push NVIDIA to move this to behind API for future releases, and
        // support 32/64 bits.
        std::vector<double> expect(pauli_words.size());

        std::vector<std::vector<custatevecPauli_t>> pauliOps;

        std::vector<custatevecPauli_t *> pauliOps_ptr;

        for (auto &p_word : pauli_words) {
            pauliOps.push_back(cuUtil::pauliStringToEnum(p_word));
            pauliOps_ptr.push_back((*pauliOps.rbegin()).data());
        }

        std::vector<std::vector<int32_t>> basisBits;
        std::vector<int32_t *> basisBits_ptr;
        std::vector<uint32_t> n_basisBits;

        for (auto &wires : target_wires) {
            std::vector<int32_t> wiresInt(wires.size());
            std::transform(wires.begin(), wires.end(), wiresInt.begin(),
                           [&](std::size_t x) {
                               return static_cast<int>(
                                   this->_statevector.getNumQubits() - 1 - x);
                           });
            basisBits.push_back(wiresInt);
            basisBits_ptr.push_back((*basisBits.rbegin()).data());
            n_basisBits.push_back(wiresInt.size());
        }

        // compute expectation
        PL_CUSTATEVEC_IS_SUCCESS(custatevecComputeExpectationsOnPauliBasis(
            /* custatevecHandle_t */ this->_statevector.getCusvHandle(),
            /* const void* */ this->_statevector.getData(),
            /* cudaDataType_t */ data_type,
            /* const uint32_t */ nIndexBits,
            /* double* */ expect.data(),
            /* const custatevecPauli_t ** */
            const_cast<const custatevecPauli_t **>(pauliOps_ptr.data()),
            /* const uint32_t */ static_cast<uint32_t>(pauliOps.size()),
            /* const int32_t ** */
            const_cast<const int32_t **>(basisBits_ptr.data()),
            /* const uint32_t */ n_basisBits.data()));

        std::complex<PrecisionT> result{0, 0};

        if constexpr (std::is_same_v<PrecisionT, double>) {
            for (std::size_t idx = 0; idx < expect.size(); idx++) {
                result += expect[idx] * coeffs[idx];
            }
            return std::real(result);
        } else {
            std::vector<PrecisionT> expect_cast(expect.size());
            std::transform(expect.begin(), expect.end(), expect_cast.begin(),
                           [](double x) { return static_cast<float>(x); });

            for (std::size_t idx = 0; idx < expect_cast.size(); idx++) {
                result += expect_cast[idx] * coeffs[idx];
            }

            return std::real(result);
        }
    }

    auto var(const Observable<StateVectorT> &ob) -> PrecisionT {
        StateVectorT ob_sv(this->_statevector.getData(),
                           this->_statevector.getLength());
        ob.applyInPlace(ob_sv);

        auto device_id = ob_sv.getDataBuffer().getDevTag().getDeviceID();
        auto stream_id = ob_sv.getDataBuffer().getDevTag().getStreamID();

        const PrecisionT mean_square =
            innerProdC_CUDA(ob_sv.getData(), ob_sv.getData(), ob_sv.getLength(),
                            device_id, stream_id, ob_sv.getCublasCaller())
                .x;

        const PrecisionT squared_mean = static_cast<PrecisionT>(std::pow(
            innerProdC_CUDA(this->_statevector.getData(), ob_sv.getData(),
                            this->_statevector.getLength(), device_id,
                            stream_id, this->_statevector.getCublasCaller())
                .x,
            2));
        return (mean_square - squared_mean);
    }

    auto var(const std::string &operation,
             const std::vector<std::size_t> &wires) -> PrecisionT {
        StateVectorT ob_sv(this->_statevector.getData(),
                           this->_statevector.getLength());
        ob_sv.applyOperation(operation, wires);

        auto device_id = ob_sv.getDataBuffer().getDevTag().getDeviceID();
        auto stream_id = ob_sv.getDataBuffer().getDevTag().getStreamID();

        const PrecisionT mean_square =
            innerProdC_CUDA(ob_sv.getData(), ob_sv.getData(), ob_sv.getLength(),
                            device_id, stream_id, ob_sv.getCublasCaller())
                .x;

        const PrecisionT squared_mean = static_cast<PrecisionT>(std::pow(
            innerProdC_CUDA(this->_statevector.getData(), ob_sv.getData(),
                            this->_statevector.getLength(), device_id,
                            stream_id, this->_statevector.getCublasCaller())
                .x,
            2));
        return (mean_square - squared_mean);
    };

    auto var(const std::vector<ComplexT> &matrix,
             const std::vector<std::size_t> &wires) -> PrecisionT {
        StateVectorT ob_sv(this->_statevector.getData(),
                           this->_statevector.getLength());
        ob_sv.applyMatrix(matrix, wires);

        auto device_id = ob_sv.getDataBuffer().getDevTag().getDeviceID();
        auto stream_id = ob_sv.getDataBuffer().getDevTag().getStreamID();

        const PrecisionT mean_square =
            innerProdC_CUDA(ob_sv.getData(), ob_sv.getData(), ob_sv.getLength(),
                            device_id, stream_id, ob_sv.getCublasCaller())
                .x;

        const PrecisionT squared_mean = static_cast<PrecisionT>(std::pow(
            innerProdC_CUDA(this->_statevector.getData(), ob_sv.getData(),
                            this->_statevector.getLength(), device_id,
                            stream_id, this->_statevector.getCublasCaller())
                .x,
            2));
        return (mean_square - squared_mean);
    };

    template <typename op_type>
    auto var(const std::vector<op_type> &operations_list,
             const std::vector<std::vector<std::size_t>> &wires_list)
        -> std::vector<PrecisionT> {
        PL_ABORT_IF(
            (operations_list.size() != wires_list.size()),
            "The lengths of the list of operations and wires do not match.");

        std::vector<PrecisionT> expected_value_list;

        for (size_t index = 0; index < operations_list.size(); index++) {
            expected_value_list.emplace_back(
                var(operations_list[index], wires_list[index]));
        }

        return expected_value_list;
    };

    template <class index_type>
    PrecisionT var(const index_type *csrOffsets_ptr,
                   const int64_t csrOffsets_size, const index_type *columns_ptr,
                   const std::complex<PrecisionT> *values_ptr,
                   const int64_t numNNZ) {
        PL_ABORT_IF(
            (this->_statevector.getLength() != (size_t(csrOffsets_size) - 1)),
            "Statevector and Hamiltonian have incompatible sizes.");

        StateVectorT ob_sv(this->_statevector.getData(),
                           this->_statevector.getLength());

        auto device_id =
            this->_statevector.getDataBuffer().getDevTag().getDeviceID();
        auto stream_id =
            this->_statevector.getDataBuffer().getDevTag().getStreamID();
        cusparseHandle_t handle = this->_statevector.getCusparseHandle();

        cuUtil::SparseMV_cuSparse<index_type, PrecisionT, CFP_t>(
            csrOffsets_ptr, csrOffsets_size, columns_ptr, values_ptr, numNNZ,
            this->_statevector.getData(), ob_sv.getData(), device_id, stream_id,
            handle);

        const PrecisionT mean_square =
            innerProdC_CUDA(ob_sv.getData(), ob_sv.getData(), ob_sv.getLength(),
                            device_id, stream_id, ob_sv.getCublasCaller())
                .x;

        const PrecisionT squared_mean = static_cast<PrecisionT>(std::pow(
            innerProdC_CUDA(this->_statevector.getData(), ob_sv.getData(),
                            this->_statevector.getLength(), device_id,
                            stream_id, this->_statevector.getCublasCaller())
                .x,
            2));
        return (mean_square - squared_mean);
    };

    auto var(const Observable<StateVectorT> &obs, const std::size_t &num_shots)
        -> PrecisionT {
        return BaseType::var(obs, num_shots);
    }

  private:
    auto expval_(const std::string &obsName,
                 const std::vector<std::size_t> &wires,
                 const std::vector<PrecisionT> &params = {0.0},
                 const std::vector<std::complex<PrecisionT>> &gate_matrix = {})
        -> PrecisionT {
        auto &&par = (params.empty()) ? std::vector<PrecisionT>{0.0} : params;

        auto &&local_wires = wires;

        std::vector<CFP_t> matrix_cu(gate_matrix.size());
        if (!gate_cache_.gateExists(obsName, par[0]) && gate_matrix.empty()) {
            std::string message =
                "Currently unsupported observable: " + obsName;
            throw LightningException(message.c_str());
        }
        return this->getExpectationValueDeviceMatrix_(
            gate_cache_.get_gate_device_ptr(obsName, par[0]), local_wires);
    }
    auto expval_(const std::vector<std::size_t> &wires,
                 const std::vector<std::complex<PrecisionT>> &gate_matrix)
        -> PrecisionT {
        std::vector<CFP_t> matrix_cu(gate_matrix.size());

        for (std::size_t i = 0; i < gate_matrix.size(); i++) {
            matrix_cu[i] =
                cuUtil::complexToCu<std::complex<PrecisionT>>(gate_matrix[i]);
        }

        if (gate_matrix.empty()) {
            std::string message = "Currently unsupported observable";
            throw LightningException(message.c_str());
        }

        // Wire order reversed to match expected custatevec wire ordering for
        // tensor observables.
        auto &&local_wires =
            std::vector<std::size_t>{wires.rbegin(), wires.rend()};

        auto expect_val = this->getExpectationValueDeviceMatrix_(
            matrix_cu.data(), local_wires);
        return expect_val;
    }

    auto
    getExpectationValueDeviceMatrix_(const CFP_t *matrix,
                                     const std::vector<std::size_t> &tgts) {
        void *extraWorkspace = nullptr;
        std::size_t extraWorkspaceSizeInBytes = 0;

        std::vector<int> tgtsInt(tgts.size());
        std::transform(tgts.begin(), tgts.end(), tgtsInt.begin(),
                       [&](std::size_t x) {
                           return static_cast<int>(
                               this->_statevector.getNumQubits() - 1 - x);
                       });

        std::size_t nIndexBits = this->_statevector.getNumQubits();
        cudaDataType_t data_type;
        cudaDataType_t expectationDataType =
            CUDA_C_64F; // Requested by the custatevecComputeExpectation API
        custatevecComputeType_t compute_type;

        if constexpr (std::is_same_v<CFP_t, cuDoubleComplex> ||
                      std::is_same_v<CFP_t, double2>) {
            data_type = CUDA_C_64F;
            compute_type = CUSTATEVEC_COMPUTE_64F;
        } else {
            data_type = CUDA_C_32F;
            compute_type = CUSTATEVEC_COMPUTE_32F;
        }

        // check the size of external workspace
        PL_CUSTATEVEC_IS_SUCCESS(custatevecComputeExpectationGetWorkspaceSize(
            /* custatevecHandle_t */ this->_statevector.getCusvHandle(),
            /* cudaDataType_t */ data_type,
            /* const uint32_t */ nIndexBits,
            /* const void* */ matrix,
            /* cudaDataType_t */ data_type,
            /* custatevecMatrixLayout_t */ CUSTATEVEC_MATRIX_LAYOUT_ROW,
            /* const uint32_t */ tgtsInt.size(),
            /* custatevecComputeType_t */ compute_type,
            /* std::size_t* */ &extraWorkspaceSizeInBytes));

        // LCOV_EXCL_START
        if (extraWorkspaceSizeInBytes > 0) {
            PL_CUDA_IS_SUCCESS(
                cudaMalloc(&extraWorkspace, extraWorkspaceSizeInBytes));
        }
        // LCOV_EXCL_STOP

        cuDoubleComplex expect;

        // compute expectation
        PL_CUSTATEVEC_IS_SUCCESS(custatevecComputeExpectation(
            /* custatevecHandle_t */ this->_statevector.getCusvHandle(),
            /* const void* */ this->_statevector.getData(),
            /* cudaDataType_t */ data_type,
            /* const uint32_t */ nIndexBits,
            /* void* */ &expect,
            /* cudaDataType_t */ expectationDataType,
            /* double* */ nullptr,
            /* const void* */ matrix,
            /* cudaDataType_t */ data_type,
            /* custatevecMatrixLayout_t */ CUSTATEVEC_MATRIX_LAYOUT_ROW,
            /* const int32_t* */ tgtsInt.data(),
            /* const uint32_t */ tgtsInt.size(),
            /* custatevecComputeType_t */ compute_type,
            /* void* */ extraWorkspace,
            /* std::size_t */ extraWorkspaceSizeInBytes));

        // LCOV_EXCL_START
        if (extraWorkspaceSizeInBytes)
            PL_CUDA_IS_SUCCESS(cudaFree(extraWorkspace));
        // LCOV_EXCL_STOP

        return static_cast<PrecisionT>(expect.x);
    }
}; // class Measurements
} // namespace Pennylane::LightningGPU::Measures