Program Listing for File MeasurementsKokkos.hpp

Return to documentation for file (pennylane_lightning/core/simulators/lightning_kokkos/measurements/MeasurementsKokkos.hpp)

// Copyright 2018-2023 Xanadu Quantum Technologies Inc.

// Licensed under the Apache License, Version 2.0 (the License);
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an AS IS BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <chrono>
#include <cstdint>

#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>

#include "ExpValFunctors.hpp"
#include "LinearAlgebraKokkos.hpp" // getRealOfComplexInnerProduct
#include "MeasurementsBase.hpp"
#include "MeasuresFunctors.hpp"
#include "Observables.hpp"
#include "ObservablesKokkos.hpp"
#include "StateVectorKokkos.hpp"
#include "Util.hpp"

namespace {
using namespace Pennylane::LightningKokkos::Functors;
using namespace Pennylane::Measures;
using namespace Pennylane::Observables;
using Pennylane::LightningKokkos::StateVectorKokkos;
using Pennylane::LightningKokkos::Util::getRealOfComplexInnerProduct;
using Pennylane::LightningKokkos::Util::SparseMV_Kokkos;
using Pennylane::LightningKokkos::Util::vector2view;
using Pennylane::LightningKokkos::Util::view2vector;
using Pennylane::Util::exp2;
enum class ExpValFunc : uint32_t {
    BEGIN = 1,
    Identity = 1,
    PauliX,
    PauliY,
    PauliZ,
    Hadamard,
    END
};
} // namespace

namespace Pennylane::LightningKokkos::Measures {
template <class StateVectorT>
class Measurements final
    : public MeasurementsBase<StateVectorT, Measurements<StateVectorT>> {
  private:
    using BaseType = MeasurementsBase<StateVectorT, Measurements<StateVectorT>>;
    using KokkosExecSpace = typename StateVectorT::KokkosExecSpace;
    using HostExecSpace = typename StateVectorT::HostExecSpace;
    using KokkosVector = typename StateVectorT::KokkosVector;
    using KokkosSizeTVector = typename StateVectorT::KokkosSizeTVector;
    using UnmanagedSizeTHostView =
        typename StateVectorT::UnmanagedSizeTHostView;
    using UnmanagedConstComplexHostView =
        typename StateVectorT::UnmanagedConstComplexHostView;
    using UnmanagedConstSizeTHostView =
        typename StateVectorT::UnmanagedConstSizeTHostView;
    using ScratchViewComplex = typename StateVectorT::ScratchViewComplex;
    using TeamPolicy = typename StateVectorT::TeamPolicy;

  public:
    using PrecisionT = typename StateVectorT::PrecisionT;
    using ComplexT = typename StateVectorT::ComplexT;
#if _ENABLE_PLKOKKOS_MPI == 1
    explicit Measurements(StateVectorT &statevector) : BaseType{statevector} {
#else
    explicit Measurements(const StateVectorT &statevector)
        : BaseType{statevector} {
#endif
        init_expval_funcs_();
    };

    template <template <class> class functor_t, int num_wires>
    auto applyExpValNamedFunctor(const std::vector<std::size_t> &wires)
        -> PrecisionT {
        if constexpr (num_wires > 0)
            PL_ASSERT(wires.size() == num_wires);

        const std::size_t num_qubits = this->_statevector.getNumQubits();
        const Kokkos::View<ComplexT *> arr_data = this->_statevector.getView();
        PrecisionT expval = 0.0;
        Kokkos::parallel_reduce(exp2(num_qubits - num_wires),
                                functor_t(arr_data, num_qubits, wires), expval);
        return expval;
    }

    template <template <class> class functor_t, int num_wires>
    auto applyExpValFunctor(const KokkosVector matrix,
                            const std::vector<std::size_t> &wires)
        -> PrecisionT {
        PL_ASSERT(wires.size() == num_wires);
        const std::size_t num_qubits = this->_statevector.getNumQubits();
        Kokkos::View<ComplexT *> arr_data = this->_statevector.getView();
        PrecisionT expval = 0.0;
        Kokkos::parallel_reduce(
            exp2(num_qubits - num_wires),
            functor_t<PrecisionT>(arr_data, num_qubits, matrix, wires), expval);
        return expval;
    }

    auto getExpValMatrix(const KokkosVector matrix,
                         const std::vector<std::size_t> &wires) -> PrecisionT {
        const std::size_t num_qubits = this->_statevector.getNumQubits();
        const std::size_t two2N = exp2(num_qubits - wires.size());
        const std::size_t dim = exp2(wires.size());
        const KokkosVector arr_data = this->_statevector.getView();

        PrecisionT expval = 0.0;
        switch (wires.size()) {
        case 1:
            Kokkos::parallel_reduce(two2N,
                                    getExpVal1QubitOpFunctor<PrecisionT>(
                                        arr_data, num_qubits, matrix, wires),
                                    expval);
            break;
        case 2:
            Kokkos::parallel_reduce(two2N,
                                    getExpVal2QubitOpFunctor<PrecisionT>(
                                        arr_data, num_qubits, matrix, wires),
                                    expval);
            break;
        case 3:
            Kokkos::parallel_reduce(two2N,
                                    getExpVal3QubitOpFunctor<PrecisionT>(
                                        arr_data, num_qubits, matrix, wires),
                                    expval);
            break;
        case 4:
            Kokkos::parallel_reduce(two2N,
                                    getExpVal4QubitOpFunctor<PrecisionT>(
                                        arr_data, num_qubits, matrix, wires),
                                    expval);
            break;
        default:
            std::size_t scratch_size = ScratchViewComplex::shmem_size(dim);
            Kokkos::parallel_reduce(
                "getExpValMultiQubitOpFunctor",
                TeamPolicy(two2N, Kokkos::AUTO, dim)
                    .set_scratch_size(0, Kokkos::PerTeam(scratch_size)),
                getExpValMultiQubitOpFunctor<PrecisionT>(arr_data, num_qubits,
                                                         matrix, wires),
                expval);
            break;
        }
        return expval;
    }

    auto expval(const Observable<StateVectorT> &obs) -> PrecisionT {
        StateVectorT ob_sv{this->_statevector};
        obs.applyInPlace(ob_sv);
        return getRealOfComplexInnerProduct(this->_statevector.getView(),
                                            ob_sv.getView());
    }

    auto expval(const Pennylane::LightningKokkos::Observables::HermitianObs<
                StateVectorT> &obs) -> PrecisionT {
        return expval(obs.getMatrix(), obs.getWires());
    }

    auto expval(const std::vector<ComplexT> &matrix_,
                const std::vector<std::size_t> &wires) -> PrecisionT {
        PL_ABORT_IF(matrix_.size() != exp2(2 * wires.size()),
                    "The size of matrix does not match with the given "
                    "number of wires");
        return getExpValMatrix(vector2view(matrix_), wires);
    };

    auto expval(const std::string &operation,
                const std::vector<std::size_t> &wires) -> PrecisionT {
        switch (expval_funcs_[operation]) {
        case ExpValFunc::Identity:
#if _ENABLE_MPI == 1
            return applyExpValNamedFunctor<getExpectationValueIdentityFunctor,
                                           0>(wires);
#else
            return 1.0;
#endif
        case ExpValFunc::PauliX:
            return applyExpValNamedFunctor<getExpectationValuePauliXFunctor, 1>(
                wires);
        case ExpValFunc::PauliY:
            return applyExpValNamedFunctor<getExpectationValuePauliYFunctor, 1>(
                wires);
        case ExpValFunc::PauliZ:
            return applyExpValNamedFunctor<getExpectationValuePauliZFunctor, 1>(
                wires);
        case ExpValFunc::Hadamard:
            return applyExpValNamedFunctor<getExpectationValueHadamardFunctor,
                                           1>(wires);
        default:
            PL_ABORT(
                std::string("Expval does not exist for named observable ") +
                operation);
        }
    };

    auto expval(const std::vector<std::string> &pauli_words,
                const std::vector<std::vector<std::size_t>> &target_wires,
                const std::vector<PrecisionT> &coeffs) -> PrecisionT {
        PL_ABORT_IF((pauli_words.size() != target_wires.size()),
                    "The lengths of the Pauli sentence and list of wires do "
                    "not match.");
        PL_ABORT_IF((pauli_words.size() != coeffs.size()),
                    "The lengths of the Pauli sentence and list of coeffs do "
                    "not match.");

        PrecisionT expvalue = 0.0;

        const std::size_t num_qubits = this->_statevector.getNumQubits();
        const KokkosVector arr_data = this->_statevector.getView();
        for (std::size_t word = 0; word < pauli_words.size(); word++) {
            PL_ABORT_IF((pauli_words[word].size() != target_wires[word].size()),
                        "The number of Pauli words and wires do not match.");
            std::vector<std::size_t> X_wires;
            std::vector<std::size_t> Y_wires;
            std::vector<std::size_t> Z_wires;
            for (std::size_t i = 0; i < target_wires[word].size(); i++) {
                if (pauli_words[word][i] == 'X') {
                    X_wires.push_back(target_wires[word][i]);
                } else if (pauli_words[word][i] == 'Y') {
                    Y_wires.push_back(target_wires[word][i]);
                } else if (pauli_words[word][i] == 'Z') {
                    Z_wires.push_back(target_wires[word][i]);
                } else if (pauli_words[word][i] != 'I') {
                    PL_ABORT("Invalid Pauli word.");
                }
            }
            if (X_wires.empty() && Y_wires.empty() && Z_wires.empty()) {
                expvalue +=
                    expval("Identity", target_wires[word]) * coeffs[word];
            } else {
                PrecisionT expval_tmp = 0.0;
                Kokkos::parallel_reduce(
                    "getExpValPauliWordFunctor", exp2(num_qubits),
                    getExpValPauliWordFunctor<PrecisionT>(
                        arr_data, num_qubits, X_wires, Y_wires, Z_wires),
                    expval_tmp);
                expvalue += expval_tmp * coeffs[word];
            }
        }

        return expvalue;
    }

    template <typename op_type>
    auto expval(const std::vector<op_type> &operations_list,
                const std::vector<std::vector<std::size_t>> &wires_list)
        -> std::vector<PrecisionT> {
        PL_ABORT_IF(
            (operations_list.size() != wires_list.size()),
            "The lengths of the list of operations and wires do not match.");
        std::vector<PrecisionT> expected_value_list;

        for (std::size_t index = 0; index < operations_list.size(); index++) {
            expected_value_list.emplace_back(
                expval(operations_list[index], wires_list[index]));
        }

        return expected_value_list;
    }

    auto expval(const Observable<StateVectorT> &obs,
                const std::size_t &num_shots,
                const std::vector<std::size_t> &shot_range) -> PrecisionT {
        return BaseType::expval(obs, num_shots, shot_range);
    }

    template <class IndexT>
    PrecisionT expval(const IndexT *row_map_ptr, const IndexT row_map_size,
                      const IndexT *column_idx_ptr, const ComplexT *values_ptr,
                      const IndexT numNNZ) {
        const Kokkos::View<ComplexT *> arr_data = this->_statevector.getView();
        PrecisionT expval = 0.0;
        KokkosSizeTVector kok_row_map("row_map", row_map_size);
        KokkosSizeTVector kok_indices("indices", numNNZ);
        KokkosVector kok_data("data", numNNZ);

        Kokkos::deep_copy(kok_data,
                          UnmanagedConstComplexHostView(values_ptr, numNNZ));
        Kokkos::deep_copy(kok_indices,
                          UnmanagedConstSizeTHostView(column_idx_ptr, numNNZ));
        Kokkos::deep_copy(kok_row_map, UnmanagedConstSizeTHostView(
                                           row_map_ptr, row_map_size));

        Kokkos::parallel_reduce(
            row_map_size - 1,
            getExpectationValueSparseFunctor<PrecisionT>(
                arr_data, kok_data, kok_indices, kok_row_map),
            expval);
        return expval;
    };

    auto var(const Observable<StateVectorT> &obs) -> PrecisionT {
        StateVectorT ob_sv{this->_statevector};
        obs.applyInPlace(ob_sv);

        const PrecisionT mean_square =
            getRealOfComplexInnerProduct(ob_sv.getView(), ob_sv.getView());
        const PrecisionT squared_mean = static_cast<PrecisionT>(
            std::pow(getRealOfComplexInnerProduct(this->_statevector.getView(),
                                                  ob_sv.getView()),
                     2));
        return (mean_square - squared_mean);
    }

    auto var(const std::string &operation,
             const std::vector<std::size_t> &wires) -> PrecisionT {
        StateVectorT ob_sv{this->_statevector};
        ob_sv.applyOperation(operation, wires);

        const PrecisionT mean_square =
            getRealOfComplexInnerProduct(ob_sv.getView(), ob_sv.getView());
        const PrecisionT squared_mean = static_cast<PrecisionT>(
            std::pow(getRealOfComplexInnerProduct(this->_statevector.getView(),
                                                  ob_sv.getView()),
                     2));
        return (mean_square - squared_mean);
    };

    auto var(const std::vector<ComplexT> &matrix,
             const std::vector<std::size_t> &wires) -> PrecisionT {
        StateVectorT ob_sv{this->_statevector};
        ob_sv.applyMatrix(matrix, wires);

        const PrecisionT mean_square =
            getRealOfComplexInnerProduct(ob_sv.getView(), ob_sv.getView());
        const PrecisionT squared_mean = static_cast<PrecisionT>(
            std::pow(getRealOfComplexInnerProduct(this->_statevector.getView(),
                                                  ob_sv.getView()),
                     2));
        return (mean_square - squared_mean);
    };

    template <typename op_type>
    auto var(const std::vector<op_type> &operations_list,
             const std::vector<std::vector<std::size_t>> &wires_list)
        -> std::vector<PrecisionT> {
        PL_ABORT_IF(
            (operations_list.size() != wires_list.size()),
            "The lengths of the list of operations and wires do not match.");

        std::vector<PrecisionT> expected_value_list;

        for (std::size_t index = 0; index < operations_list.size(); index++) {
            expected_value_list.emplace_back(
                var(operations_list[index], wires_list[index]));
        }

        return expected_value_list;
    };

    template <class IndexT>
    auto var(const IndexT *row_map_ptr, const IndexT row_map_size,
             const IndexT *column_idx_ptr, const ComplexT *values_ptr,
             const IndexT numNNZ) -> PrecisionT {
        PL_ABORT_IF(
            (this->_statevector.getLength() != (std::size_t(row_map_size) - 1)),
            "Statevector and Hamiltonian have incompatible sizes.");

        StateVectorT ob_sv{this->_statevector};

        SparseMV_Kokkos<PrecisionT>(this->_statevector.getView(),
                                    ob_sv.getView(), row_map_ptr, row_map_size,
                                    column_idx_ptr, values_ptr, numNNZ);

        const PrecisionT mean_square =
            getRealOfComplexInnerProduct(ob_sv.getView(), ob_sv.getView());
        const PrecisionT squared_mean = static_cast<PrecisionT>(
            std::pow(getRealOfComplexInnerProduct(this->_statevector.getView(),
                                                  ob_sv.getView()),
                     2));
        return (mean_square - squared_mean);
    };

    auto var(const Observable<StateVectorT> &obs, const std::size_t &num_shots)
        -> PrecisionT {
        return BaseType::var(obs, num_shots);
    }

    auto probs_core() -> Kokkos::View<PrecisionT *> {
        const std::size_t N = this->_statevector.getLength();
        auto sv = this->_statevector.getView();
        Kokkos::View<PrecisionT *> d_probs("d_probs", N);
        Kokkos::parallel_for(
            Kokkos::RangePolicy<KokkosExecSpace>(0, N),
            KOKKOS_LAMBDA(const std::size_t k) {
                const PrecisionT rsv = sv(k).real();
                const PrecisionT isv = sv(k).imag();
                d_probs(k) = rsv * rsv + isv * isv;
            });
        return d_probs;
    }

    auto probs() -> std::vector<PrecisionT> {
        return view2vector(probs_core());
    }

    auto
    probs(const std::vector<std::size_t> &wires,
          [[maybe_unused]] const std::vector<std::size_t> &device_wires = {})
        -> std::vector<PrecisionT> {
        // GPU_SHARED_NWIRES_MAX is an upper bound for the size of the GPU array
        // used to reduce the probs (max size = 2 ** 7)
        constexpr std::size_t GPU_SHARED_NWIRES_MAX = 7;
        // BITSHIFT_FREE_WIRES_MIN is a lower bound for the size of the loop
        // over which the probs computation is parallelized in
        // `probs_bitshift_generic` The free wires are the wires which are
        // summed over.
        constexpr std::size_t BITSHIFT_FREE_WIRES_MIN = 10;
        // BITSHIFT_NWIRES_MAX is an upper bound for using
        // `probs_bitshift_generic`, beyond that size the other implementation
        // si more efficient
        constexpr std::size_t BITSHIFT_NWIRES_MAX = 9;
        // MDRANGE_NWIRES_MAX is an upper bound for using `MDRangePolicy` to
        // parallelize the probs computation. Beyond that size, parallelizing
        // over the probs elements is more efficient.
        constexpr std::size_t MDRANGE_NWIRES_MAX = 20;
        const std::size_t n_wires = wires.size();

#ifndef _ENABLE_MPI
        if (n_wires == 0) {
            return {1.0};
        }
#endif
        const std::size_t num_qubits = this->_statevector.getNumQubits();
        // is_equal_to_all_wires is True if `wires` includes all wires in order
        // and false otherwise
        bool is_equal_to_all_wires = n_wires == num_qubits;
        for (std::size_t k = 0; k < n_wires; k++) {
            if (!is_equal_to_all_wires) {
                break;
            }
            is_equal_to_all_wires = wires[k] == k;
        }
        if (is_equal_to_all_wires) {
            return probs();
        }
        const bool is_gpu_scratch_limited =
            n_wires > GPU_SHARED_NWIRES_MAX &&
            !std::is_same_v<KokkosExecSpace, HostExecSpace>;
        if (num_qubits - n_wires > BITSHIFT_FREE_WIRES_MIN &&
            n_wires < BITSHIFT_NWIRES_MAX && !is_gpu_scratch_limited) {
            return probs_bitshift_generic<KokkosExecSpace>(
                this->_statevector.getView(), num_qubits, wires);
        }
        std::vector<std::size_t> all_indices =
            Pennylane::Util::generateBitsPatterns(wires, num_qubits);
        Kokkos::View<std::size_t *> d_all_indices = vector2view(all_indices);
        std::vector<std::size_t> all_offsets =
            Pennylane::Util::generateBitsPatterns(
                Pennylane::Util::getIndicesAfterExclusion(wires, num_qubits),
                num_qubits);
        Kokkos::View<std::size_t *> d_all_offsets = vector2view(all_offsets);
        Kokkos::View<PrecisionT *> d_probabilities("d_probabilities",
                                                   all_indices.size());
        Kokkos::View<ComplexT *> sv = this->_statevector.getView();

        // Reducing over `d_probabilities` requires too much L0 scratch memory
        // on GPUs. If n_wires >= 20, this also requires quite a bit of memory
        // on CPUs, so we fallback to the next implementation
        bool use_mdrange =
            n_wires < MDRANGE_NWIRES_MAX && !is_gpu_scratch_limited;
#ifdef KOKKOS_ENABLE_HIP
        // On HIP, MDRangePolicy with ParallelReduce can fail to find a valid
        // tile size for small shapes (e.g. all_offsets.size() <= 2). We
        // fallback to parallel_for in these cases.
        if (std::is_same_v<KokkosExecSpace, Kokkos::HIP>) {
            if (all_offsets.size() <= 2) {
                use_mdrange = false;
            }
        }
#endif

        if (use_mdrange) {
            using MDPolicyType_2D =
                Kokkos::MDRangePolicy<Kokkos::Rank<2, Kokkos::Iterate::Left>>;
            auto md_policy = MDPolicyType_2D(
                {{0, 0}}, {{static_cast<int64_t>(all_indices.size()),
                            static_cast<int64_t>(all_offsets.size())}});
            Kokkos::parallel_reduce(
                md_policy,
                getProbsFunctor<PrecisionT, KokkosExecSpace>(
                    sv, wires, d_all_indices, d_all_offsets),
                d_probabilities);
        } else {
            Kokkos::parallel_for(
                all_indices.size(), KOKKOS_LAMBDA(const std::size_t i) {
                    for (std::size_t j = 0; j < d_all_offsets.size(); j++) {
                        const std::size_t index =
                            d_all_indices(i) + d_all_offsets(j);
                        const PrecisionT rsv = sv(index).real();
                        const PrecisionT isv = sv(index).imag();
                        d_probabilities(i) += rsv * rsv + isv * isv;
                    }
                });
        }

        return view2vector(d_probabilities);
    }

    auto probs(const Observable<StateVectorT> &obs, std::size_t num_shots = 0)
        -> std::vector<PrecisionT> {
        return BaseType::probs(obs, num_shots);
    }

    auto probs(std::size_t num_shots) -> std::vector<PrecisionT> {
        return BaseType::probs(num_shots);
    }

    auto probs(const std::vector<std::size_t> &wires, std::size_t num_shots)
        -> std::vector<PrecisionT> {
        PL_ABORT_IF_NOT(
            std::is_sorted(wires.cbegin(), wires.cend()),
            "LightningKokkos does not currently support out-of-order wire "
            "indices with probability calculations");

        return BaseType::probs(wires, num_shots);
    }

    auto generate_samples(std::size_t num_samples) -> std::vector<std::size_t> {
        const std::size_t num_qubits = this->_statevector.getNumQubits();
        const std::size_t N = this->_statevector.getLength();
        Kokkos::View<std::size_t *> samples("num_samples",
                                            num_samples * num_qubits);

        // Convert probability distribution to cumulative distribution
        auto probability = probs_core();
        Kokkos::parallel_scan(
            Kokkos::RangePolicy<KokkosExecSpace>(0, N),
            KOKKOS_LAMBDA(const std::size_t k, PrecisionT &update_value,
                          const bool is_final) {
                const PrecisionT val_k = probability(k);
                if (is_final)
                    probability(k) = update_value;
                update_value += val_k;
            });

        // Sampling using Random_XorShift64_Pool
        auto rand_pool =
            this->_deviceseed.has_value()
                ? Kokkos::Random_XorShift64_Pool<>(this->_deviceseed.value())
                : Kokkos::Random_XorShift64_Pool<>(
                      std::chrono::high_resolution_clock::now()
                          .time_since_epoch()
                          .count());

        Kokkos::parallel_for(
            Kokkos::RangePolicy<KokkosExecSpace>(0, num_samples),
            Sampler<PrecisionT, Kokkos::Random_XorShift64_Pool>(
                samples, probability, rand_pool, num_qubits, N));

        return view2vector(samples);
    }

  private:
    std::unordered_map<std::string, ExpValFunc> expval_funcs_;

    // clang-format off
    void init_expval_funcs_() {
        expval_funcs_["Identity"] = ExpValFunc::Identity;
        expval_funcs_["PauliX"]   = ExpValFunc::PauliX;
        expval_funcs_["PauliY"]   = ExpValFunc::PauliY;
        expval_funcs_["PauliZ"]   = ExpValFunc::PauliZ;
        expval_funcs_["Hadamard"] = ExpValFunc::Hadamard;
    }
    // clang-format on
};

} // namespace Pennylane::LightningKokkos::Measures