// Program listing for file StateVectorCudaBase.hpp
//
// Source path: pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaBase.hpp

// Copyright 2022-2023 Xanadu Quantum Technologies Inc.

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//     http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "cuda.h"
#include <complex>
#include <cstddef>
#include <cuda_runtime_api.h> // cudaMalloc, cudaMemcpy, etc.
#include <functional>
#include <memory>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "DataBuffer.hpp"
#include "DevTag.hpp"
#include "Error.hpp"
#include "StateVectorBase.hpp"
#include "cuStateVecError.hpp"
#include "cuda_helpers.hpp"

// File-local shorthand for the LightningGPU utility namespace. It is used
// below as `cuUtil::getCudaType` to deduce the device-side element type.
namespace {
namespace cuUtil = Pennylane::LightningGPU::Util;
// using namespace Pennylane::Util;
} // namespace

namespace Pennylane::LightningGPU {
/**
 * @brief CRTP base class for state vectors whose data is held in a
 * `LightningGPU::DataBuffer`.
 *
 * The class owns the data buffer, forwards stream handling and host/device
 * copy requests to it, and validates sizes against the length / qubit count
 * reported by `StateVectorBase` before every copy.
 *
 * @tparam Precision Floating point type used for the state-vector amplitudes.
 * @tparam Derived Concrete state-vector class (CRTP). It must provide
 * `setBasisState(value, index, async)`, which is called by `initSV`.
 */
template <class Precision, class Derived>
class StateVectorCudaBase : public StateVectorBase<Precision, Derived> {
  private:
    using BaseType = StateVectorBase<Precision, Derived>;

  public:
    /// Floating point type of the real and imaginary parts.
    using scalar_type_t = Precision;

    /// Device-side element type, deduced from `cuUtil::getCudaType` for the
    /// given precision.
    using CFP_t = decltype(cuUtil::getCudaType(Precision{}));

    /**
     * @brief Get the pointer held by the data buffer (read-only access).
     *
     * @return const CFP_t* Pointer returned by the underlying data buffer.
     */
    [[nodiscard]] auto getData() const -> const CFP_t * {
        return data_buffer_->getData();
    }

    /**
     * @brief Get the pointer held by the data buffer (mutable access).
     *
     * @return CFP_t* Pointer returned by the underlying data buffer.
     */
    [[nodiscard]] auto getData() -> CFP_t * { return data_buffer_->getData(); }

    /**
     * @brief Get the CUDA stream associated with the data buffer.
     *
     * @return cudaStream_t Stream reported by the data buffer.
     */
    auto getStream() -> cudaStream_t { return data_buffer_->getStream(); }

    /**
     * @brief Get the CUDA stream associated with the data buffer.
     *
     * @return cudaStream_t Stream reported by the data buffer.
     */
    auto getStream() const -> cudaStream_t { return data_buffer_->getStream(); }

    /**
     * @brief Set the CUDA stream used by the data buffer.
     *
     * @param s Stream handle forwarded to the data buffer.
     */
    void setStream(const cudaStream_t &s) { data_buffer_->setStream(s); }

    /**
     * @brief Copy a host-side state vector into the data buffer.
     *
     * Aborts through `PL_ABORT_IF_NOT` when `sv.size()` differs from the
     * length reported by the base class.
     *
     * @param sv Host data to copy.
     * @param async Forwarded to the data buffer's copy routine.
     */
    void CopyHostDataToGpu(const std::vector<std::complex<Precision>> &sv,
                           bool async = false) {
        PL_ABORT_IF_NOT(BaseType::getLength() == sv.size(),
                        "Sizes do not match for Host and GPU data");
        data_buffer_->CopyHostDataToGpu(sv.data(), sv.size(), async);
    }

    /**
     * @brief Copy data from another GPU buffer into this state vector.
     *
     * Aborts through `PL_ABORT_IF_NOT` when `length` differs from the length
     * reported by the base class.
     *
     * @param gpu_sv Source pointer.
     * @param length Number of elements to copy.
     * @param async Forwarded to the data buffer's copy routine.
     */
    void CopyGpuDataToGpuIn(const CFP_t *gpu_sv, std::size_t length,
                            bool async = false) {
        PL_ABORT_IF_NOT(BaseType::getLength() == length,
                        "Sizes do not match for Host and GPU data");
        data_buffer_->CopyGpuDataToGpu(gpu_sv, length, async);
    }

    /**
     * @brief Copy the data of another state vector into this one.
     *
     * Aborts through `PL_ABORT_IF_NOT` when the qubit counts differ or when
     * the element types of the two buffers are not the same.
     *
     * @param sv Source state vector.
     * @param async Forwarded to the data buffer's copy routine.
     */
    void CopyGpuDataToGpuIn(const Derived &sv, bool async = false) {
        PL_ABORT_IF_NOT(BaseType::getNumQubits() == sv.getNumQubits(),
                        "Sizes do not match for Host and GPU data");
        // Compare the element types with pointer and cv-qualifiers stripped.
        // The result is known at compile time; the runtime abort is retained
        // so the failure mode seen by callers does not change.
        using LocalElem = std::decay_t<
            std::remove_pointer_t<decltype(data_buffer_->getData())>>;
        using OtherElem =
            std::decay_t<std::remove_pointer_t<decltype(sv.getData())>>;
        constexpr bool same = std::is_same_v<LocalElem, OtherElem>;
        PL_ABORT_IF_NOT(same,
                        "Data types are incompatible for GPU-GPU transfer");
        data_buffer_->CopyGpuDataToGpu(sv.getData(), sv.getLength(), async);
    }

    /**
     * @brief Copy host data, given as a raw pointer, into the data buffer.
     *
     * The host pointer is reinterpreted as `const CFP_t *` before being
     * handed to the data buffer. Aborts through `PL_ABORT_IF_NOT` when
     * `length` differs from the length reported by the base class.
     *
     * @param host_sv Source pointer on the host.
     * @param length Number of elements to copy.
     * @param async Forwarded to the data buffer's copy routine.
     */
    void CopyHostDataToGpu(const std::complex<Precision> *host_sv,
                           std::size_t length, bool async = false) {
        PL_ABORT_IF_NOT(BaseType::getLength() == length,
                        "Sizes do not match for Host and GPU data");
        data_buffer_->CopyHostDataToGpu(
            reinterpret_cast<const CFP_t *>(host_sv), length, async);
    }

    /**
     * @brief Copy the data buffer contents to a host pointer.
     *
     * Aborts through `PL_ABORT_IF_NOT` when `length` differs from the length
     * reported by the base class.
     *
     * @param host_sv Destination pointer on the host.
     * @param length Number of elements to copy.
     * @param async Forwarded to the data buffer's copy routine.
     */
    void CopyGpuDataToHost(std::complex<Precision> *host_sv,
                           std::size_t length, bool async = false) const {
        PL_ABORT_IF_NOT(BaseType::getLength() == length,
                        "Sizes do not match for Host and GPU data");
        data_buffer_->CopyGpuDataToHost(host_sv, length, async);
    }

    /**
     * @brief Copy this state vector's data into another state vector.
     *
     * Aborts through `PL_ABORT_IF_NOT` when the qubit counts differ.
     *
     * @param sv Destination state vector.
     * @param async Forwarded to the data buffer's copy routine.
     */
    void CopyGpuDataToGpuOut(Derived &sv, bool async = false) {
        PL_ABORT_IF_NOT(BaseType::getNumQubits() == sv.getNumQubits(),
                        "Sizes do not match for GPU data objects");
        // getDataBuffer() returns a reference, so member access uses `.`.
        sv.getDataBuffer().CopyGpuDataToGpu(getData(),
                                            data_buffer_->getLength(), async);
    }

    /**
     * @brief Access the owned data buffer (read-only).
     *
     * @return const LightningGPU::DataBuffer<CFP_t>& Reference to the buffer.
     */
    const LightningGPU::DataBuffer<CFP_t> &getDataBuffer() const {
        return *data_buffer_;
    }

    /**
     * @brief Access the owned data buffer (mutable).
     *
     * @return LightningGPU::DataBuffer<CFP_t>& Reference to the buffer.
     */
    LightningGPU::DataBuffer<CFP_t> &getDataBuffer() { return *data_buffer_; }

    /**
     * @brief Overwrite this state vector's data with that of `other`.
     *
     * Equivalent to `CopyGpuDataToGpuIn(other, async)`.
     *
     * @param other Source state vector.
     * @param async Forwarded to the data buffer's copy routine.
     */
    void updateData(const Derived &other, bool async = false) {
        CopyGpuDataToGpuIn(other, async);
    }

    /**
     * @brief Replace the owned data buffer by taking ownership of `other`.
     *
     * No size check is performed here; the previous buffer is released by
     * the `std::unique_ptr` assignment.
     *
     * @param other Buffer to take ownership of.
     */
    void updateData(std::unique_ptr<LightningGPU::DataBuffer<CFP_t>> &&other) {
        data_buffer_ = std::move(other);
    }

    /**
     * @brief Initialise the state vector through the derived class.
     *
     * Calls `Derived::setBasisState` with the value (1, 0) at index 0.
     *
     * @param async Forwarded to `Derived::setBasisState`.
     */
    void initSV(bool async = false) {
        std::size_t index = 0;
        const std::complex<Precision> value(1, 0);
        static_cast<Derived *>(this)->setBasisState(value, index, async);
    }

  protected:
    /// Callable taking a wire list, a boolean flag and a parameter list.
    using ParFunc = std::function<void(const std::vector<std::size_t> &, bool,
                                       const std::vector<Precision> &)>;
    /// Lookup table from a name to a `ParFunc`.
    using FMap = std::unordered_map<std::string, ParFunc>;

    /**
     * @brief Construct the base and create a data buffer holding
     * `Pennylane::Util::exp2(num_qubits)` elements.
     *
     * @param num_qubits Number of qubits, forwarded to `StateVectorBase`.
     * @param device_id Device identifier forwarded to the data buffer.
     * @param stream_id CUDA stream forwarded to the data buffer.
     * @param device_alloc Forwarded to the data buffer constructor.
     */
    StateVectorCudaBase(std::size_t num_qubits, int device_id = 0,
                        cudaStream_t stream_id = 0, bool device_alloc = true)
        : StateVectorBase<Precision, Derived>(num_qubits),
          data_buffer_{std::make_unique<LightningGPU::DataBuffer<CFP_t>>(
              Pennylane::Util::exp2(num_qubits), device_id, stream_id,
              device_alloc)} {}

    /**
     * @brief Construct the base and create a data buffer holding
     * `Pennylane::Util::exp2(num_qubits)` elements, described by a device
     * tag.
     *
     * @param num_qubits Number of qubits, forwarded to `StateVectorBase`.
     * @param dev_tag Device tag forwarded to the data buffer.
     * @param device_alloc Forwarded to the data buffer constructor.
     */
    StateVectorCudaBase(std::size_t num_qubits,
                        LightningGPU::DevTag<int> dev_tag,
                        bool device_alloc = true)
        : StateVectorBase<Precision, Derived>(num_qubits),
          data_buffer_{std::make_unique<LightningGPU::DataBuffer<CFP_t>>(
              Pennylane::Util::exp2(num_qubits), dev_tag, device_alloc)} {}

    // Default construction, copying and moving are all disabled.
    StateVectorCudaBase() = delete;
    StateVectorCudaBase(const StateVectorCudaBase &other) = delete;
    StateVectorCudaBase(StateVectorCudaBase &&other) = delete;

    virtual ~StateVectorCudaBase() = default;

    /**
     * @brief Return the gate-name lookup table `ctrl_map_`.
     *
     * NOTE(review): the values look like the number of control wires per
     * gate (e.g. "CNOT" -> 1, "Toffoli" -> 2) — confirm against callers.
     *
     * @return const std::unordered_map<std::string, std::size_t>&
     */
    auto getCtrlMap() -> const std::unordered_map<std::string, std::size_t> & {
        return ctrl_map_;
    }

    /**
     * @brief Return the gate-name lookup table.
     *
     * NOTE(review): this returns the same `ctrl_map_` object as
     * `getCtrlMap()`; this class holds no separate table for parametric
     * gates. Behaviour is kept unchanged for existing callers — confirm
     * whether a dedicated table was intended.
     *
     * @return const std::unordered_map<std::string, std::size_t>&
     */
    auto getParametricGatesMap()
        -> const std::unordered_map<std::string, std::size_t> & {
        return ctrl_map_;
    }

  private:
    /// Owned buffer holding the state-vector data.
    std::unique_ptr<LightningGPU::DataBuffer<CFP_t>> data_buffer_;

    /// Names of gates without parameters. This member is private and is not
    /// read anywhere in this class.
    const std::unordered_set<std::string> const_gates_{
        "Identity", "PauliX", "PauliY", "PauliZ", "Hadamard", "T",      "S",
        "CNOT",     "SWAP",   "CY",     "CZ",     "CSWAP",    "Toffoli"};

    /// Table exposed by `getCtrlMap()` and `getParametricGatesMap()`.
    const std::unordered_map<std::string, std::size_t> ctrl_map_{
        // Add mapping from function name to required wires.
        {"Identity", 0},
        {"PauliX", 0},
        {"PauliY", 0},
        {"PauliZ", 0},
        {"Hadamard", 0},
        {"T", 0},
        {"S", 0},
        {"RX", 0},
        {"RY", 0},
        {"RZ", 0},
        {"Rot", 0},
        {"PhaseShift", 0},
        {"ControlledPhaseShift", 1},
        {"CNOT", 1},
        {"SWAP", 0},
        {"CY", 1},
        {"CZ", 1},
        {"CRX", 1},
        {"CRY", 1},
        {"CRZ", 1},
        {"CRot", 1},
        {"CSWAP", 1},
        {"Toffoli", 2}};
};

} // namespace Pennylane::LightningGPU