Program Listing for File TNCudaGateCache.hpp
↰ Return to documentation for file (pennylane_lightning/core/src/simulators/lightning_tensor/tncuda/gates/TNCudaGateCache.hpp)
// Copyright 2024 Xanadu Quantum Technologies Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <complex>
#include <functional>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "DataBuffer.hpp"
#include "DevTag.hpp"
#include "TensorCuda.hpp"
#include "cuGates_host.hpp"
// NOTE(review): an anonymous namespace in a *header* is an anti-pattern —
// every translation unit that includes this file receives its own internal
// copy of these aliases, and the using-directives pollute the lookup scope
// of every includer. Consider moving these aliases into the
// Pennylane::LightningTensor::TNCuda::Gates namespace below instead.
namespace {
// Shorthand for the LightningGPU utility namespace (provides getCudaType, etc.).
namespace cuUtil = Pennylane::LightningGPU::Util;
using namespace Pennylane::LightningGPU;
using namespace cuUtil;
using namespace Pennylane::LightningTensor::TNCuda;
} // namespace
namespace Pennylane::LightningTensor::TNCuda::Gates {
/**
 * @brief Device-side cache of gate tensor data for the tensor-network
 * backend.
 *
 * Each entry maps the integer id of a gate tensor operator in the compute
 * graph to the gate's identifying key (name + parameter values) together
 * with the gate's matrix data stored on the GPU.
 *
 * @tparam PrecisionT Floating point precision.
 */
template <class PrecisionT> class TNCudaGateCache {
  public:
    using CFP_t = decltype(cuUtil::getCudaType(PrecisionT{}));
    using gate_key_info = std::pair<const std::string, std::vector<PrecisionT>>;
    using gate_info = std::pair<gate_key_info, TensorCuda<PrecisionT>>;

    TNCudaGateCache() = delete;
    TNCudaGateCache(const TNCudaGateCache &other) = delete;
    TNCudaGateCache(TNCudaGateCache &&other) = delete;
    // Rule of Five: the assignment operators were already implicitly deleted
    // by the user-declared (deleted) move constructor; spell it out so the
    // intent is explicit.
    TNCudaGateCache &operator=(const TNCudaGateCache &other) = delete;
    TNCudaGateCache &operator=(TNCudaGateCache &&other) = delete;

    /**
     * @brief Construct the cache for a given device id and CUDA stream.
     *
     * @param device_id CUDA device id (defaults to device 0).
     * @param stream_id CUDA stream (defaults to the default stream).
     */
    TNCudaGateCache(int device_id = 0, cudaStream_t stream_id = 0)
        : device_tag_(device_id, stream_id), total_alloc_bytes_{0} {}

    /**
     * @brief Construct the cache from an existing device tag.
     *
     * Takes the tag by const reference (previously it was copied into a
     * by-value parameter); the member still stores its own copy.
     *
     * @param device_tag Device/stream tag to copy into the cache.
     */
    TNCudaGateCache(const DevTag<int> &device_tag)
        : device_tag_{device_tag}, total_alloc_bytes_{0} {}

    ~TNCudaGateCache() = default; // was `{}` with a stray trailing semicolon

    /**
     * @brief Add gate numerical data to the cache, indexed by the id of the
     * gate tensor operator in the graph, looking the matrix up by gate name
     * and parameters.
     *
     * @param gate_id Id of the gate tensor operator in the compute graph.
     * @param gate_name Name of the gate.
     * @param gate_param Parameter values; `{}` for a non-parametric gate.
     * @param adjoint Whether to cache the adjoint (conjugate transpose) of
     * the gate instead.
     */
    void add_gate(const std::size_t gate_id, const std::string &gate_name,
                  [[maybe_unused]] std::vector<PrecisionT> gate_param = {},
                  bool adjoint = false) {
        // The key owns copies of name/params; moving it below leaves
        // gate_name/gate_param untouched for the getGateData() call.
        auto gate_key = std::make_pair(gate_name, gate_param);

        auto &gateMap =
            cuGates::DynamicGateDataAccess<PrecisionT>::getInstance();

        add_gate(gate_id, std::move(gate_key),
                 gateMap.getGateData(gate_name, gate_param), adjoint);
    }

    /**
     * @brief Add gate numerical data to the cache from an explicit host-side
     * matrix.
     *
     * @param gate_id Id of the gate tensor operator in the compute graph.
     * @param gate_key Gate name and parameter values.
     * @param gate_data_host Host copy of the gate matrix (row-major).
     * @param adjoint Whether to cache the conjugate transpose instead.
     */
    void add_gate(const std::size_t gate_id, gate_key_info gate_key,
                  const std::vector<CFP_t> &gate_data_host,
                  bool adjoint = false) {
        // A matrix with 2^k elements is stored as a rank-k tensor with
        // extent 2 per mode.
        const std::size_t rank = Pennylane::Util::log2(gate_data_host.size());
        auto modes = std::vector<std::size_t>(rank, 0);
        auto extents = std::vector<std::size_t>(rank, 2);

        auto &&tensor =
            TensorCuda<PrecisionT>(rank, modes, extents, device_tag_);

        // Reuse the iterator returned by emplace instead of performing a
        // second hash lookup via at(); move the key since this function owns
        // it by value. If gate_id is already present the existing entry is
        // reused, matching the previous at()-based behavior.
        const auto emplaced = device_gates_.emplace(
            std::piecewise_construct, std::forward_as_tuple(gate_id),
            std::forward_as_tuple(std::move(gate_key), std::move(tensor)));
        auto &data_buffer = emplaced.first->second.second.getDataBuffer();

        if (adjoint) {
            // TODO: This is a temporary solution for gates data transpose.
            // There should be a better way to handle this, but there is not
            // a big performance issue for now since the size of gates is
            // small.
            // TODO: The implementation here can be optimized by generating
            // the data buffer directly on the device instead of performing
            // the transpose operation here.
            std::vector<CFP_t> data_host_transpose(gate_data_host.size());

            // Overflow-safe shift: the previous `1 << (rank / 2)` was an
            // int shift, which is UB once rank/2 reaches the width of int.
            const std::size_t col_size = std::size_t{1} << (rank / 2);
            const std::size_t row_size = col_size;

            PL_ASSERT(col_size * row_size == gate_data_host.size());

            for (std::size_t idx = 0; idx < gate_data_host.size(); idx++) {
                std::size_t col = idx / row_size;
                std::size_t row = idx % row_size;
                // Conjugate transpose: swap (row, col) and negate the
                // imaginary component.
                data_host_transpose.at(row * col_size + col) = {
                    gate_data_host.at(idx).x, -gate_data_host.at(idx).y};
            }

            data_buffer.CopyHostDataToGpu(data_host_transpose.data(),
                                          data_host_transpose.size());
        } else {
            data_buffer.CopyHostDataToGpu(gate_data_host.data(),
                                          gate_data_host.size());
        }

        total_alloc_bytes_ += (sizeof(CFP_t) * gate_data_host.size());
    }

    /**
     * @brief Returns the pointer to the GPU device memory of a cached gate.
     *
     * @param gate_id Id of the gate tensor operator in the compute graph.
     * @return CFP_t* Pointer to the gate values on device.
     */
    CFP_t *get_gate_device_ptr(const std::size_t gate_id) {
        return device_gates_.at(gate_id).second.getDataBuffer().getData();
    }

    /// @brief Number of gates currently held in the cache.
    auto size() const -> std::size_t { return device_gates_.size(); }

    /**
     * @brief Re-key a cached gate from `old_key` to `new_key` without
     * copying its device data.
     *
     * @param old_key Existing id of the gate in the cache (must be present).
     * @param new_key New id to store the gate under.
     */
    void update_key(const std::size_t old_key, const std::size_t new_key) {
        auto node = device_gates_.extract(old_key);
        // Calling key() on an empty node handle is undefined behavior, so
        // assert that old_key was actually present before re-keying.
        PL_ASSERT(!node.empty());
        node.key() = new_key;
        device_gates_.insert(std::move(node));
    }

  private:
    const DevTag<int> device_tag_;   // device/stream this cache allocates on
    std::size_t total_alloc_bytes_;  // running total of device bytes copied

    // device_gates_ maps the id of a gate tensor operator in the graph to
    // its gate_info: a pair of the gate key (name + parameter values) and
    // the tensor data on device. std::hash<std::size_t> is already the
    // default hasher, so the former hand-rolled wrapper struct was removed.
    std::unordered_map<std::size_t, gate_info> device_gates_;
};
} // namespace Pennylane::LightningTensor::TNCuda::Gates
api/program_listing_file_pennylane_lightning_core_src_simulators_lightning_tensor_tncuda_gates_TNCudaGateCache.hpp
Download Python script
Download Notebook
View on GitHub