Program Listing for File AVXUtil.hpp
↰ Return to documentation for file (pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/AVXUtil.hpp)
// Copyright 2018-2023 Xanadu Quantum Technologies Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "BitUtil.hpp" // fillTrailingOnes, fillLeadingOnes, log2PerfectPower
#include "Error.hpp"
#include "Macros.hpp"
#include "Util.hpp" // INVSQRT2
#include <immintrin.h>
#include <array>
#include <cstdint>
#include <cstdlib>
namespace Pennylane::LightningQubit::Gates::AVXCommon {
using Pennylane::Util::fillLeadingOnes;
using Pennylane::Util::fillTrailingOnes;
using Pennylane::Util::INVSQRT2;
using Pennylane::Util::log2PerfectPower;
/**
 * @brief Trait mapping a scalar type and a packed element count to the
 * corresponding AVX register type.
 *
 * The primary template carries no `Type` member; it only checks that the
 * packed data occupies exactly 32 bytes or exactly 64 bytes. The register
 * type itself is supplied by the explicit specializations.
 *
 * @tparam PrecisionT Scalar element type.
 * @tparam packed_size Number of PrecisionT elements packed in one register.
 */
template <typename PrecisionT, size_t packed_size> struct AVXIntrinsic {
    static_assert(sizeof(PrecisionT) * packed_size == 32 ||
                  sizeof(PrecisionT) * packed_size == 64);
};
/**
 * @brief Shorthand for the register type selected by AVXIntrinsic.
 *
 * @tparam PrecisionT Scalar element type.
 * @tparam packed_size Number of PrecisionT elements packed in one register.
 */
template <typename PrecisionT, size_t packed_size>
using AVXIntrinsicType = typename AVXIntrinsic<PrecisionT, packed_size>::Type;
#ifdef PL_USE_AVX2
// Register type used for eight packed floats. This specialization is only
// compiled when PL_USE_AVX2 is defined.
template <> struct AVXIntrinsic<float, 8> {
// AVX2
using Type = __m256;
};
// Register type used for four packed doubles. This specialization is only
// compiled when PL_USE_AVX2 is defined.
template <> struct AVXIntrinsic<double, 4> {
// AVX2
using Type = __m256d;
};
#endif
#ifdef PL_USE_AVX512F
// Register type used for sixteen packed floats. This specialization is only
// compiled when PL_USE_AVX512F is defined.
template <> struct AVXIntrinsic<float, 16> {
// AVX512
using Type = __m512;
};
// Register type used for eight packed doubles. This specialization is only
// compiled when PL_USE_AVX512F is defined.
template <> struct AVXIntrinsic<double, 8> {
// AVX512
using Type = __m512d;
};
#endif
// Declaration only: returns a register filled with +1/-1 factors selected by
// rev_wire. The definitions are the explicit specializations for each
// supported (PrecisionT, packed_size) pair; each one accepts only a small
// range of rev_wire values.
template <typename PrecisionT, size_t packed_size>
constexpr auto internalParity(size_t rev_wire)
-> AVXIntrinsicType<PrecisionT, packed_size>;
#ifdef PL_USE_AVX2
/**
 * @brief Sign factors for eight packed floats.
 *
 * @param rev_wire Wire index inside the register; only 0 and 1 are handled.
 * @return Register whose lanes are +1 or -1 according to rev_wire. Lanes are
 * signed in equal pairs (presumably the real and imaginary parts of one
 * amplitude -- confirm against the kernels that use this).
 */
template <> constexpr auto internalParity<float, 8>(size_t rev_wire) -> __m256 {
    if (rev_wire == 0) {
        // When Z is applied to the 0th qubit
        return __m256{1.0F, 1.0F, -1.0F, -1.0F, 1.0F, 1.0F, -1.0F, -1.0F};
    }
    if (rev_wire == 1) {
        // When Z is applied to the 1st qubit
        return __m256{1.0F, 1.0F, 1.0F, 1.0F, -1.0F, -1.0F, -1.0F, -1.0F};
    }
    // No other wire index is expected for this packing.
    PL_UNREACHABLE;
    return _mm256_setzero_ps();
}
/**
 * @brief Sign factors for four packed doubles.
 *
 * @param rev_wire Wire index inside the register; asserted to be 0.
 * @return Register {+1, +1, -1, -1}.
 */
template <>
constexpr auto internalParity<double, 4>([[maybe_unused]] size_t rev_wire)
    -> __m256d {
    // Wire 0 is the only index accepted for this packing.
    PL_ASSERT(rev_wire == 0);
    // When Z is applied to the 0th qubit
    const __m256d z_on_wire0{1.0, 1.0, -1.0, -1.0};
    return z_on_wire0;
}
#endif
#ifdef PL_USE_AVX512F
// LCOV_EXCL_START
/**
 * @brief Sign factors for sixteen packed floats.
 *
 * @param rev_wire Wire index inside the register; only 0, 1 and 2 are
 * handled.
 * @return Register whose lanes are +1 or -1 according to rev_wire.
 */
template <>
constexpr auto internalParity<float, 16>(size_t rev_wire) -> __m512 {
    // AVX512 with float
    // clang-format off
    if (rev_wire == 0) {
        // When Z is applied to the 0th qubit
        return __m512{ 1.0F,  1.0F, -1.0F, -1.0F,
                       1.0F,  1.0F, -1.0F, -1.0F,
                       1.0F,  1.0F, -1.0F, -1.0F,
                       1.0F,  1.0F, -1.0F, -1.0F};
    }
    if (rev_wire == 1) {
        // When Z is applied to the 1st qubit
        return __m512{ 1.0F,  1.0F,  1.0F,  1.0F,
                      -1.0F, -1.0F, -1.0F, -1.0F,
                       1.0F,  1.0F,  1.0F,  1.0F,
                      -1.0F, -1.0F, -1.0F, -1.0F};
    }
    if (rev_wire == 2) {
        // When Z is applied to the 2nd qubit
        return __m512{ 1.0F,  1.0F,  1.0F,  1.0F,
                       1.0F,  1.0F,  1.0F,  1.0F,
                      -1.0F, -1.0F, -1.0F, -1.0F,
                      -1.0F, -1.0F, -1.0F, -1.0F};
    }
    // clang-format on
    // No other wire index is expected for this packing.
    PL_UNREACHABLE;
    return __m512{0};
}
/**
 * @brief Sign factors for eight packed doubles.
 *
 * @param rev_wire Wire index inside the register; only 0 and 1 are handled.
 * @return Register whose lanes are +1 or -1 according to rev_wire.
 */
template <>
constexpr auto internalParity<double, 8>(size_t rev_wire) -> __m512d {
    // AVX512 with double
    if (rev_wire == 0) {
        // When Z is applied to the 0th qubit
        return __m512d{1.0, 1.0, -1.0, -1.0, 1.0, 1.0, -1.0, -1.0};
    }
    if (rev_wire == 1) {
        // When Z is applied to the 1st qubit
        return __m512d{1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0};
    }
    // No other wire index is expected for this packing.
    PL_UNREACHABLE;
    return __m512d{0};
}
// LCOV_EXCL_STOP
#endif
/// Factory trait; each supported (PrecisionT, packed_size) pair provides a
/// specialization with a static `create(val)` member.
template <typename PrecisionT, size_t packed_size> struct ImagFactor;

/**
 * @brief Build the register produced by ImagFactor for the given packing.
 *
 * @tparam PrecisionT Scalar element type.
 * @tparam packed_size Number of PrecisionT elements packed in one register.
 * @param val Magnitude handed to the factory (defaults to 1.0).
 * @return Whatever `ImagFactor<PrecisionT, packed_size>::create(val)` yields.
 */
template <typename PrecisionT, size_t packed_size>
constexpr auto imagFactor(PrecisionT val = 1.0) {
    using Factory = ImagFactor<PrecisionT, packed_size>;
    return Factory::create(val);
}
#ifdef PL_USE_AVX2
/// ImagFactor for eight packed floats.
template <> struct ImagFactor<float, 8> {
    /**
     * @brief Alternate -val / +val across the eight lanes.
     *
     * @param val Magnitude of every lane.
     * @return Register {-val, val, -val, val, -val, val, -val, val}.
     */
    constexpr static auto create(float val) -> AVXIntrinsicType<float, 8> {
        const float neg = -val;
        return __m256{neg, val, neg, val, neg, val, neg, val};
    }
};
/// ImagFactor for four packed doubles.
template <> struct ImagFactor<double, 4> {
    /**
     * @brief Alternate -val / +val across the four lanes.
     *
     * @param val Magnitude of every lane.
     * @return Register {-val, val, -val, val}.
     */
    constexpr static auto create(double val) -> AVXIntrinsicType<double, 4> {
        const double neg = -val;
        return __m256d{neg, val, neg, val};
    }
};
#endif
#ifdef PL_USE_AVX512F
// LCOV_EXCL_START
/// ImagFactor for sixteen packed floats.
template <> struct ImagFactor<float, 16> {
    /**
     * @brief Alternate -val / +val across the sixteen lanes.
     *
     * @param val Magnitude of every lane.
     * @return Register with -val in even lanes and +val in odd lanes.
     */
    constexpr static auto create(float val) -> AVXIntrinsicType<float, 16> {
        const float neg = -val;
        return __m512{neg, val, neg, val, neg, val, neg, val,
                      neg, val, neg, val, neg, val, neg, val};
    }
};
/// ImagFactor for eight packed doubles.
template <> struct ImagFactor<double, 8> {
    /**
     * @brief Alternate -val / +val across the eight lanes.
     *
     * @param val Magnitude of every lane.
     * @return Register {-val, val, -val, val, -val, val, -val, val}.
     */
    constexpr static auto create(double val) -> AVXIntrinsicType<double, 8> {
        const double neg = -val;
        return __m512d{neg, val, neg, val, neg, val, neg, val};
    }
};
// LCOV_EXCL_STOP
#endif
// Factory trait for a register with every lane set to the same value.
// Declaration only; each supported (PrecisionT, packed_size) pair provides a
// specialization with a static create(val) member.
template <typename PrecisionT, size_t packed_size> struct Set1;
#ifdef PL_USE_AVX2
/// Set1 for eight packed floats.
template <> struct Set1<float, 8> {
    /**
     * @brief Copy one value into all eight lanes.
     *
     * @param val Value to replicate.
     * @return Register with every lane equal to val.
     */
    constexpr static auto create(float val) -> AVXIntrinsicType<float, 8> {
        const __m256 broadcast{val, val, val, val, val, val, val, val};
        return broadcast;
    }
};
/// Set1 for four packed doubles.
template <> struct Set1<double, 4> {
    /**
     * @brief Copy one value into all four lanes.
     *
     * @param val Value to replicate.
     * @return Register with every lane equal to val.
     */
    constexpr static auto create(double val) -> AVXIntrinsicType<double, 4> {
        const __m256d broadcast{val, val, val, val};
        return broadcast;
    }
};
#endif
#ifdef PL_USE_AVX512F
// LCOV_EXCL_START
/// Set1 for sixteen packed floats.
template <> struct Set1<float, 16> {
    /**
     * @brief Copy one value into all sixteen lanes.
     *
     * @param val Value to replicate.
     * @return Register with every lane equal to val.
     */
    constexpr static auto create(float val) -> AVXIntrinsicType<float, 16> {
        const __m512 broadcast{val, val, val, val, val, val, val, val,
                               val, val, val, val, val, val, val, val};
        return broadcast;
    }
};
/// Set1 for eight packed doubles.
template <> struct Set1<double, 8> {
    /**
     * @brief Copy one value into all eight lanes.
     *
     * @param val Value to replicate.
     * @return Register with every lane equal to val.
     */
    constexpr static auto create(double val) -> AVXIntrinsicType<double, 8> {
        const __m512d broadcast{val, val, val, val, val, val, val, val};
        return broadcast;
    }
};
// LCOV_EXCL_STOP
#endif
template <typename PrecisionT, size_t packed_size>
constexpr auto set1(PrecisionT val) {
return Set1<PrecisionT, packed_size>::create(val);
}
// Compile-time constant derived from the packing width:
// value = log2PerfectPower(packed_size / 2).
// NOTE(review): packed_size / 2 is presumably the number of complex
// amplitudes held in one register, making `value` the number of wires
// addressed inside a register -- confirm against log2PerfectPower in
// BitUtil.hpp.
template <size_t packed_size> struct InternalWires {
constexpr static auto value = log2PerfectPower(packed_size / 2);
};
// Variable-template shorthand for InternalWires<packed_size>::value.
template <size_t packed_size>
constexpr auto internal_wires_v = InternalWires<packed_size>::value;
#ifdef PL_USE_AVX2
/**
 * @brief Load eight floats from an array into a register, lane i taking
 * arr[i].
 *
 * @param arr Source values.
 * @return Register holding the eight values in array order.
 */
constexpr static auto setValue(const std::array<float, 8> &arr)
    -> AVXIntrinsicType<float, 8> {
    // NOLINTBEGIN(readability-magic-numbers)
    return __m256{std::get<0>(arr), std::get<1>(arr), std::get<2>(arr),
                  std::get<3>(arr), std::get<4>(arr), std::get<5>(arr),
                  std::get<6>(arr), std::get<7>(arr)};
    // NOLINTEND(readability-magic-numbers)
}
/**
 * @brief Load four doubles from an array into a register, lane i taking
 * arr[i].
 *
 * @param arr Source values.
 * @return Register holding the four values in array order.
 */
constexpr static auto setValue(const std::array<double, 4> &arr)
    -> AVXIntrinsicType<double, 4> {
    // NOLINTBEGIN(readability-magic-numbers)
    return __m256d{std::get<0>(arr), std::get<1>(arr), std::get<2>(arr),
                   std::get<3>(arr)};
    // NOLINTEND(readability-magic-numbers)
}
#endif
#ifdef PL_USE_AVX512F
/**
 * @brief Load sixteen floats from an array into a register, lane i taking
 * arr[i].
 *
 * @param arr Source values.
 * @return Register holding the sixteen values in array order.
 */
constexpr static auto setValue(const std::array<float, 16> &arr)
    -> AVXIntrinsicType<float, 16> {
    // NOLINTBEGIN(readability-magic-numbers)
    return __m512{std::get<0>(arr),  std::get<1>(arr),  std::get<2>(arr),
                  std::get<3>(arr),  std::get<4>(arr),  std::get<5>(arr),
                  std::get<6>(arr),  std::get<7>(arr),  std::get<8>(arr),
                  std::get<9>(arr),  std::get<10>(arr), std::get<11>(arr),
                  std::get<12>(arr), std::get<13>(arr), std::get<14>(arr),
                  std::get<15>(arr)};
    // NOLINTEND(readability-magic-numbers)
}
/**
 * @brief Load eight doubles from an array into a register, lane i taking
 * arr[i].
 *
 * @param arr Source values.
 * @return Register holding the eight values in array order.
 */
constexpr static auto setValue(const std::array<double, 8> &arr)
    -> AVXIntrinsicType<double, 8> {
    // NOLINTBEGIN(readability-magic-numbers)
    return __m512d{std::get<0>(arr), std::get<1>(arr), std::get<2>(arr),
                   std::get<3>(arr), std::get<4>(arr), std::get<5>(arr),
                   std::get<6>(arr), std::get<7>(arr)};
    // NOLINTEND(readability-magic-numbers)
}
#endif
// clang-format off
#ifdef PL_USE_AVX2
/**
 * @brief Build a __m256i from eight 32-bit integers.
 *
 * Consecutive arguments are packed pairwise into the four 64-bit elements of
 * the register: the first argument of each pair fills the low 32 bits and the
 * second fills the high 32 bits.
 *
 * Every argument keeps its exact 32-bit pattern, including negative values.
 * Combining the halves as signed 64-bit integers would sign-extend a negative
 * low half over the high half, and would left-shift a negative value.
 *
 * @param e0 Low half of 64-bit element 0.
 * @param e1 High half of 64-bit element 0.
 * @param e2 Low half of 64-bit element 1.
 * @param e3 High half of 64-bit element 1.
 * @param e4 Low half of 64-bit element 2.
 * @param e5 High half of 64-bit element 2.
 * @param e6 Low half of 64-bit element 3.
 * @param e7 High half of 64-bit element 3.
 * @return Register containing the eight values.
 */
constexpr __m256i setr256i(int32_t e0, int32_t e1, int32_t e2, int32_t e3,
                           int32_t e4, int32_t e5, int32_t e6, int32_t e7) {
    // Going through uint32_t zero-extends each half, so the OR cannot smear
    // sign bits and the shift always acts on an unsigned operand.
    const auto pack = [](int32_t lo, int32_t hi) constexpr -> int64_t {
        const auto lo_bits = static_cast<uint64_t>(static_cast<uint32_t>(lo));
        const auto hi_bits = static_cast<uint64_t>(static_cast<uint32_t>(hi));
        return static_cast<int64_t>((hi_bits << 32U) | lo_bits);
    };
    return __m256i{pack(e0, e1), pack(e2, e3), pack(e4, e5), pack(e6, e7)};
}
#endif
#ifdef PL_USE_AVX512F
// LCOV_EXCL_START
/**
 * @brief Build a __m512i from sixteen 32-bit integers.
 *
 * Consecutive arguments are packed pairwise into the eight 64-bit elements of
 * the register: the first argument of each pair (e0, e2, ..., e14) fills the
 * low 32 bits and the second (e1, e3, ..., e15) fills the high 32 bits.
 *
 * Every argument keeps its exact 32-bit pattern, including negative values.
 * Combining the halves as signed 64-bit integers would sign-extend a negative
 * low half over the high half, and would left-shift a negative value.
 *
 * @return Register containing the sixteen values.
 */
constexpr __m512i setr512i(int32_t e0, int32_t e1, int32_t e2, int32_t e3,
                           int32_t e4, int32_t e5, int32_t e6, int32_t e7,
                           int32_t e8, int32_t e9, int32_t e10, int32_t e11,
                           int32_t e12, int32_t e13, int32_t e14, int32_t e15) {
    // Going through uint32_t zero-extends each half, so the OR cannot smear
    // sign bits and the shift always acts on an unsigned operand.
    const auto pack = [](int32_t lo, int32_t hi) constexpr -> int64_t {
        const auto lo_bits = static_cast<uint64_t>(static_cast<uint32_t>(lo));
        const auto hi_bits = static_cast<uint64_t>(static_cast<uint32_t>(hi));
        return static_cast<int64_t>((hi_bits << 32U) | lo_bits);
    };
    return __m512i{pack(e0, e1),   pack(e2, e3),   pack(e4, e5),
                   pack(e6, e7),   pack(e8, e9),   pack(e10, e11),
                   pack(e12, e13), pack(e14, e15)};
}
/**
 * @brief Build a __m512i from eight 64-bit integers, element i taking the
 * i-th argument.
 *
 * @return Register containing the eight values in argument order.
 */
constexpr __m512i setr512i(int64_t e0, int64_t e1, int64_t e2, int64_t e3,
                           int64_t e4, int64_t e5, int64_t e6, int64_t e7) {
    const __m512i packed{e0, e1, e2, e3, e4, e5, e6, e7};
    return packed;
}
// LCOV_EXCL_STOP
#endif
// clang-format on
/**
 * @brief Build a register of sign factors from a per-index callable.
 *
 * For every idx in [0, packed_size / 2), both lanes 2*idx and 2*idx + 1 are
 * set to `1 - 2 * func(idx)`, so a result of 0 gives +1 and a result of 1
 * gives -1.
 *
 * @tparam PrecisionT Scalar element type.
 * @tparam packed_size Number of PrecisionT elements packed in one register.
 * @tparam Func Callable taking a size_t; its result must be convertible to
 * PrecisionT.
 * @param func Callable evaluated exactly once per idx.
 * @return Register loaded from the computed values via setValue.
 */
template <typename PrecisionT, size_t packed_size, typename Func>
auto toParity(Func &&func) -> AVXIntrinsicType<PrecisionT, packed_size> {
    std::array<PrecisionT, packed_size> data{};
    PL_LOOP_SIMD
    for (size_t idx = 0; idx < packed_size / 2; idx++) {
        // Evaluate func once and reuse the result for both lanes of the
        // pair instead of calling it a second time.
        const PrecisionT sign = static_cast<PrecisionT>(1.0) -
                                2 * static_cast<PrecisionT>(func(idx));
        data[2 * idx + 0] = sign;
        data[2 * idx + 1] = sign;
    }
    return setValue(data);
}
/**
 * @brief Build a register in which each adjacent pair of lanes holds the
 * same value, taken from a per-index callable.
 *
 * For every idx in [0, packed_size / 2), lanes 2*idx and 2*idx + 1 are both
 * set to `func(idx)` converted to PrecisionT.
 *
 * @tparam PrecisionT Scalar element type.
 * @tparam packed_size Number of PrecisionT elements packed in one register.
 * @tparam Func Callable taking a size_t; its result must be convertible to
 * PrecisionT.
 * @param func Callable evaluated once per idx.
 * @return Register loaded from the computed values via setValue.
 */
template <typename PrecisionT, size_t packed_size, typename Func>
auto setValueOneTwo(Func &&func) -> AVXIntrinsicType<PrecisionT, packed_size> {
    std::array<PrecisionT, packed_size> lanes{};
    PL_LOOP_SIMD
    for (size_t pair = 0; pair < packed_size / 2; pair++) {
        const PrecisionT shared = static_cast<PrecisionT>(func(pair));
        lanes[2 * pair] = shared;
        lanes[2 * pair + 1] = shared;
    }
    return setValue(lanes);
}
} // namespace Pennylane::LightningQubit::Gates::AVXCommon
api/program_listing_file_pennylane_lightning_core_src_simulators_lightning_qubit_gates_cpu_kernels_avx_common_AVXUtil.hpp
Download Python script
Download Notebook
View on GitHub