Program Listing for File ApplyCRY.hpp

Return to documentation for file (pennylane_lightning/core/src/simulators/lightning_qubit/gates/cpu_kernels/avx_common/ApplyCRY.hpp)

// Copyright 2023 Xanadu Quantum Technologies Inc.

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//     http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "AVXConceptType.hpp"
#include "AVXUtil.hpp"
#include "BitUtil.hpp"
#include "Blender.hpp"
#include "Permutation.hpp"

#include "ConstantUtil.hpp"
#include "Util.hpp"

#include <complex>
#include <utility>

namespace Pennylane::LightningQubit::Gates::AVXCommon {
template <typename PrecisionT, std::size_t packed_size> struct ApplyCRY {
    using Precision = PrecisionT;
    using PrecisionAVXConcept = AVXConceptType<PrecisionT, packed_size>;

    constexpr static auto packed_size_ = packed_size;
    constexpr static bool symmetric = false;

    // clang-format off
    // clang-format on

    template <size_t control, std::size_t target>
    static consteval auto applyInternalInternalPermutation() {
        std::array<uint8_t, packed_size> perm{};
        for (size_t k = 0; k < packed_size / 2; k++) {
            if ((k >> control) & 1U) { // if control bit is 1
                perm[2 * k + 0] = 2 * (k ^ (1U << target)) + 0;
                perm[2 * k + 1] = 2 * (k ^ (1U << target)) + 1;
            } else {
                perm[2 * k + 0] = 2 * k + 0;
                perm[2 * k + 1] = 2 * k + 1;
            }
        }
        return Permutation::compilePermutation<PrecisionT>(perm);
    }

    template <size_t control, std::size_t target, class ParamT>
    static auto applyInternalInternalOffDiagFactor(ParamT angle) {
        std::array<PrecisionT, packed_size> arr{};
        PL_LOOP_SIMD
        for (size_t k = 0; k < packed_size / 2; k++) {
            if ((k >> control) & 1U) { // if control bit is 1
                if ((k >> target) & 1U) {
                    // if target bit is 1 (was 0) -> sin(phi/2)
                    arr[2 * k + 0] = std::sin(angle / 2);
                    arr[2 * k + 1] = std::sin(angle / 2);
                } else {
                    // if target bit is 0 (was 1) -> -sin(phi/2)
                    arr[2 * k + 0] = -std::sin(angle / 2);
                    arr[2 * k + 1] = -std::sin(angle / 2);
                }
            } else {
                arr[2 * k + 0] = Precision{0.0};
                arr[2 * k + 1] = Precision{0.0};
            }
        }
        return setValue(arr);
    }

    template <size_t control, std::size_t target, class ParamT>
    static auto applyInternalInternalDiagFactor(ParamT angle) {
        std::array<PrecisionT, packed_size> arr{};

        PL_LOOP_SIMD
        for (size_t k = 0; k < packed_size / 2; k++) {
            if ((k >> control) & 1U) { // if control bit is 1
                arr[2 * k + 0] = std::cos(angle / 2);
                arr[2 * k + 1] = std::cos(angle / 2);
            } else {
                arr[2 * k + 0] = Precision{1.0};
                arr[2 * k + 1] = Precision{1.0};
            }
        }
        return setValue(arr);
    }

    template <size_t control, std::size_t target, class ParamT>
    static void applyInternalInternal(std::complex<PrecisionT> *arr,
                                      std::size_t num_qubits, bool inverse,
                                      ParamT angle) {
        constexpr static auto perm =
            applyInternalInternalPermutation<control, target>();

        if (inverse) {
            angle *= -1.0;
        }

        const auto off_diag_factor =
            applyInternalInternalOffDiagFactor<control, target>(angle);
        const auto diag_factor =
            applyInternalInternalDiagFactor<control, target>(angle);
        PL_LOOP_PARALLEL(1)
        for (size_t n = 0; n < exp2(num_qubits); n += packed_size / 2) {
            const auto v = PrecisionAVXConcept::load(arr + n);
            const auto diag_w = diag_factor * v;
            const auto off_diag_w =
                off_diag_factor * Permutation::permute<perm>(v);
            PrecisionAVXConcept::store(arr + n, diag_w + off_diag_w);
        }
    }

    template <size_t control, typename ParamT>
    static auto applyInternalExternalDiagFactor(ParamT angle) {
        std::array<Precision, packed_size> arr{};
        PL_LOOP_SIMD
        for (size_t k = 0; k < packed_size / 2; k++) {
            if ((k >> control) & 1U) {
                // if control is 1
                arr[2 * k + 0] = std::cos(angle / 2);
                arr[2 * k + 1] = std::cos(angle / 2);
            } else {
                arr[2 * k + 0] = 1.0;
                arr[2 * k + 1] = 1.0;
            }
        }
        return setValue(arr);
    }

    template <size_t control, typename ParamT>
    static auto applyInternalExternalOffDiagFactor(ParamT angle) {
        std::array<Precision, packed_size> arr{};
        PL_LOOP_SIMD
        for (size_t k = 0; k < packed_size / 2; k++) {
            if ((k >> control) & 1U) {
                // if control is 1
                arr[2 * k + 0] = std::sin(angle / 2);
                arr[2 * k + 1] = std::sin(angle / 2);
            } else {
                arr[2 * k + 0] = 0.0;
                arr[2 * k + 1] = 0.0;
            }
        }
        return setValue(arr);
    }

    template <size_t control, typename ParamT>
    static void
    applyInternalExternal(std::complex<PrecisionT> *arr, std::size_t num_qubits,
                          std::size_t target, bool inverse, ParamT angle) {
        // control qubit is internal but target qubit is external
        // const std::size_t rev_wire_min = std::min(rev_wire0, rev_wire1);
        using namespace Permutation;

        const std::size_t target_rev_wire_shift =
            (static_cast<std::size_t>(1U) << target);
        const std::size_t target_wire_parity = fillTrailingOnes(target);
        const std::size_t target_wire_parity_inv = fillLeadingOnes(target + 1);

        if (inverse) {
            angle *= -1.0;
        }

        const auto diag_factor =
            applyInternalExternalDiagFactor<control>(angle);
        const auto off_diag_factor_p =
            applyInternalExternalOffDiagFactor<control>(angle);
        const auto off_diag_factor_m = -off_diag_factor_p;
        PL_LOOP_PARALLEL(1)
        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
            const std::size_t i0 =
                ((k << 1U) & target_wire_parity_inv) | (target_wire_parity & k);
            const std::size_t i1 = i0 | target_rev_wire_shift;

            const auto v0 = PrecisionAVXConcept::load(arr + i0); // target is 0
            const auto v1 = PrecisionAVXConcept::load(arr + i1); // target is 1

            PrecisionAVXConcept::store(arr + i0, diag_factor * v0 +
                                                     off_diag_factor_m * v1);
            PrecisionAVXConcept::store(arr + i1, diag_factor * v1 +
                                                     off_diag_factor_p * v0);
        }
    }

    template <size_t target, typename ParamT>
    static auto applyExternalInternalOffDiagFactor(ParamT angle) {
        std::array<Precision, packed_size> arr{};
        PL_LOOP_SIMD
        for (size_t k = 0; k < packed_size / 2; k++) {
            if ((k >> target) & 1U) { // target bit is 1 (was 0)
                arr[2 * k + 0] = std::sin(angle / 2);
                arr[2 * k + 1] = std::sin(angle / 2);
            } else {
                arr[2 * k + 0] = -std::sin(angle / 2);
                arr[2 * k + 1] = -std::sin(angle / 2);
            }
        }
        return setValue(arr);
    }

    template <size_t target, typename ParamT>
    static void
    applyExternalInternal(std::complex<PrecisionT> *arr, std::size_t num_qubits,
                          std::size_t control, bool inverse, ParamT angle) {
        // control qubit is external but target qubit is external
        using namespace Permutation;

        const std::size_t control_shift =
            (static_cast<std::size_t>(1U) << control);
        const std::size_t max_wire_parity = fillTrailingOnes(control);
        const std::size_t max_wire_parity_inv = fillLeadingOnes(control + 1);

        constexpr static auto perm = compilePermutation<Precision>(
            flip(identity<packed_size>(), target));

        if (inverse) {
            angle *= -1.0;
        }
        const auto diag_factor =
            set1<PrecisionT, packed_size>(std::cos(angle / 2));
        const auto offdiag_factor =
            applyExternalInternalOffDiagFactor<target>(angle);

        PL_LOOP_PARALLEL(1)
        for (size_t k = 0; k < exp2(num_qubits - 1); k += packed_size / 2) {
            const std::size_t i0 =
                ((k << 1U) & max_wire_parity_inv) | (max_wire_parity & k);
            const std::size_t i1 = i0 | control_shift;

            const auto v1 =
                PrecisionAVXConcept::load(arr + i1); // control bit is 1
            const auto w1 = Permutation::permute<perm>(v1);
            PrecisionAVXConcept::store(arr + i1,
                                       diag_factor * v1 + offdiag_factor * w1);
        }
    }

    template <typename ParamT>
    static void applyExternalExternal(std::complex<PrecisionT> *arr,
                                      const std::size_t num_qubits,
                                      const std::size_t control,
                                      const std::size_t target, bool inverse,
                                      ParamT angle) {
        using namespace Permutation;
        const std::size_t control_shift = static_cast<std::size_t>(1U)
                                          << control;
        const std::size_t target_shift = static_cast<std::size_t>(1U) << target;

        const std::size_t rev_wire_min = std::min(control, target);
        const std::size_t rev_wire_max = std::max(control, target);

        const std::size_t parity_low = fillTrailingOnes(rev_wire_min);
        const std::size_t parity_high = fillLeadingOnes(rev_wire_max + 1);
        const std::size_t parity_middle =
            fillLeadingOnes(rev_wire_min + 1) & fillTrailingOnes(rev_wire_max);

        if (inverse) {
            angle *= -1.0;
        }

        const auto cos_factor =
            set1<PrecisionT, packed_size>(std::cos(angle / 2));
        const auto sin_factor =
            set1<PrecisionT, packed_size>(std::sin(angle / 2));
        PL_LOOP_PARALLEL(1)
        for (size_t k = 0; k < exp2(num_qubits - 2); k += packed_size / 2) {
            const std::size_t i00 = ((k << 2U) & parity_high) |
                                    ((k << 1U) & parity_middle) |
                                    (k & parity_low);
            const std::size_t i10 = i00 | control_shift;
            const std::size_t i11 = i00 | control_shift | target_shift;

            const auto v10 = PrecisionAVXConcept::load(arr + i10); // 10
            const auto v11 = PrecisionAVXConcept::load(arr + i11); // 11

            PrecisionAVXConcept::store(arr + i10,
                                       cos_factor * v10 - sin_factor * v11);
            PrecisionAVXConcept::store(arr + i11,
                                       sin_factor * v10 + cos_factor * v11);
        }
    }
};
} // namespace Pennylane::LightningQubit::Gates::AVXCommon