Source code for pennylane.gradients.general_shift_rules

# Copyright 2018-2021 Xanadu Quantum Technologies Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains a function for generating generalized parameter shift rules and
helper methods for processing shift rules as well as for creating tapes with
shifted parameters."""
import functools
import itertools
import numbers
import warnings

import numpy as np
from scipy.linalg import solve as linalg_solve

import pennylane as qml
from pennylane.measurements import MeasurementProcess
from pennylane.ops.functions import bind_new_parameters
from pennylane.tape import QuantumScript


def process_shifts(rule, tol=1e-10, batch_duplicates=True):
    """Utility function to process gradient rules.

    Args:
        rule (array): a ``(M, N)`` array corresponding to ``M`` terms
            with parameter shifts. ``N`` has to be either ``2`` or ``3``.
            The first column corresponds to the linear combination coefficients;
            the last column contains the shift values.
            If ``N=3``, the middle column contains the multipliers.
        tol (float): floating point tolerance used when comparing shifts/coefficients
            Terms with coefficients below ``tol`` will be removed.
        batch_duplicates (bool): whether to check the input ``rule`` for duplicate
            shift values in its second column.

    Returns:
        array: The processed shift rule with small entries rounded to 0, sorted
        with respect to the absolute value of the shifts, and groups of shift
        terms with identical (multiplier and) shift fused into one term each,
        if ``batch_duplicates=True``.

    This utility function accepts coefficients and shift values as well as optionally
    multipliers, and performs the following processing:

    - Set all small (within absolute tolerance ``tol``) coefficients and shifts to 0

    - Remove terms where the coefficients are 0 (including the ones set to 0 in the previous step)

    - Terms with the same shift value (and multiplier) are combined into a single term.

    - Finally, the terms are sorted according to the absolute value of ``shift``,
      This ensures that a zero-shift term, if it exists, is returned first.
      For equal absolute values of two shifts, the positive shift is sorted to come first.
    """
    # set all small coefficients, multipliers if present, and shifts to zero.
    rule[np.abs(rule) < tol] = 0

    # remove columns where the coefficients are 0
    rule = rule[~(rule[:, 0] == 0)]

    if batch_duplicates:
        round_decimals = int(-np.log10(tol))
        rounded_rule = np.round(rule[:, 1:], round_decimals)
        # determine unique shifts or (multiplier, shift) combinations
        unique_mods = np.unique(rounded_rule, axis=0)

        if rule.shape[0] != unique_mods.shape[0]:
            matches = np.all(rounded_rule[:, np.newaxis] == unique_mods[np.newaxis, :], axis=-1)
            # TODO: The following line probably can be done in numpy
            coeffs = [np.sum(rule[slc, 0]) for slc in matches.T]
            rule = np.hstack([np.stack(coeffs)[:, np.newaxis], unique_mods])

    # sort columns according to abs(shift), ties are resolved with the sign,
    # positive shifts being returned before negative shifts.
    return rule[np.lexsort((-np.sign(rule[:, -1]), np.abs(rule[:, -1])))]


[docs]@functools.lru_cache(maxsize=None) def eigvals_to_frequencies(eigvals): r"""Convert an eigenvalue spectrum to frequency values, defined as the the set of positive, unique differences of the eigenvalues in the spectrum. Args: eigvals (tuple[int, float]): eigenvalue spectra Returns: tuple[int, float]: frequencies **Example** >>> eigvals = (-0.5, 0, 0, 0.5) >>> eigvals_to_frequencies(eigvals) (0.5, 1.0) """ unique_eigvals = sorted(set(eigvals)) return tuple({j - i for i, j in itertools.combinations(unique_eigvals, 2)})
@functools.lru_cache(maxsize=None) def frequencies_to_period(frequencies, decimals=5): r"""Returns the period of a Fourier series as defined by a set of frequencies. The period is simply :math:`2\pi/gcd(frequencies)`, where :math:`\text{gcd}` is the greatest common divisor. Args: spectra (tuple[int, float]): frequency spectra decimals (int): Number of decimal places to round to if there are non-integral frequencies. Returns: tuple[int, float]: frequencies **Example** >>> frequencies = (0.5, 1.0) >>> frequencies_to_period(frequencies) 12.566370614359172 """ try: gcd = np.gcd.reduce(frequencies) except TypeError: # np.gcd only support integer frequencies exponent = 10**decimals frequencies = np.round(frequencies, decimals) * exponent gcd = np.gcd.reduce(np.int64(frequencies)) / exponent return 2 * np.pi / gcd @functools.lru_cache(maxsize=None) def _get_shift_rule(frequencies, shifts=None): n_freqs = len(frequencies) frequencies = qml.math.sort(qml.math.stack(frequencies)) freq_min = frequencies[0] if len(set(frequencies)) != n_freqs or freq_min <= 0: raise ValueError( f"Expected frequencies to be a list of unique positive values, instead got {frequencies}." ) mu = np.arange(1, n_freqs + 1) if shifts is None: # assume equidistant shifts shifts = (2 * mu - 1) * np.pi / (2 * n_freqs * freq_min) equ_shifts = True else: shifts = qml.math.sort(qml.math.stack(shifts)) if len(shifts) != n_freqs: raise ValueError( f"Expected number of shifts to equal the number of frequencies ({n_freqs}), instead got {shifts}." ) if len(set(shifts)) != n_freqs: raise ValueError(f"Shift values must be unique, instead got {shifts}") equ_shifts = np.allclose(shifts, (2 * mu - 1) * np.pi / (2 * n_freqs * freq_min)) if len(set(np.round(np.diff(frequencies), 10))) <= 1 and equ_shifts: # equidistant case coeffs = ( freq_min * (-1) ** (mu - 1) / (4 * n_freqs * np.sin(np.pi * (2 * mu - 1) / (4 * n_freqs)) ** 2) ) else: # non-equidistant case sin_matrix = -4 * np.sin(np.outer(shifts, frequencies)) det_sin_matrix = np.linalg.det(sin_matrix) if abs(det_sin_matrix) < 1e-6: warnings.warn( f"Solving linear problem with near zero determinant ({det_sin_matrix}) " "may give unstable results for the parameter shift rules." ) coeffs = -2 * linalg_solve(sin_matrix.T, frequencies) coeffs = np.concatenate((coeffs, -coeffs)) shifts = np.concatenate((shifts, -shifts)) # pylint: disable=invalid-unary-operand-type return np.stack([coeffs, shifts]).T def _iterate_shift_rule_with_multipliers(rule, order, period): r"""Helper method to repeat a shift rule that includes multipliers multiple times along the same parameter axis for higher-order derivatives.""" combined_rules = [] for partial_rules in itertools.product(rule, repeat=order): c, m, s = np.stack(partial_rules).T cumul_shift = 0.0 for _m, _s in zip(m, s): cumul_shift *= _m cumul_shift += _s if period is not None: cumul_shift = np.mod(cumul_shift + 0.5 * period, period) - 0.5 * period combined_rules.append(np.stack([np.prod(c), np.prod(m), cumul_shift])) # combine all terms in the linear combination into a single # array, with column order (coefficients, multipliers, shifts) return qml.math.stack(combined_rules) def _iterate_shift_rule(rule, order, period=None): r"""Helper method to repeat a shift rule multiple times along the same parameter axis for higher-order derivatives.""" if len(rule[0]) == 3: return _iterate_shift_rule_with_multipliers(rule, order, period) # TODO: optimization: Without multipliers, the order of shifts does not matter, # so that we can only iterate over the symmetric part of the combined_rules tensor. # This requires the corresponding multinomial prefactors to be included in the coeffs. combined_rules = np.array(list(itertools.product(rule, repeat=order))) # multiply the coefficients of each rule coeffs = np.prod(combined_rules[..., 0], axis=1) # sum the shifts of each rule shifts = np.sum(combined_rules[..., 1], axis=1) if period is not None: # if a period is provided, make sure the shift value is within [-period/2, period/2) shifts = np.mod(shifts + 0.5 * period, period) - 0.5 * period return qml.math.stack([coeffs, shifts]).T def _combine_shift_rules(rules): r"""Helper method to combine shift rules for multiple parameters into simultaneous multivariate shift rules.""" combined_rules = [] for partial_rules in itertools.product(*rules): c, *m, s = np.stack(partial_rules).T combined = np.concatenate([[np.prod(c)], *m, s]) combined_rules.append(np.stack(combined)) return np.stack(combined_rules)
[docs]@functools.lru_cache() def generate_shift_rule(frequencies, shifts=None, order=1): r"""Computes the parameter shift rule for a unitary based on its generator's eigenvalue frequency spectrum. To compute gradients of circuit parameters in variational quantum algorithms, expressions for cost function first derivatives with respect to the variational parameters can be cast into linear combinations of expectation values at shifted parameter values. The coefficients and shifts defining the linear combination can be obtained from the unitary generator's eigenvalue frequency spectrum. Details can be found in `Wierichs et al. (2022) <https://doi.org/10.22331/q-2022-03-30-677>`__. Args: frequencies (tuple[int or float]): The tuple of eigenvalue frequencies. Eigenvalue frequencies are defined as the unique positive differences obtained from a set of eigenvalues. shifts (tuple[int or float]): the tuple of shift values. If unspecified, equidistant shifts are assumed. If supplied, the length of this tuple should match the number of given frequencies. order (int): the order of differentiation to compute the shift rule for Returns: tuple: a tuple of coefficients and shifts describing the gradient rule for the parameter-shift method. For parameter :math:`\phi`, the coefficients :math:`c_i` and the shifts :math:`s_i` combine to give a gradient rule of the following form: .. math:: \frac{\partial}{\partial\phi}f = \sum_{i} c_i f(\phi + s_i). where :math:`f(\phi) = \langle 0|U(\phi)^\dagger \hat{O} U(\phi)|0\rangle` for some observable :math:`\hat{O}` and the unitary :math:`U(\phi)=e^{iH\phi}`. Raises: ValueError: if ``frequencies`` is not a list of unique positive values, or if ``shifts`` (if specified) is not a list of unique values the same length as ``frequencies``. **Examples** An example of obtaining the frequencies from generator eigenvalues, and obtaining the parameter shift rule: >>> eigvals = (-0.5, 0, 0, 0.5) >>> frequencies = eigvals_to_frequencies(eigvals) >>> generate_shift_rule(frequencies) array([[ 0.4267767 , 1.57079633], [-0.4267767 , -1.57079633], [-0.0732233 , 4.71238898], [ 0.0732233 , -4.71238898]]) An example with explicitly specified shift values: >>> frequencies = (1, 2, 4) >>> shifts = (np.pi / 3, 2 * np.pi / 3, np.pi / 4) >>> generate_shift_rule(frequencies, shifts) array([[ 3. , 0.78539816], [-3. , -0.78539816], [-2.09077028, 1.04719755], [ 2.09077028, -1.04719755], [ 0.2186308 , 2.0943951 ], [-0.2186308 , -2.0943951 ]]) Higher order shift rules (corresponding to the :math:`n`-th derivative of the parameter) can be requested via the ``order`` argument. For example, to extract the second order shift rule for a gate with generator :math:`X/2`: >>> eigvals = (0.5, -0.5) >>> frequencies = eigvals_to_frequencies(eigvals) >>> generate_shift_rule(frequencies, order=2) array([[-0.5 , 0. ], [ 0.5 , -3.14159265]]) This corresponds to the shift rule :math:`\frac{\partial^2 f}{\partial \phi^2} = \frac{1}{2} \left[f(\phi) - f(\phi-\pi)\right]`. """ frequencies = tuple(f for f in frequencies if f > 0) rule = _get_shift_rule(frequencies, shifts=shifts) if order > 1: T = frequencies_to_period(frequencies) rule = _iterate_shift_rule(rule, order, period=T) return process_shifts(rule, tol=1e-10)
[docs]def generate_multi_shift_rule(frequencies, shifts=None, orders=None): r"""Computes the parameter shift rule with respect to two parametrized unitaries, given their generator's eigenvalue frequency spectrum. This corresponds to a shift rule that computes off-diagonal elements of higher order derivative tensors. For the second order, this corresponds to the Hessian. Args: frequencies (list[tuple[int or float]]): List of eigenvalue frequencies corresponding to the each parametrized unitary. shifts (list[tuple[int or float]]): List of shift values corresponding to each parametrized unitary. If unspecified, equidistant shifts are assumed. If supplied, the length of each tuple in the list must be the same as the length of the corresponding tuple in ``frequencies``. orders (list[int]): the order of differentiation for each parametrized unitary. If unspecified, the first order derivative shift rule is computed for each parametrized unitary. Returns: tuple: a tuple of coefficients, shifts for the first parameter, and shifts for the second parameter, describing the gradient rule for the parameter-shift method. For parameters :math:`\phi_a` and :math:`\phi_b`, the coefficients :math:`c_i` and the shifts :math:`s^{(a)}_i`, :math:`s^{(b)}_i`, combine to give a gradient rule of the following form: .. math:: \frac{\partial^2}{\partial\phi_a \partial\phi_b}f = \sum_{i} c_i f(\phi_a + s^{(a)}_i, \phi_b + s^{(b)}_i). where :math:`f(\phi_a, \phi_b) = \langle 0|U(\phi_a)^\dagger V(\phi_b)^\dagger \hat{O} V(\phi_b) U(\phi_a)|0\rangle` for some observable :math:`\hat{O}` and unitaries :math:`U(\phi_a)=e^{iH_a\phi_a}` and :math:`V(\phi_b)=e^{iH_b\phi_b}`. **Example** >>> generate_multi_shift_rule([(1,), (1,)]) array([[ 0.25 , 1.57079633, 1.57079633], [-0.25 , 1.57079633, -1.57079633], [-0.25 , -1.57079633, 1.57079633], [ 0.25 , -1.57079633, -1.57079633]]) This corresponds to the gradient rule .. math:: \begin{align*} \frac{\partial^2 f}{\partial x\partial y} &= \frac{1}{4} [f(x+\pi/2, y+\pi/2) - f(x+\pi/2, y-\pi/2)\\ &\phantom{\frac{1}{4}[}-f(x-\pi/2, y+\pi/2) + f(x-\pi/2, y-\pi/2) ]. \end{align*} """ rules = [] shifts = shifts or [None] * len(frequencies) orders = orders or [1] * len(frequencies) for f, s, o in zip(frequencies, shifts, orders): rule = generate_shift_rule(f, shifts=s, order=o) rules.append(process_shifts(rule)) return _combine_shift_rules(rules)
def _copy_and_shift_params(tape, indices, shifts, multipliers, cast=False): """Create a copy of a tape and of parameters, and set the new tape to the parameters rescaled and shifted as indicated by ``indices``, ``multipliers`` and ``shifts``.""" all_ops = tape.circuit for idx, shift, multiplier in zip(indices, shifts, multipliers): _, op_idx, p_idx = tape.get_operation(idx) op = ( all_ops[op_idx].obs if isinstance(all_ops[op_idx], MeasurementProcess) else all_ops[op_idx] ) # Shift copied parameter new_params = list(op.data) if not isinstance(new_params[p_idx], numbers.Integral): multiplier = qml.math.convert_like(multiplier, new_params[p_idx]) multiplier = qml.math.cast_like(multiplier, new_params[p_idx]) shift = qml.math.convert_like(shift, new_params[p_idx]) shift = qml.math.cast_like(shift, new_params[p_idx]) new_params[p_idx] = new_params[p_idx] * multiplier new_params[p_idx] = new_params[p_idx] + shift if cast: dtype = getattr(new_params[p_idx], "dtype", float) new_params[p_idx] = qml.math.cast(new_params[p_idx], dtype) # Create operator with shifted parameter and put into shifted tape shifted_op = bind_new_parameters(op, new_params) if op_idx < len(tape.operations): all_ops[op_idx] = shifted_op else: mp = all_ops[op_idx].__class__ all_ops[op_idx] = mp(obs=shifted_op) # pylint: disable=protected-access ops = all_ops[: len(tape.operations)] meas = all_ops[len(tape.operations) :] return QuantumScript(ops=ops, measurements=meas, shots=tape.shots)
[docs]def generate_shifted_tapes(tape, index, shifts, multipliers=None, broadcast=False): r"""Generate a list of tapes or a single broadcasted tape, where one marked trainable parameter has been shifted by the provided shift values. Args: tape (.QuantumTape): input quantum tape index (int): index of the trainable parameter to shift shifts (Sequence[float or int]): sequence of shift values. The length determines how many parameter-shifted tapes are created. multipliers (Sequence[float or int]): sequence of multiplier values. The length should match the one of ``shifts``. Each multiplier scales the corresponding gate parameter before the shift is applied. If not provided, the parameters will not be scaled. broadcast (bool): Whether or not to use broadcasting to create a single tape with the shifted parameters. Returns: list[QuantumTape]: List of quantum tapes. In each tape the parameter indicated by ``index`` has been shifted by the values in ``shifts``. The number of tapes matches the length of ``shifts`` and ``multipliers`` (if provided). If ``broadcast=True`` was used, the list contains a single broadcasted tape with all shifts distributed over the broadcasting dimension. In this case, the ``batch_size`` of the returned tape matches the length of ``shifts``. """ if len(shifts) == 0: return tuple() if multipliers is None: multipliers = np.ones_like(shifts) if broadcast: return (_copy_and_shift_params(tape, [index], [shifts], [multipliers]),) return tuple( _copy_and_shift_params(tape, [index], [shift], [multiplier]) for shift, multiplier in zip(shifts, multipliers) )
[docs]def generate_multishifted_tapes(tape, indices, shifts, multipliers=None): r"""Generate a list of tapes where multiple marked trainable parameters have been shifted by the provided shift values. Args: tape (.QuantumTape): input quantum tape indices (Sequence[int]): indices of the trainable parameters to shift shifts (Sequence[Sequence[float or int]]): Nested sequence of shift values. The length of the outer Sequence determines how many parameter-shifted tapes are created. The lengths of the inner sequences should match and have the same length as ``indices``. multipliers (Sequence[Sequence[float or int]]): Nested sequence of multiplier values of the same format as `shifts``. Each multiplier scales the corresponding gate parameter before the shift is applied. If not provided, the parameters will not be scaled. Returns: list[QuantumTape]: List of quantum tapes. Each tape has the marked parameters indicated by ``indices`` shifted by the values of ``shifts``. The number of tapes will match the summed lengths of all inner sequences in ``shifts`` and ``multipliers`` (if provided). """ if multipliers is None: multipliers = np.ones_like(shifts) tapes = [ _copy_and_shift_params(tape, indices, _shifts, _multipliers, cast=True) for _shifts, _multipliers in zip(shifts, multipliers) ] return tapes