Source code for pennylane.gradients.parameter_shift_hessian

# Copyright 2018-2021 Xanadu Quantum Technologies Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module contains functions for computing the parameter-shift hessian
of a qubit-based quantum tape.
"""
import itertools as it
import warnings
from collections.abc import Sequence

import pennylane as qml
from pennylane import numpy as np
from pennylane.measurements import ProbabilityMP, StateMP, VarianceMP

from .general_shift_rules import (
    _combine_shift_rules,
    generate_multishifted_tapes,
    generate_shifted_tapes,
)
from .gradient_transform import grad_method_validation, gradient_analysis
from .hessian_transform import hessian_transform
from .parameter_shift import _get_operation_recipe


def _process_argnum(argnum, tape):
    """Process the argnum keyword argument to ``param_shift_hessian`` from any of ``None``,
    ``int``, ``Sequence[int]``, ``array_like[bool]`` to an ``array_like[bool]``.

    Args:
        argnum (int or Sequence[int] or array_like[bool] or None): Requested parameters.
            ``None`` selects all ``tape.num_params`` parameters, an ``int`` or a
            one-dimensional sequence of indices selects all combinations of the marked
            parameters, a one-dimensional Boolean mask does the same, and a two-dimensional
            Boolean mask is validated and used directly.
        tape: Object exposing ``num_params``, the number of trainable tape parameters.

    Returns:
        array_like[bool]: Two-dimensional Boolean mask of shape
        ``(tape.num_params, tape.num_params)``. The returned object is newly created
        in all branches, so callers may modify it without affecting the ``argnum``
        they were handed.

    Raises:
        ValueError: If an index is not smaller than ``tape.num_params``, if a
            one-dimensional Boolean mask does not have ``tape.num_params`` entries, or if
            a higher-dimensional input is not a symmetric Boolean matrix of the right shape.
    """
    # The note is appended to every error message. It carries its own leading space so
    # that it never runs into the preceding sentence.
    _trainability_note = (
        " This may be caused by attempting to differentiate with respect to parameters "
        "that are not marked as trainable."
    )
    if argnum is None:
        # All trainable tape parameters are considered
        argnum = list(range(tape.num_params))
    elif isinstance(argnum, int):
        if argnum >= tape.num_params:
            raise ValueError(
                f"The index {argnum} exceeds the number of trainable tape parameters "
                f"({tape.num_params})." + _trainability_note
            )
        # Make single marked parameter an iterable
        argnum = [argnum]

    if len(qml.math.shape(argnum)) == 1:
        # If the iterable is 1D, consider all combinations of all marked parameters
        if qml.math.array(argnum).dtype != bool:
            # If the 1D iterable contains indices, make sure it contains valid indices...
            if qml.math.max(argnum) >= tape.num_params:
                raise ValueError(
                    f"The index {qml.math.max(argnum)} exceeds the number of "
                    f"trainable tape parameters ({tape.num_params})." + _trainability_note
                )
            # ...and translate it to Boolean 1D iterable
            argnum = [i in argnum for i in range(tape.num_params)]
        elif len(argnum) != tape.num_params:
            # If the 1D iterable already is Boolean, check its length
            raise ValueError(
                "One-dimensional Boolean array argnum is expected to have as many entries as the "
                f"tape has trainable parameters ({tape.num_params}), but got {len(argnum)}."
                + _trainability_note
            )
        # Finally mark all combinations using the outer product
        return qml.math.tensordot(argnum, argnum, axes=0)

    is_valid_mask = (
        qml.math.shape(argnum) == (tape.num_params,) * 2
        and qml.math.array(argnum).dtype == bool
        and qml.math.allclose(qml.math.transpose(argnum), argnum)
    )
    if not is_valid_mask:
        # If the iterable is 2D, make sure it is Boolean, symmetric and of the correct size
        raise ValueError(
            f"Expected a symmetric 2D Boolean array with shape {(tape.num_params,) * 2} "
            f"for argnum, but received {argnum}." + _trainability_note
        )
    # Hand back a new array instead of the caller's object: ``param_shift_hessian``
    # switches off rows/columns of the mask in place, and nested lists do not support
    # the ``[i, j]`` indexing and ``~`` used downstream.
    # NOTE(review): relies on ``qml.math.array`` copying its input (NumPy semantics).
    return qml.math.array(argnum)


def _collect_recipes(tape, argnum, diff_methods, diagonal_shifts, off_diagonal_shifts):
    r"""Gather the univariate shift recipes needed for the requested Hessian entries.

    For every parameter index two recipes are looked up via ``_get_operation_recipe``:
    a second-order one (``order=2``) used for the diagonal entry, and a first-order one
    (``order=1``) that is later combined pairwise for off-diagonal entries.

    Args:
        tape: tape whose operations provide the recipes
        argnum (array_like[bool]): two-dimensional Boolean mask of requested entries
        diff_methods (list[str]): differentiation method per parameter; ``"0"`` marks
            parameters for which no recipe is collected
        diagonal_shifts (list or None): custom shifts, consumed in order by the
            parameters that receive a diagonal recipe
        off_diagonal_shifts (list or None): custom shifts, consumed in order by the
            parameters that receive an off-diagonal recipe

    Returns:
        tuple[list, list]: the diagonal recipes (``None`` where nothing is computed) and
        the first-order recipes (``(None, None, None)`` where nothing is computed).
    """
    wants_diag = qml.math.diag(argnum)
    # Remove the diagonal from the mask, then flag every parameter that still takes
    # part in at least one requested entry.
    offdiag_mask = argnum ^ qml.math.diag(qml.math.diag(argnum))
    wants_offdiag = qml.math.any(offdiag_mask, axis=0)

    second_order = []
    first_order = []
    # Positions of the next unused entries in the user-provided shift lists
    next_diag = 0
    next_offdiag = 0
    for par_idx, (on_diag, off_diag) in enumerate(zip(wants_diag, wants_offdiag)):
        if on_diag and diff_methods[par_idx] != "0":
            # Second-order derivative recipe for the diagonal entry
            shifts = None if diagonal_shifts is None else diagonal_shifts[next_diag]
            second_order.append(_get_operation_recipe(tape, par_idx, shifts, order=2))
            next_diag += 1
        else:
            # hessian will be set to 0 for this row/column
            second_order.append(None)

        if off_diag and diff_methods[par_idx] != "0":
            # First-order derivative recipe, to be combined for off-diagonal entries
            shifts = None if off_diagonal_shifts is None else off_diagonal_shifts[next_offdiag]
            first_order.append(_get_operation_recipe(tape, par_idx, shifts, order=1))
            next_offdiag += 1
        else:
            # hessian will be set to 0 for this row/column
            first_order.append((None, None, None))

    return second_order, first_order


def _generate_offdiag_tapes(tape, idx, first_order_recipes, add_unshifted, tapes, coeffs):
    r"""Create the multi-shifted tapes for one off-diagonal Hessian entry.

    The first-order recipes of the two parameters in ``idx`` are merged with
    ``_combine_shift_rules``. The new tapes are appended to ``tapes`` and their
    coefficients to ``coeffs`` (both lists are modified in place).

    Returns:
        tuple: the updated ``add_unshifted`` flag and the coefficient of the unshifted
        term (``None`` if the combined rule does not start with such a term).
    """
    # pylint: disable=too-many-arguments

    pair = [first_order_recipes[idx[0]], first_order_recipes[idx[1]]]
    # Column layout of the combined rule: coefficient (1 column), multipliers
    # (2 columns), shifts (2 columns).
    rules = _combine_shift_rules(pair)

    # A leading term with multipliers equal to 1 and shifts equal to 0 is the
    # original, unshifted tape.
    leading_is_unshifted = np.allclose(rules[0, 1:3], 1.0) and np.allclose(rules[0, 3:5], 0.0)

    zero_shift_coeff = None
    if leading_is_unshifted:
        if add_unshifted:
            # The unshifted tape is put in front once; this only happens while it is
            # still missing and no f0 was given (both encoded in add_unshifted).
            tapes.insert(0, tape)
            add_unshifted = False
        # Remember the coefficient and drop the term from the tapes to create
        zero_shift_coeff = rules[0, 0]
        rules = rules[1:]

    shifts = rules[:, 3:5]
    multipliers = rules[:, 1:3]
    tapes.extend(generate_multishifted_tapes(tape, idx, shifts, multipliers))
    coeffs.append(rules[:, 0])

    return add_unshifted, zero_shift_coeff


def _generate_diag_tapes(tape, idx, diag_recipes, add_unshifted, tapes, coeffs):
    """Create the shifted tapes for one diagonal Hessian entry from its precomputed
    second-order recipe.

    The new tapes are appended to ``tapes`` and their coefficients to ``coeffs``
    (both lists are modified in place).

    Returns:
        tuple: the updated ``add_unshifted`` flag and the coefficient of the unshifted
        term (``None`` if the recipe does not start with such a term).
    """
    # pylint: disable=too-many-arguments
    # The transposed recipe yields coefficients, multipliers and shifts, in that order.
    coefficients, multipliers, shifts = diag_recipes[idx].T

    zero_shift_coeff = None
    if shifts[0] == 0 and multipliers[0] == 1.0:
        # A first term with shift 0 and multiplier 1 is the unshifted tape.
        if add_unshifted:
            # The unshifted tape is put in front once; this only happens while it is
            # still missing and no f0 was given (both encoded in add_unshifted).
            tapes.insert(0, tape)
            add_unshifted = False
        zero_shift_coeff = coefficients[0]
        coefficients = coefficients[1:]
        multipliers = multipliers[1:]
        shifts = shifts[1:]

    # The remaining terms become shifted tapes, stored along with their coefficients
    tapes.extend(generate_shifted_tapes(tape, idx, shifts, multipliers))
    coeffs.append(coefficients)

    return add_unshifted, zero_shift_coeff


def _no_trainable_grad_new(tape):
    """Return no tapes and a post-processing function that ignores its argument and
    yields an empty result: a zeros array of shape ``(0,)`` for a tape with exactly one
    measurement, otherwise a tuple with one such array per measurement."""
    has_single_measurement = len(tape.measurements) == 1

    def processing_fn(_):
        if has_single_measurement:
            return qml.math.zeros((0,))
        return tuple(qml.math.zeros((0,)) for _ in tape.measurements)

    return [], processing_fn


def _all_zero_grad_new(tape):
    """Return no tapes and a post-processing function that ignores its argument and
    yields an all-zero Hessian.

    Per measurement, the result is a nested tuple with one zeros entry per pair of
    trainable parameters, or a single zeros entry if there is exactly one trainable
    parameter. Entries have ``2 ** len(m.wires)`` elements for ``ProbabilityMP``
    measurements and are scalar otherwise. For a single measurement its result is
    returned directly, otherwise a tuple with one result per measurement.
    """
    n_trainable = len(tape.trainable_params)

    def _zero_block(measurement):
        if isinstance(measurement, ProbabilityMP):
            shape = 2 ** len(measurement.wires)
        else:
            shape = ()

        if n_trainable == 1:
            # The 1 x 1 nesting is dropped for a single parameter
            return qml.math.zeros(shape)
        return tuple(
            tuple(qml.math.zeros(shape) for _ in range(n_trainable)) for _ in range(n_trainable)
        )

    # The zeros are created once here and reused by every call of the processing function
    zeros_per_measurement = [_zero_block(m) for m in tape.measurements]

    if len(tape.measurements) == 1:
        return [], lambda _: zeros_per_measurement[0]

    return [], lambda _: tuple(zeros_per_measurement)


def expval_hessian_param_shift(
    tape, argnum, diff_methods, diagonal_shifts, off_diagonal_shifts, f0
):
    r"""Create the parameter-shifted tapes for the Hessian of a quantum tape together
    with the post-processing function that assembles the tape results into the Hessian.

    Args:
        tape (.QuantumTape): quantum tape to differentiate
        argnum (array_like[bool]): two-dimensional Boolean mask marking the Hessian
            entries to compute
        diff_methods (list[string]): differentiation method per trainable parameter.
            Can be "A" or "0", where "A" is the analytical parameter shift rule and "0"
            indicates a 0 derivative (that is the parameter does not affect the tape's
            output).
        diagonal_shifts (list[tuple[int or float]]): shift values for the Hessian
            diagonal. If provided, one tuple of shifts should be given per trainable
            parameter and the tuple length should match the number of frequencies for
            that parameter. If unspecified, equidistant shifts are used.
        off_diagonal_shifts (list[tuple[int or float]]): shift values for the
            off-diagonal entries of the Hessian. If provided, one tuple of shifts should
            be given per trainable parameter and the tuple should match the number of
            frequencies for that parameter. The combination of shifts into bivariate
            shifts is performed automatically. If unspecified, equidistant shifts are
            used.
        f0 (tensor_like[float] or None): output of the evaluated input tape. If provided,
            and the Hessian tapes include the original input tape, the 'f0' value is used
            instead of evaluating the input tape, reducing the number of device
            invocations.

    Returns:
        tuple[list[QuantumTape], function]: the generated tapes and the post-processing
        function to be applied to their results in order to obtain the Hessian matrix.
    """
    # pylint: disable=too-many-arguments, too-many-statements
    num_params = tape.num_params

    # The unshifted tape only has to be executed if its result was not handed in.
    need_unshifted = f0 is None

    # Univariate recipes: second order for the diagonal, first order as building
    # blocks for the off-diagonal entries.
    diag_rules, offdiag_rules = _collect_recipes(
        tape, argnum, diff_methods, diagonal_shifts, off_diagonal_shifts
    )

    shifted_tapes = []
    # One item per upper-triangular entry, in row-major order (None if not requested)
    entry_coeffs = []
    # Coefficient of the unshifted tape, for those entries whose rule contains it
    zero_shift_coeffs = {}
    for row, col in it.combinations_with_replacement(range(num_params), r=2):
        if not argnum[row, col]:
            # The (row, col) entry of the Hessian is not to be computed
            entry_coeffs.append(None)
            continue

        if row == col:
            make_tapes, position, rules = _generate_diag_tapes, row, diag_rules
        else:
            # Off-diagonal entries combine the two univariate first-order recipes
            make_tapes, position, rules = _generate_offdiag_tapes, (row, col), offdiag_rules

        need_unshifted, zero_shift_coeff = make_tapes(
            tape, position, rules, need_unshifted, shifted_tapes, entry_coeffs
        )
        if zero_shift_coeff is not None:
            zero_shift_coeffs[(row, col)] = zero_shift_coeff

    def processing_fn(results):
        # Apply the same squeezing as in qml.QNode to make the transform output consistent.
        # pylint: disable=protected-access
        qfunc_output = tape._qfunc_output
        if not (qfunc_output is None or isinstance(qfunc_output, Sequence)):
            results = qml.math.squeeze(qml.math.stack(results))

        # Leading results axis: terms/tapes of the shift rule; the rest: output dimensions
        out_dim = qml.math.shape(results)[1:]

        # The unshifted tape sits at position 0 if some rule needs it and f0 was not given
        unshifted_was_run = bool(zero_shift_coeffs) and f0 is None
        cursor = 1 if unshifted_was_run else 0
        unshifted_result = results[0] if unshifted_was_run else f0

        # Entries are collected in a flat, row-major list first, since no array
        # assignment is possible.
        entries = []
        for row in range(num_params):
            for col in range(num_params):
                if col < row:
                    # Lower triangle: reuse the already computed mirrored entry
                    entries.append(entries[col * num_params + row])
                    continue

                # Position of (row, col) within the upper-triangular coefficient list
                flat = row * num_params + col - row * (row + 1) // 2
                weights = entry_coeffs[flat]
                if weights is None or len(weights) == 0:
                    entries.append(qml.math.zeros(out_dim))
                    continue

                # Consume the next len(weights) tape results
                stop = cursor + len(weights)
                term_results = qml.math.stack(results[cursor:stop])
                cursor = stop

                weights = qml.math.cast(
                    qml.math.convert_like(weights, term_results), term_results.dtype
                )
                value = qml.math.tensordot(term_results, weights, [[0], [0]])
                if (row, col) in zero_shift_coeffs:
                    value = value + zero_shift_coeffs[(row, col)] * unshifted_result

                entries.append(value)

        # (num_params**2, *out_dim) -> (num_params, num_params, *out_dim)
        #                           -> (*out_dim, num_params, num_params)
        stacked = qml.math.reshape(qml.math.stack(entries), (num_params, num_params) + out_dim)
        output_axes_first = [*range(2, len(out_dim) + 2), 0, 1]
        return qml.math.transpose(stacked, axes=output_axes_first)

    return shifted_tapes, processing_fn


def _expval_hessian_param_shift_tuple(
    tape, argnum, diff_methods, diagonal_shifts, off_diagonal_shifts, f0
):
    r"""Generate the Hessian tapes that are used in the computation of the second derivative of a
    quantum tape, using analytical parameter-shift rules to do so exactly. Also define a
    post-processing function to combine the results of evaluating the tapes into the Hessian.

    In contrast to ``expval_hessian_param_shift``, the post-processing function returns
    (nested) tuples instead of a single stacked tensor.

    Args:
        tape (.QuantumTape): quantum tape to differentiate
        argnum (array_like[bool]): Parameter indices to differentiate
            with respect to, in form of a two-dimensional boolean ``array_like`` mask.
        diff_methods (list[string]): The differentiation method to use for each trainable parameter.
            Can be "A" or "0", where "A" is the analytical parameter shift rule and "0" indicates
            a 0 derivative (that is the parameter does not affect the tape's output).
        diagonal_shifts (list[tuple[int or float]]): List containing tuples of shift values
            for the Hessian diagonal.
            If provided, one tuple of shifts should be given per trainable parameter
            and the tuple length should match the number of frequencies for that parameter.
            If unspecified, equidistant shifts are used.
        off_diagonal_shifts (list[tuple[int or float]]): List containing tuples of shift
            values for the off-diagonal entries of the Hessian.
            If provided, one tuple of shifts should be given per trainable parameter
            and the tuple should match the number of frequencies for that parameter.
            The combination of shifts into bivariate shifts is performed automatically.
            If unspecified, equidistant shifts are used.
        f0 (tensor_like[float] or None): Output of the evaluated input tape. If provided,
            and the Hessian tapes include the original input tape, the 'f0' value is used
            instead of evaluating the input tape, reducing the number of device invocations.
            For a tape with a single measurement, ``f0`` is the bare measurement result
            (a 1-tuple containing it is accepted as well).

    Returns:
        tuple[list[QuantumTape], function]: A tuple containing a
        list of generated tapes, together with a post-processing
        function to be applied to the results of the evaluated tapes
        in order to obtain the Hessian matrix.
    """
    # pylint: disable=too-many-arguments, too-many-statements
    h_dim = tape.num_params

    unshifted_coeffs = {}
    # Marks whether we will need to add the unshifted tape to all Hessian tapes.
    add_unshifted = f0 is None

    # Assemble all univariate recipes for the diagonal and as partial components for the
    # off-diagonal entries.
    diag_recipes, partial_offdiag_recipes = _collect_recipes(
        tape, argnum, diff_methods, diagonal_shifts, off_diagonal_shifts
    )

    hessian_tapes = []
    hessian_coeffs = []
    for i, j in it.combinations_with_replacement(range(h_dim), r=2):
        if not argnum[i, j]:
            # The (i, j) entry of the Hessian is not to be computed
            hessian_coeffs.append(None)
            continue

        if i == j:
            add_unshifted, unshifted_coeffs[(i, i)] = _generate_diag_tapes(
                tape, i, diag_recipes, add_unshifted, hessian_tapes, hessian_coeffs
            )
        else:
            # Create tapes and coefficients for the off-diagonal entry by combining
            # the two univariate first-order derivative recipes.
            add_unshifted, unshifted_coeffs[(i, j)] = _generate_offdiag_tapes(
                tape, (i, j), partial_offdiag_recipes, add_unshifted, hessian_tapes, hessian_coeffs
            )
    unshifted_coeffs = {key: val for key, val in unshifted_coeffs.items() if val is not None}

    def processing_fn(results):
        num_measurements = len(tape.measurements)
        if num_measurements == 1:
            # Give single-measurement results the same per-measurement layout as
            # multi-measurement results, so that ``r[m]`` works uniformly below.
            results = tuple((r,) for r in results)

        # the hessian should have a nested tuple structure with shape
        #     (num_measurements, num_params, num_params, *output_dims)
        # first accumulate all elements of the hessian into a list
        hessians = []

        # Keep track of tape results already consumed. Start with 1 if the unshifted tape was
        # included in the tapes for the Hessian.
        start = 1 if unshifted_coeffs and f0 is None else 0

        # Results of the unshifted tape.
        if start == 1:
            r0 = results[0]
        elif num_measurements == 1 and f0 is not None and not isinstance(f0, tuple):
            # A user-provided f0 has to be wrapped like the tape results above. Without
            # this, ``r0[m]`` would index into the measurement result itself, which fails
            # for scalars and picks a single entry of vector-valued results.
            r0 = (f0,)
        else:
            r0 = f0

        for i, j in it.product(range(h_dim), repeat=2):
            if j < i:
                # The Hessian is symmetric: reuse the entry from the upper triangle
                hessians.append(hessians[j * h_dim + i])
                continue

            # Position of the entry (i, j) in the upper-triangular coefficient list
            k = i * h_dim + j - i * (i + 1) // 2
            coeffs = hessian_coeffs[k]

            if coeffs is None or len(coeffs) == 0:
                hessian = []
                for m in range(num_measurements):
                    hessian.append(qml.math.zeros_like(results[0][m]))

                hessians.append(tuple(hessian))
                continue

            res = results[start : start + len(coeffs)]
            start = start + len(coeffs)

            unshifted_coeff = unshifted_coeffs.get((i, j), None)
            hessian = []
            for m in range(num_measurements):
                # the res array has shape (num_tapes, num_measurements, *output_dims)

                # first collect all tape results for the individual measurements
                measure_res = qml.math.stack([r[m] for r in res])

                # then compute the hessian via parameter-shift
                coeffs = qml.math.convert_like(coeffs, measure_res)
                hess = qml.math.tensordot(measure_res, coeffs, [[0], [0]])

                if unshifted_coeff is not None:
                    hess = hess + unshifted_coeff * r0[m]

                hess = qml.math.array(hess, like=measure_res)
                hessian.append(hess)

            hessians.append(tuple(hessian))

        # at this point, the hessian has shape (num_params ** 2, num_measurements, *output_dims)

        # swap the first two axes, so that the hessian now has
        # shape (num_measurements, num_params ** 2, *output_dims)
        hessians = tuple(tuple(h[i] for h in hessians) for i in range(num_measurements))

        # replace the axis of size num_params ** 2 with two axes of size num_params;
        # that is, reshape the hessian to have shape
        # (num_measurements, num_params, num_params, *output_dims)
        hessians = tuple(
            tuple(tuple(hess[i * h_dim + j] for j in range(h_dim)) for i in range(h_dim))
            for hess in hessians
        )

        # squeeze every axis with size 1
        if h_dim == 1:
            hessians = tuple(hess[0][0] for hess in hessians)

        if num_measurements == 1:
            hessians = hessians[0]

        return hessians

    return hessian_tapes, processing_fn


@hessian_transform
def param_shift_hessian(tape, argnum=None, diagonal_shifts=None, off_diagonal_shifts=None, f0=None):
    r"""Transform a QNode to compute the parameter-shift Hessian with respect to its trainable
    parameters.

    Use this transform to explicitly generate and explore parameter-shift circuits for computing
    the Hessian of QNodes directly, without computing first derivatives.

    For second-order derivatives of more complicated cost functions, please consider using your
    chosen autodifferentiation framework directly, by chaining gradient computations:

    >>> qml.jacobian(qml.grad(cost))(weights)

    Args:
        tape (pennylane.QNode or .QuantumTape): quantum tape or QNode to differentiate
        argnum (int or list[int] or array_like[bool] or None): Parameter indices to differentiate
            with respect to. If not provided, the Hessian with respect to all
            trainable indices is returned. Note that the indices refer to tape
            parameters both if ``tape`` is a tape, and if it is a QNode. If an ``array_like``
            is provided, it is expected to be a symmetric two-dimensional Boolean mask with
            shape ``(n, n)`` where ``n`` is the number of trainable tape parameters.
        diagonal_shifts (list[tuple[int or float]]): List containing tuples of shift values
            for the Hessian diagonal. The shifts are understood as first-order derivative
            shifts and are iterated to obtain the second-order derivative.
            If provided, one tuple of shifts should be given per trainable parameter
            and the tuple length should match the number of frequencies for that parameter.
            If unspecified, equidistant shifts are used.
        off_diagonal_shifts (list[tuple[int or float]]): List containing tuples of shift
            values for the off-diagonal entries of the Hessian.
            If provided, one tuple of shifts should be given per trainable parameter
            and the tuple should match the number of frequencies for that parameter.
            The combination of shifts into bivariate shifts is performed automatically.
            If unspecified, equidistant shifts are used.
        f0 (tensor_like[float] or None): Output of the evaluated input tape. If provided,
            and the Hessian tapes include the original input tape, the 'f0' value is used
            instead of evaluating the input tape, reducing the number of device invocations.

    Returns:
        function or tuple[list[QuantumTape], function]:

        - If the input is a QNode, an object representing the Hessian (function) of the QNode
          that can be executed to obtain the Hessian matrix.
          For QNodes with a single trainable argument, the returned matrix is a tensor of size
          ``(*QNode output dimensions, *QNode input dimensions, *QNode input dimensions)``.
          For QNodes with multiple trainable arguments, a tuple of Hessian tensors is returned,
          one for each argument.

        - If the input is a tape, a tuple containing a list of generated tapes,
          together with a post-processing function to be applied to the results
          of the evaluated tapes in order to obtain the Hessian matrix.

    Note: By default a QNode with the keyword ``hybrid=True`` computes derivatives with respect
    to QNode arguments, which can include classical computations on those arguments before they
    are passed to quantum operations. The "purely quantum" Hessian can instead be obtained with
    ``hybrid=False``, which is then computed with respect to the gate arguments and produces a
    result of shape ``(*QNode output dimensions, # gate arguments, # gate arguments)``.

    **Example**

    Applying the Hessian transform to a QNode computes its Hessian tensor:

    >>> dev = qml.device("default.qubit", wires=2)
    >>> @qml.qnode(dev)
    ... def circuit(x):
    ...     qml.RX(x[0], wires=0)
    ...     qml.CRY(x[1], wires=[0, 1])
    ...     return qml.expval(qml.PauliZ(0)@qml.PauliZ(1))

    >>> x = np.array([0.5, 0.2], requires_grad=True)
    >>> qml.gradients.param_shift_hessian(circuit)(x)
    tensor([[-0.86883595,  0.04762358],
            [ 0.04762358,  0.05998862]], requires_grad=True)

    .. details::
        :title: Usage Details

        The Hessian transform can also be applied to a quantum tape instead of a QNode,
        producing the parameter-shifted tapes and a post-processing function to combine
        the execution results of these tapes into the Hessian:

        >>> circuit(x)  # generate the QuantumTape inside the QNode
        >>> tape = circuit.qtape
        >>> hessian_tapes, postproc_fn = qml.gradients.param_shift_hessian(tape)
        >>> len(hessian_tapes)
        13
        >>> all(isinstance(tape, qml.tape.QuantumTape) for tape in hessian_tapes)
        True
        >>> postproc_fn(qml.execute(hessian_tapes, dev, None))
        array([[-0.86883595,  0.04762358],
               [ 0.04762358,  0.05998862]])

        The Hessian tapes can be inspected via their draw function, which reveals the different
        gate arguments generated from parameter-shift rules (we only draw the first four out of
        all 13 tapes here):

        >>> for h_tape in hessian_tapes[0:4]:
        ...     print(qml.drawer.tape_text(h_tape, decimals=1))
        0: ──RX(0.5)─╭●───────┤ ╭<Z@Z>
        1: ──────────╰RY(0.2)─┤ ╰<Z@Z>
        0: ──RX(-2.6)─╭●───────┤ ╭<Z@Z>
        1: ───────────╰RY(0.2)─┤ ╰<Z@Z>
        0: ──RX(2.1)─╭●───────┤ ╭<Z@Z>
        1: ──────────╰RY(1.8)─┤ ╰<Z@Z>
        0: ──RX(2.1)─╭●────────┤ ╭<Z@Z>
        1: ──────────╰RY(-1.4)─┤ ╰<Z@Z>

        To enable more detailed control over the parameter shifts, shift values
        can be provided per parameter, and separately for the diagonal and the
        off-diagonal terms. Here we choose them based on the parameters ``x`` themselves,
        mostly yielding multiples of the original parameters in the shifted tapes.

        >>> diag_shifts = [(x[0] / 2,), (x[1] / 2, x[1])]
        >>> offdiag_shifts = [(x[0],), (x[1], 2 * x[1])]
        >>> hessian_tapes, postproc_fn = qml.gradients.param_shift_hessian(
        ...     tape, diagonal_shifts=diag_shifts, off_diagonal_shifts=offdiag_shifts
        ... )
        >>> for h_tape in hessian_tapes[0:4]:
        ...     print(qml.drawer.tape_text(h_tape, decimals=1))
        0: ──RX(0.5)─╭●───────┤ ╭<Z@Z>
        1: ──────────╰RY(0.2)─┤ ╰<Z@Z>
        0: ──RX(0.0)─╭●───────┤ ╭<Z@Z>
        1: ──────────╰RY(0.2)─┤ ╰<Z@Z>
        0: ──RX(1.0)─╭●───────┤ ╭<Z@Z>
        1: ──────────╰RY(0.2)─┤ ╰<Z@Z>
        0: ──RX(1.0)─╭●───────┤ ╭<Z@Z>
        1: ──────────╰RY(0.4)─┤ ╰<Z@Z>

        .. note::

            Note that the ``diagonal_shifts`` are interpreted as *first-order* derivative
            shift values. That means they are used to generate a first-order derivative
            recipe, which then is iterated in order to obtain the second-order derivative
            for the diagonal Hessian entry. Explicit control over the used second-order
            shifts is not implemented.

        Finally, the ``argnum`` argument can be used to compute the Hessian only for some of the
        variational parameters. Note that this indexing refers to trainable tape parameters both
        if ``tape`` is a ``QNode`` and if it is a ``QuantumTape``.

        >>> hessian_tapes, postproc_fn = qml.gradients.param_shift_hessian(tape, argnum=(1,))
        >>> postproc_fn(qml.execute(hessian_tapes, dev, None))
        array([[0.        , 0.        ],
               [0.        , 0.05998862]])

    """
    if qml.active_return():
        return _param_shift_hessian_tuple(
            tape,
            argnum=argnum,
            diagonal_shifts=diagonal_shifts,
            off_diagonal_shifts=off_diagonal_shifts,
            f0=f0,
        )

    # Perform input validation before generating tapes.
    if any(isinstance(m, StateMP) for m in tape.measurements):
        raise ValueError(
            "Computing the Hessian of circuits that return the state is not supported."
        )

    # The parameter-shift Hessian implementation currently doesn't support variance measurements.
    # TODO: Support variances similar to how param_shift does it
    if any(isinstance(m, VarianceMP) for m in tape.measurements):
        raise ValueError(
            "Computing the Hessian of circuits that return variances is currently not supported."
        )

    if argnum is None and not tape.trainable_params:
        warnings.warn(
            "Attempted to compute the hessian of a tape with no trainable parameters. "
            "If this is unintended, please mark trainable parameters in accordance with the "
            "chosen auto differentiation framework, or via the 'tape.trainable_params' property."
        )
        return [], lambda _: qml.math.zeros((tape.output_dim, 0, 0))

    bool_argnum = _process_argnum(argnum, tape)

    # Number of parameters with a requested diagonal entry, and number of parameters that
    # take part in at least one requested off-diagonal entry. These are the expected
    # lengths of the user-provided shift lists.
    compare_diag_to = qml.math.sum(qml.math.diag(bool_argnum))
    offdiag = bool_argnum ^ qml.math.diag(qml.math.diag(bool_argnum))
    compare_offdiag_to = qml.math.sum(qml.math.any(offdiag, axis=0))

    if diagonal_shifts is not None and len(diagonal_shifts) != compare_diag_to:
        raise ValueError(
            "The number of provided sets of shift values for diagonal entries "
            f"({len(diagonal_shifts)}) does not match the number of marked arguments "
            f"to compute the diagonal for ({compare_diag_to})."
        )
    if off_diagonal_shifts is not None and len(off_diagonal_shifts) != compare_offdiag_to:
        raise ValueError(
            "The number of provided sets of shift values for off-diagonal entries "
            f"({len(off_diagonal_shifts)}) does not match the number of marked arguments "
            f"for which to compute at least one off-diagonal entry ({compare_offdiag_to})."
        )

    gradient_analysis(tape, grad_fn=qml.gradients.param_shift)
    # If argnum is given, the grad_method_validation may allow parameters with
    # finite-difference as method. If they are among the requested argnum, we catch this
    # further below (as no fallback function in analogy to `param_shift` is used currently).
    method = "analytic" if argnum is None else "best"
    diff_methods = grad_method_validation(method, tape)

    # Rows and columns of parameters with vanishing derivative are not computed
    for i, g in enumerate(diff_methods):
        if g == "0":
            bool_argnum[i] = bool_argnum[:, i] = False
    if qml.math.all(~bool_argnum):  # pylint: disable=invalid-unary-operand-type
        par_dim = len(tape.trainable_params)
        return [], lambda _: qml.math.zeros([tape.output_dim, par_dim, par_dim])

    # Find all argument indices that appear in at least one derivative that was requested
    choose_argnum = qml.math.where(qml.math.any(bool_argnum, axis=0))[0]
    # If any of these argument indices correspond to a finite difference
    # derivative (diff_methods[idx]="F"), raise an error.
    unsupported_params = {idx for idx in choose_argnum if diff_methods[idx] == "F"}
    if unsupported_params:
        raise ValueError(
            "The parameter-shift Hessian currently does not support the operations "
            f"for parameter(s) {unsupported_params}."
        )

    return expval_hessian_param_shift(
        tape, bool_argnum, diff_methods, diagonal_shifts, off_diagonal_shifts, f0
    )


def _param_shift_hessian_tuple(
    tape, argnum=None, diagonal_shifts=None, off_diagonal_shifts=None, f0=None
):
    r"""Transform a QNode to compute the parameter-shift Hessian with respect to its trainable
    parameters. This is the Hessian transform that replaces the old one in the new return
    types system.

    Use this transform to explicitly generate and explore parameter-shift circuits for computing
    the Hessian of QNodes directly, without computing first derivatives.

    For second-order derivatives of more complicated cost functions, please consider using your
    chosen autodifferentiation framework directly, by chaining gradient computations:

    >>> qml.jacobian(qml.grad(cost))(weights)

    Args:
        tape (pennylane.QNode or .QuantumTape): quantum tape or QNode to differentiate
        argnum (int or list[int] or array_like[bool] or None): Parameter indices to differentiate
            with respect to. If not provided, the Hessian with respect to all
            trainable indices is returned. Note that the indices refer to tape
            parameters both if ``tape`` is a tape, and if it is a QNode. If an ``array_like``
            is provided, it is expected to be a symmetric two-dimensional Boolean mask with
            shape ``(n, n)`` where ``n`` is the number of trainable tape parameters.
        diagonal_shifts (list[tuple[int or float]]): List containing tuples of shift values
            for the Hessian diagonal. The shifts are understood as first-order derivative
            shifts and are iterated to obtain the second-order derivative.
            If provided, one tuple of shifts should be given per trainable parameter
            and the tuple length should match the number of frequencies for that parameter.
            If unspecified, equidistant shifts are used.
        off_diagonal_shifts (list[tuple[int or float]]): List containing tuples of shift
            values for the off-diagonal entries of the Hessian.
            If provided, one tuple of shifts should be given per trainable parameter
            and the tuple should match the number of frequencies for that parameter.
            The combination of shifts into bivariate shifts is performed automatically.
            If unspecified, equidistant shifts are used.
        f0 (tensor_like[float] or None): Output of the evaluated input tape. If provided,
            and the Hessian tapes include the original input tape, the 'f0' value is used
            instead of evaluating the input tape, reducing the number of device invocations.

    Returns:
        function or tuple[list[QuantumTape], function]:

        - If the input is a QNode, an object representing the Hessian (function) of the QNode
          that can be executed to obtain the Hessian matrix.
          The returned Hessian matrix is given as a tensor or nested tuples of tensors.
          The level of nesting depends on the number of trainable QNode arguments, the output
          shape(s) of the input QNode itself, and the usage of shot vectors in the QNode
          execution.

        - If the input is a tape, a tuple containing a list of generated tapes, together with
          a post-processing function to be applied to the results of the evaluated tapes
          in order to obtain the Hessian matrix.

    Raises:
        ValueError: If the tape returns the state or a variance, if the number of entries in
            ``diagonal_shifts`` or ``off_diagonal_shifts`` does not match the number of
            arguments marked via ``argnum``, or if a requested parameter would require a
            finite-difference derivative.

    Note: By default a QNode with the keyword ``hybrid=True`` computes derivatives with respect
    to QNode arguments, which can include classical computations on those arguments before they
    are passed to quantum operations. The "purely quantum" Hessian can instead be obtained with
    ``hybrid=False``, which is then computed with respect to the gate arguments and produces a
    result of shape ``(*QNode output dimensions, # gate arguments, # gate arguments)``.

    **Example**

    Applying the Hessian transform to a QNode computes its Hessian tensor:

    >>> dev = qml.device("default.qubit", wires=2)
    >>> @qml.qnode(dev)
    ... def circuit(x):
    ...     qml.RX(x[0], wires=0)
    ...     qml.CRY(x[1], wires=[0, 1])
    ...     return qml.expval(qml.PauliZ(0)@qml.PauliZ(1))

    >>> x = np.array([0.5, 0.2], requires_grad=True)
    >>> qml.gradients.param_shift_hessian(circuit)(x)
    ((array(-0.86883595), array(0.04762358)),
     (array(0.04762358), array(0.05998862)))

    .. details::
        :title: Usage Details

        The Hessian transform can also be applied to a quantum tape instead of a QNode,
        producing the parameter-shifted tapes and a post-processing function to combine the
        execution results of these tapes into the Hessian:

        >>> circuit(x)  # generate the QuantumTape inside the QNode
        >>> tape = circuit.qtape
        >>> hessian_tapes, postproc_fn = qml.gradients.param_shift_hessian(tape)
        >>> len(hessian_tapes)
        13
        >>> all(isinstance(tape, qml.tape.QuantumTape) for tape in hessian_tapes)
        True
        >>> postproc_fn(qml.execute(hessian_tapes, dev, None))
        ((array(-0.86883595), array(0.04762358)),
         (array(0.04762358), array(0.05998862)))

        The Hessian tapes can be inspected via their draw function, which reveals the different
        gate arguments generated from parameter-shift rules (we only draw the first four out of
        all 13 tapes here):

        >>> for h_tape in hessian_tapes[0:4]:
        ...     print(qml.drawer.tape_text(h_tape, decimals=1))
        0: ──RX(0.5)─╭●───────┤ ╭<Z@Z>
        1: ──────────╰RY(0.2)─┤ ╰<Z@Z>
        0: ──RX(-2.6)─╭●───────┤ ╭<Z@Z>
        1: ───────────╰RY(0.2)─┤ ╰<Z@Z>
        0: ──RX(2.1)─╭●───────┤ ╭<Z@Z>
        1: ──────────╰RY(1.8)─┤ ╰<Z@Z>
        0: ──RX(2.1)─╭●────────┤ ╭<Z@Z>
        1: ──────────╰RY(-1.4)─┤ ╰<Z@Z>

        To enable more detailed control over the parameter shifts, shift values can be provided
        per parameter, and separately for the diagonal and the off-diagonal terms.
        Here we choose them based on the parameters ``x`` themselves, mostly yielding
        multiples of the original parameters in the shifted tapes.

        >>> diag_shifts = [(x[0] / 2,), (x[1] / 2, x[1])]
        >>> offdiag_shifts = [(x[0],), (x[1], 2 * x[1])]
        >>> hessian_tapes, postproc_fn = qml.gradients.param_shift_hessian(
        ...     tape, diagonal_shifts=diag_shifts, off_diagonal_shifts=offdiag_shifts
        ... )
        >>> for h_tape in hessian_tapes[0:4]:
        ...     print(qml.drawer.tape_text(h_tape, decimals=1))
        0: ──RX(0.5)─╭●───────┤ ╭<Z@Z>
        1: ──────────╰RY(0.2)─┤ ╰<Z@Z>
        0: ──RX(0.0)─╭●───────┤ ╭<Z@Z>
        1: ──────────╰RY(0.2)─┤ ╰<Z@Z>
        0: ──RX(1.0)─╭●───────┤ ╭<Z@Z>
        1: ──────────╰RY(0.2)─┤ ╰<Z@Z>
        0: ──RX(1.0)─╭●───────┤ ╭<Z@Z>
        1: ──────────╰RY(0.4)─┤ ╰<Z@Z>

        .. note::

            Note that the ``diagonal_shifts`` are interpreted as *first-order* derivative
            shift values. That means they are used to generate a first-order derivative
            recipe, which then is iterated in order to obtain the second-order derivative
            for the diagonal Hessian entry. Explicit control over the used second-order
            shifts is not implemented.

        Finally, the ``argnum`` argument can be used to compute the Hessian only for some of the
        variational parameters. Note that this indexing refers to trainable tape parameters both
        if ``tape`` is a ``QNode`` and if it is a ``QuantumTape``.

        >>> hessian_tapes, postproc_fn = qml.gradients.param_shift_hessian(tape, argnum=(1,))
        >>> postproc_fn(qml.execute(hessian_tapes, dev, None))
        ((array(0.), array(0.)), (array(0.), array(0.05998862)))

    """
    # Input validation happens up front, before any tape is generated.
    # State returns are rejected first.
    if any(isinstance(meas, StateMP) for meas in tape.measurements):
        raise ValueError(
            "Computing the Hessian of circuits that return the state is not supported."
        )

    # The parameter-shift Hessian implementation currently doesn't support variance measurements.
    # TODO: Support variances similar to how param_shift does it
    if any(isinstance(meas, VarianceMP) for meas in tape.measurements):
        raise ValueError(
            "Computing the Hessian of circuits that return variances is currently not supported."
        )

    # With no explicit argnum and nothing trainable there is nothing to
    # differentiate: warn and hand back the dedicated "no trainable" result.
    nothing_trainable = argnum is None and not tape.trainable_params
    if nothing_trainable:
        warnings.warn(
            "Attempted to compute the hessian of a tape with no trainable parameters. "
            "If this is unintended, please mark trainable parameters in accordance with the "
            "chosen auto differentiation framework, or via the 'tape.trainable_params' property."
        )
        return _no_trainable_grad_new(tape)

    # Boolean mask marking which Hessian entries were requested.
    hess_mask = _process_argnum(argnum, tape)

    # Number of requested diagonal entries, and number of arguments that take
    # part in at least one requested off-diagonal entry.
    diag_flags = qml.math.diag(hess_mask)
    compare_diag_to = qml.math.sum(diag_flags)
    offdiag_mask = hess_mask ^ qml.math.diag(diag_flags)
    compare_offdiag_to = qml.math.sum(qml.math.any(offdiag_mask, axis=0))

    if diagonal_shifts is not None:
        if len(diagonal_shifts) != compare_diag_to:
            raise ValueError(
                "The number of provided sets of shift values for diagonal entries "
                f"({len(diagonal_shifts)}) does not match the number of marked arguments "
                f"to compute the diagonal for ({compare_diag_to})."
            )

    if off_diagonal_shifts is not None:
        if len(off_diagonal_shifts) != compare_offdiag_to:
            raise ValueError(
                "The number of provided sets of shift values for off-diagonal entries "
                f"({len(off_diagonal_shifts)}) does not match the number of marked arguments "
                f"for which to compute at least one off-diagonal entry ({compare_offdiag_to})."
            )

    gradient_analysis(tape, grad_fn=qml.gradients.param_shift)

    # If argnum is given, the grad_method_validation may allow parameters with
    # finite-difference as method. If they are among the requested argnum, we catch this
    # further below (as no fallback function in analogy to `param_shift` is used currently).
    validation_mode = "best" if argnum is not None else "analytic"
    diff_methods = grad_method_validation(validation_mode, tape)

    # Parameters whose method is "0" are dropped from the mask: clear both
    # their row and their column.
    for position, grad_method in enumerate(diff_methods):
        if grad_method != "0":
            continue
        hess_mask[position] = False
        hess_mask[:, position] = False

    if qml.math.all(~hess_mask):  # pylint: disable=invalid-unary-operand-type
        return _all_zero_grad_new(tape)

    # Find all argument indices that appear in at least one derivative that was requested
    involved = qml.math.any(hess_mask, axis=0)
    requested_indices = qml.math.where(involved)[0]

    # If any of these argument indices correspond to a finite difference
    # derivative (diff_methods[idx]="F"), raise an error.
    unsupported_params = {arg for arg in requested_indices if diff_methods[arg] == "F"}
    if unsupported_params:
        raise ValueError(
            "The parameter-shift Hessian currently does not support the operations "
            f"for parameter(s) {unsupported_params}."
        )

    return _expval_hessian_param_shift_tuple(
        tape, hess_mask, diff_methods, diagonal_shifts, off_diagonal_shifts, f0
    )