Source code for pennylane.devices.qubit.adjoint_jacobian

# Copyright 2018-2023 Xanadu Quantum Technologies Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Functions to apply adjoint jacobian differentiation"""
import logging
from numbers import Number
from typing import Tuple

import numpy as np

import pennylane as qml
from pennylane.logging import debug_logger
from pennylane.operation import operation_derivative
from pennylane.tape import QuantumTape

from .apply_operation import apply_operation
from .initialize_state import create_initial_state
from .simulate import get_final_state

# pylint: disable=protected-access, too-many-branches

logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())


def _dot_product_real(bra, ket, num_wires):
    """Helper for calculating the inner product for adjoint differentiation."""
    # broadcasted inner product not summing over first dimension of the bra tensor
    sum_axes = tuple(range(1, num_wires + 1))
    return qml.math.real(qml.math.sum(qml.math.conj(bra) * ket, axis=sum_axes))


def _adjoint_jacobian_state(tape: QuantumTape):
    """Calculate the full jacobian for a circuit that returns the state.

    Args:
        tape (QuantumTape): the circuit we wish to differentiate

    Returns:
        TensorLike: the full jacobian.

    See ``adjoint_jacobian.md`` for details on the algorithm.
    """
    jacobian = []

    has_state_prep = isinstance(tape[0], qml.operation.StatePrepBase)
    state = create_initial_state(tape.wires, tape[0] if has_state_prep else None)

    param_idx = has_state_prep
    for op in tape.operations[has_state_prep:]:
        jacobian = [apply_operation(op, jac) for jac in jacobian]

        if op.num_params == 1:
            if param_idx in tape.trainable_params:
                d_op_matrix = operation_derivative(op)
                jacobian.append(
                    apply_operation(qml.QubitUnitary(d_op_matrix, wires=op.wires), state)
                )

            param_idx += 1
        state = apply_operation(op, state)

    return tuple(jac.flatten() for jac in jacobian)


[docs]@debug_logger def adjoint_jacobian(tape: QuantumTape, state=None): """Implements the adjoint method outlined in `Jones and Gacon <https://arxiv.org/abs/2009.02823>`__ to differentiate an input tape. After a forward pass, the circuit is reversed by iteratively applying adjoint gates to scan backwards through the circuit. .. note:: The adjoint differentiation method has the following restrictions: * Cannot differentiate with respect to observables. * Cannot differentiate with respect to state-prep operations. * Observable being measured must have a matrix. Args: tape (QuantumTape): circuit that the function takes the gradient of state (TensorLike): the final state of the circuit; if not provided, the final state will be computed by executing the tape Returns: array or tuple[array]: the derivative of the tape with respect to trainable parameters. Dimensions are ``(len(observables), len(trainable_params))``. """ # Map wires if custom wire labels used tape = tape.map_to_standard_wires() if isinstance(tape.measurements[0], qml.measurements.StateMP): return _adjoint_jacobian_state(tape) ket = state if state is not None else get_final_state(tape)[0] n_obs = len(tape.observables) bras = np.empty([n_obs] + [2] * len(tape.wires), dtype=np.complex128) for kk, obs in enumerate(tape.observables): bras[kk, ...] = 2 * apply_operation(obs, ket) jac = np.zeros((len(tape.observables), len(tape.trainable_params))) param_number = len(tape.get_parameters(trainable_only=False, operations_only=True)) - 1 trainable_param_number = len(tape.trainable_params) - 1 for op in reversed(tape.operations[tape.num_preps :]): if isinstance(op, qml.Snapshot): continue adj_op = qml.adjoint(op) ket = apply_operation(adj_op, ket) if op.num_params == 1: if param_number in tape.trainable_params: d_op_matrix = operation_derivative(op) ket_temp = apply_operation(qml.QubitUnitary(d_op_matrix, wires=op.wires), ket) jac[:, trainable_param_number] = _dot_product_real(bras, ket_temp, len(tape.wires)) trainable_param_number -= 1 param_number -= 1 for kk in range(n_obs): bras[kk, ...] = apply_operation(adj_op, bras[kk, ...]) # Post-process the Jacobian matrix for the new return jac = np.squeeze(jac) if jac.ndim == 0: return np.array(jac) if jac.ndim == 1: return tuple(np.array(j) for j in jac) # must be 2-dimensional return tuple(tuple(np.array(j_) for j_ in j) for j in jac)
[docs]@debug_logger def adjoint_jvp(tape: QuantumTape, tangents: Tuple[Number], state=None): """The jacobian vector product used in forward mode calculation of derivatives. Implements the adjoint method outlined in `Jones and Gacon <https://arxiv.org/abs/2009.02823>`__ to differentiate an input tape. After a forward pass, the circuit is reversed by iteratively applying adjoint gates to scan backwards through the circuit. .. note:: The adjoint differentiation method has the following restrictions: * Cannot differentiate with respect to observables. * Observable being measured must have a matrix. Args: tape (QuantumTape): circuit that the function takes the gradient of tangents (Tuple[Number]): gradient vector for input parameters. state (TensorLike): the final state of the circuit; if not provided, the final state will be computed by executing the tape Returns: Tuple[Number]: gradient vector for output parameters """ # Map wires if custom wire labels used if set(tape.wires) != set(range(tape.num_wires)): wire_map = {w: i for i, w in enumerate(tape.wires)} tapes, fn = qml.map_wires(tape, wire_map) tape = fn(tapes) ket = state if state is not None else get_final_state(tape)[0] n_obs = len(tape.observables) bras = np.empty([n_obs] + [2] * len(tape.wires), dtype=np.complex128) for i, obs in enumerate(tape.observables): bras[i] = apply_operation(obs, ket) param_number = len(tape.get_parameters(trainable_only=False, operations_only=True)) - 1 trainable_param_number = len(tape.trainable_params) - 1 tangents_out = np.zeros(n_obs) for op in reversed(tape.operations[tape.num_preps :]): adj_op = qml.adjoint(op) ket = apply_operation(adj_op, ket) if op.num_params == 1: if param_number in tape.trainable_params: # don't do anything if the tangent is 0 if not np.allclose(tangents[trainable_param_number], 0): d_op_matrix = operation_derivative(op) ket_temp = apply_operation(qml.QubitUnitary(d_op_matrix, wires=op.wires), ket) tangents_out += ( 2 * _dot_product_real(bras, ket_temp, len(tape.wires)) * tangents[trainable_param_number] ) trainable_param_number -= 1 param_number -= 1 for i in range(n_obs): bras[i] = apply_operation(adj_op, bras[i]) if n_obs == 1: return np.array(tangents_out[0]) return tuple(np.array(t) for t in tangents_out)
def _get_vjp_bras(tape, cotangents, ket): """Helper function for getting the bras for adjoint vjp, the batch size of the cotangents, as well as a list of indices for which the cotangents are zero. Args: tape (QuantumTape): circuit that the function takes the gradient of. tangents (Tuple[Number]): gradient vector for input parameters. ket (TensorLike): the final state of the circuit. Returns: Tuple[TensorLike, int, List]: The return contains the following: * Final bra for batch size ``None``, else array of bras * Batch size. None if cotangents are not batched * List containing batch indices that are zero. Empty for unbatched cotangents """ if isinstance(tape.measurements[0], qml.measurements.StateMP): batched_cotangents = np.ndim(cotangents) == 2 batch_size = np.shape(cotangents)[0] if batched_cotangents else None bras = np.conj(cotangents.reshape(-1, *ket.shape)) bras = bras if batched_cotangents else np.squeeze(bras) return bras, batch_size, [] # If not state measurement, measurements are guaranteed to be expectation values single_cotangent = len(tape.measurements) == 1 if not single_cotangent: # Pad cotangents if shape is inhomogenous # inner_shape will only be None if cotangents is a vector. We assume that for # inhomogenous cotangents, all non-scalar values have the same shape. inner_shape = next((np.shape(cot) for cot in cotangents if np.shape(cot) != ()), None) if inner_shape is not None: # Batched cotangents. Find scalar zeros and pad to make the shape of cotangents # homogenous new_cotangents = [] for i, c in enumerate(cotangents): if np.shape(c) == () and np.allclose(c, 0.0): new_cotangents.append(np.zeros(inner_shape)) else: new_cotangents.append(c) cotangents = new_cotangents cotangents = np.array(cotangents) if single_cotangent: # Expand dimensions for cases when there is a single broadcasted cotangent # so that the cotangent has 2 dimensions, which is expected in the rest of the # function for batched cotangents. For unbatched cases, this will make a scalar # cotangent a one-item array cotangents = np.expand_dims(cotangents, 0) # One dimension for number of expectation values, one dimension for batch size. batched_cotangents = np.ndim(cotangents) == 2 batch_size = cotangents.shape[1] if batched_cotangents else None if np.allclose(cotangents, 0.0): return None, batch_size, [] new_obs, null_batch_indices = [], [] # Collect list of observables to use for the adjoint algorithm. These will be used # to construct the initial bras if batched_cotangents: for i, cots in enumerate(cotangents.T): new_cs, new_os = [], [] for c, o in zip(cots, tape.observables): if not np.allclose(c, 0.0): new_cs.append(c) new_os.append(o) if len(new_cs) == 0: null_batch_indices.append(i) else: new_obs.append(qml.dot(new_cs, new_os)) else: new_cs, new_os = [], [] for c, o in zip(cotangents, tape.observables): if not np.allclose(c, 0.0): new_cs.append(c) new_os.append(o) new_obs.append(qml.dot(new_cs, new_os)) # Create bra(s) by taking product of observable(s) with the final state bras = np.empty((len(new_obs), *ket.shape), dtype=ket.dtype) for kk, obs in enumerate(new_obs): if obs.pauli_rep is not None: flat_bra = obs.pauli_rep.dot(ket.flatten(), wire_order=list(range(tape.num_wires))) bras[kk] = 2 * flat_bra.reshape(ket.shape) else: bras[kk] = 2 * apply_operation(obs, ket) bras = bras if batched_cotangents else np.squeeze(bras) return bras, batch_size, null_batch_indices
[docs]@debug_logger def adjoint_vjp(tape: QuantumTape, cotangents: Tuple[Number], state=None): """The vector jacobian product used in reverse-mode differentiation. Implements the adjoint method outlined in `Jones and Gacon <https://arxiv.org/abs/2009.02823>`__ to differentiate an input tape. After a forward pass, the circuit is reversed by iteratively applying adjoint gates to scan backwards through the circuit. .. note:: The adjoint differentiation method has the following restrictions: * Cannot differentiate with respect to observables. * Observable being measured must have a matrix. Args: tape (QuantumTape): circuit that the function takes the gradient of cotangents (Tuple[Number]): gradient vector for output parameters. For computing the full Jacobian, the cotangents can be batched to vectorize the computation. In this case, the cotangents can have the following shapes. ``batch_size`` below refers to the number of entries in the Jacobian: * For a state measurement, cotangents must have shape ``(batch_size, 2 ** n_wires)``. * For ``n`` expectation values, the cotangents must have shape ``(n, batch_size)``. If ``n = 1``, then the shape must be ``(batch_size,)``. state (TensorLike): the final state of the circuit; if not provided, the final state will be computed by executing the tape Returns: Tuple[Number]: gradient vector for input parameters """ # See ``adjoint_jacobian.md`` to more information on the algorithm. # Map wires if custom wire labels used) if set(tape.wires) != set(range(tape.num_wires)): wire_map = {w: i for i, w in enumerate(tape.wires)} tapes, fn = qml.map_wires(tape, wire_map) tape = fn(tapes) ket = state if state is not None else get_final_state(tape)[0] bras, batch_size, null_batch_indices = _get_vjp_bras(tape, cotangents, ket) if bras is None: # Cotangents are zeros if batch_size is None: return tuple(0.0 for _ in tape.trainable_params) return tuple(np.zeros((len(tape.trainable_params), batch_size))) if isinstance(tape.measurements[0], qml.measurements.StateMP): def real_if_expval(val): return val else: def real_if_expval(val): return np.real(val) param_number = len(tape.get_parameters(trainable_only=False, operations_only=True)) - 1 trainable_param_number = len(tape.trainable_params) - 1 res_shape = ( (len(tape.trainable_params),) if batch_size is None else (len(tape.trainable_params), batch_size) ) cotangents_in = np.empty(res_shape, dtype=tape.measurements[0].numeric_type) summing_axis = None if batch_size is None else tuple(range(1, np.ndim(bras))) for op in reversed(tape.operations[tape.num_preps :]): adj_op = qml.adjoint(op) ket = apply_operation(adj_op, ket) if op.num_params == 1: if param_number in tape.trainable_params: d_op_matrix = operation_derivative(op) ket_temp = apply_operation(qml.QubitUnitary(d_op_matrix, wires=op.wires), ket) # Pad cotangent in with zeros for batch number with zero cotangents cot_in = real_if_expval(np.sum(np.conj(bras) * ket_temp, axis=summing_axis)) for i in null_batch_indices: cot_in = np.insert(cot_in, i, 0.0) cotangents_in[trainable_param_number] = cot_in trainable_param_number -= 1 param_number -= 1 bras = apply_operation(adj_op, bras, is_state_batched=bool(batch_size)) return tuple(cotangents_in)