# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module contains functions for computing the vector-Jacobian product
of tapes.
"""
# pylint: disable=no-member, too-many-branches
import autograd  # needed for the autograd.builtins.SequenceBox container type used below
import numpy as np

import pennylane as qml

def _convert(jac, dy_row):
    """Utility to convert and cast the Jacobian to the interface and dtype of ``dy_row``."""
    if isinstance(jac, tuple):
        jac_new = []
        for j in jac:
            j_ = qml.math.convert_like(j, dy_row)
            j_ = qml.math.cast_like(j_, dy_row)
            jac_new.append(j_)
        jac = tuple(jac_new)
    else:
        jac = qml.math.convert_like(jac, dy_row)
        jac = qml.math.cast_like(jac, dy_row)
    return jac
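
# A minimal sketch of what _convert does: each Jacobian entry is moved to the
# interface and dtype of dy_row. Torch is used here purely as an illustrative
# interface, not something this module requires:
#
# >>> import torch
# >>> dy_row = torch.tensor([1.0, 2.0], dtype=torch.float64)
# >>> _convert((np.array(0.1), np.array(0.2)), dy_row)
# (tensor(0.1000, dtype=torch.float64), tensor(0.2000, dtype=torch.float64))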

def _all_close_to_zero(dy):
    """
    Check if all entries of ``dy`` are close to 0. ``dy`` can also be a nested tuple
    structure of tensors, in which case this returns ``True`` iff all tensors are
    close to 0.
    """
    if not isinstance(dy, (list, tuple)):
        return qml.math.allclose(dy, 0)

    # call this method recursively
    return all(_all_close_to_zero(dy_) for dy_ in dy)
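
# A minimal illustration of the recursion: nested tuples are traversed, and the
# check passes only if every leaf tensor is close to zero (within the default
# qml.math.allclose tolerances):
#
# >>> _all_close_to_zero((np.zeros(2), (np.array(0.0), np.array(1e-9))))
# True
# >>> _all_close_to_zero((np.zeros(2), np.array([0.0, 0.5])))
# False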


def compute_vjp_single(dy, jac, num=None):
    """Convenience function to compute the vector-Jacobian product for a given
    vector of gradient outputs and a Jacobian for a single measurement tape.

    Args:
        dy (tensor_like): vector of gradient outputs
        jac (tensor_like, tuple): Jacobian matrix
        num (int): The length of the flattened ``dy`` argument. This is an
            optional argument, but can be useful to provide if ``dy`` potentially
            has no shape (for example, due to tracing or just-in-time compilation).

    Returns:
        tensor_like: the vector-Jacobian product

    **Examples**

    1. For a single parameter and a single measurement without shape (e.g. expval, var):

    .. code-block:: pycon

        >>> jac = np.array(0.1)
        >>> dy = np.array(2)
        >>> compute_vjp_single(dy, jac)
        array([0.2])

    2. For a single parameter and a single measurement with shape (e.g. probs):

    .. code-block:: pycon

        >>> jac = np.array([0.1, 0.2])
        >>> dy = np.array([1.0, 1.0])
        >>> compute_vjp_single(dy, jac)
        array([0.3])

    3. For multiple parameters (in this case 2 parameters) and a single measurement
       without shape (e.g. expval, var):

    .. code-block:: pycon

        >>> jac = tuple([np.array(0.1), np.array(0.2)])
        >>> dy = np.array(2)
        >>> compute_vjp_single(dy, jac)
        array([0.2, 0.4])

    4. For multiple parameters (in this case 2 parameters) and a single measurement
       with shape (e.g. probs):

    .. code-block:: pycon

        >>> jac = tuple([np.array([0.1, 0.2]), np.array([0.3, 0.4])])
        >>> dy = np.array([1.0, 2.0])
        >>> compute_vjp_single(dy, jac)
        array([0.5, 1.1])

    """
    if jac is None:
        return None

    dy_row = qml.math.reshape(dy, [-1])

    if num is None:
        num = qml.math.shape(dy_row)[0]

    if not isinstance(dy_row, np.ndarray):
        jac = _convert(jac, dy_row)

    # Note: For generality, all exception type warnings are disabled.
    # TODO: Explicitly catalogue and update raises for known types.

    # Single measurement with a single param
    if not isinstance(jac, (tuple, autograd.builtins.SequenceBox)):
        # No trainable parameters
        if jac.shape == (0,):
            res = qml.math.zeros((1, 0))
            return res

        # Single measurement with no dimension e.g. expval or with dimension e.g. probs
        if num == 1:
            jac = qml.math.squeeze(jac)
        jac = qml.math.reshape(jac, (-1, 1))
        try:
            res = dy_row @ jac
        except Exception:  # pylint: disable=broad-except
            res = qml.math.tensordot(jac, dy_row, [[0], [0]])

    # Single measurement with multiple params
    else:
        # No trainable parameters (adjoint)
        if len(jac) == 0:
            res = qml.math.zeros((1, 0))
            return res

        # Single measurement with no dimension e.g. expval
        if num == 1:
            jac = qml.math.reshape(qml.math.stack(jac), (1, -1))
            try:
                res = dy_row @ jac
            except Exception:  # pylint: disable=broad-except
                res = qml.math.tensordot(jac, dy_row, [[0], [0]])

        # Single measurement with dimension e.g. probs
        else:
            jac = qml.math.stack(jac)
            try:
                res = jac @ dy_row
            except Exception:  # pylint: disable=broad-except
                res = qml.math.tensordot(jac, dy_row, [[1], [0]])

    return res
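
# A hedged sketch of why the ``num`` argument exists: under just-in-time
# compilation, ``dy`` may be an abstract tracer whose flattened length cannot
# always be read off, so the caller supplies it. The use of jax.jit below is
# an illustrative assumption, not something this module mandates:
#
# >>> import jax
# >>> jac = (jax.numpy.array(0.1), jax.numpy.array(0.2))
# >>> fn = jax.jit(lambda dy: compute_vjp_single(dy, jac, num=1))
# >>> fn(jax.numpy.array(2.0))
# Array([0.2, 0.4], dtype=float32)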

def compute_vjp_multi(dy, jac, num=None):
    """Convenience function to compute the vector-Jacobian product for a given
    vector of gradient outputs and a Jacobian for a tape with multiple measurements.

    Args:
        dy (tensor_like): vector of gradient outputs
        jac (tensor_like, tuple): Jacobian matrix
        num (int): The length of the flattened ``dy`` argument. This is an
            optional argument, but can be useful to provide if ``dy`` potentially
            has no shape (for example, due to tracing or just-in-time compilation).

    Returns:
        tensor_like: the vector-Jacobian product

    **Examples**

    1. For a single parameter and multiple measurements (one without shape and one
       with shape, e.g. expval and probs):

    .. code-block:: pycon

        >>> jac = tuple([np.array(0.1), np.array([0.3, 0.4])])
        >>> dy = tuple([np.array(1.0), np.array([1.0, 2.0])])
        >>> compute_vjp_multi(dy, jac)
        array([1.2])

    2. For multiple parameters (in this case 2 parameters) and multiple measurements
       (one without shape and one with shape, e.g. expval and probs):

    .. code-block:: pycon

        >>> jac = ((np.array(0.1), np.array(0.2)), (np.array([0.3, 0.4]), np.array([0.5, 0.6])))
        >>> dy = tuple([np.array(1.0), np.array([1.0, 2.0])])
        >>> compute_vjp_multi(dy, jac)
        array([1.2, 1.9])

    """
    if jac is None:
        return None

    # Single parameter
    if not isinstance(jac[0], (tuple, autograd.builtins.SequenceBox)):
        res = []
        for d, j_ in zip(dy, jac):
            res.append(compute_vjp_single(d, j_, num=num))
        res = qml.math.sum(qml.math.stack(res), axis=0)
    # Multiple parameters
    else:
        try:
            dy_interface = qml.math.get_interface(dy[0])
            # dy  -> (i, j)    observables, entries per observable
            # jac -> (i, k, j) observables, parameters, entries per observable
            # Contractions over observables and entries per observable
            dy_shape = qml.math.shape(dy)
            if len(dy_shape) > 1:  # multiple values exist per observable output
                return qml.math.array(qml.math.einsum("ij,i...j", dy, jac), like=dy[0])

            if dy_interface == "tensorflow":
                # TF needs a different path for Hessian support
                return qml.math.array(
                    qml.math.einsum("i,i...", dy, jac, like=dy[0]), like=dy[0]
                )  # Scalar value per observable output
            return qml.math.array(
                qml.math.einsum("i,i...", dy, jac), like=dy[0]
            )  # Scalar value per observable output
        # NOTE: We want any potential failure to fall back here, so catch every exception type
        # TODO: Catalogue and update for expected exception types
        except Exception:  # pylint: disable=broad-except
            res = []
            for d, j_ in zip(dy, jac):
                sub_res = []
                for j in j_:
                    sub_res.append(qml.math.squeeze(compute_vjp_single(d, j, num=num)))
                res.append(sub_res)
            res = qml.math.stack([qml.math.stack(r) for r in res])
            res = qml.math.sum(res, axis=0)

    return res
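
# A small NumPy-only sketch (illustrative, with arbitrary shapes) of the fast
# einsum path above: with dy of shape (i, j) and jac of shape (i, k, j), the
# contraction "ij,i...j" sums over observables i and per-observable entries j,
# leaving one VJP entry per trainable parameter k:
#
# >>> dy = np.ones((2, 3))
# >>> jac = np.arange(2 * 4 * 3.0).reshape(2, 4, 3)
# >>> np.einsum("ij,i...j", dy, jac).shape
# (4,)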

def vjp(tape, dy, gradient_fn, gradient_kwargs=None):
    r"""Generate the gradient tapes and processing function required to compute
    the vector-Jacobian products of a tape.

    Consider a function :math:`\mathbf{f}(\mathbf{x})`. The Jacobian is given by

    .. math::

        \mathbf{J}_{\mathbf{f}}(\mathbf{x}) = \begin{pmatrix}
            \frac{\partial f_1}{\partial x_1} &\cdots &\frac{\partial f_1}{\partial x_n}\\
            \vdots &\ddots &\vdots\\
            \frac{\partial f_m}{\partial x_1} &\cdots &\frac{\partial f_m}{\partial x_n}\\
        \end{pmatrix}.

    During backpropagation, the chain rule is applied. For example, consider the
    cost function :math:`h = y \circ f: \mathbb{R}^n \rightarrow \mathbb{R}`,
    where :math:`y: \mathbb{R}^m \rightarrow \mathbb{R}`. The gradient is:

    .. math::

        \nabla h(\mathbf{x}) = \frac{\partial y}{\partial \mathbf{f}}
        \frac{\partial \mathbf{f}}{\partial \mathbf{x}}
        = \frac{\partial y}{\partial \mathbf{f}} \mathbf{J}_{\mathbf{f}}(\mathbf{x}).

    Denote :math:`d\mathbf{y} = \frac{\partial y}{\partial \mathbf{f}}`; we can
    write this in the form of a matrix multiplication:

    .. math::

        \left[\nabla h(\mathbf{x})\right]_{j} = \sum_{i=0}^m d\mathbf{y}_i ~ \mathbf{J}_{ij}.

    Thus, we can see that the gradient of the cost function is given by the
    so-called **vector-Jacobian product**; the product of the row-vector
    :math:`d\mathbf{y}`, representing the gradient of subsequent components of
    the cost function, and :math:`\mathbf{J}`, the Jacobian of the current node
    of interest.

    Args:
        tape (.QuantumTape): quantum tape to differentiate
        dy (tensor_like): Gradient-output vector. Must have shape matching the
            output shape of the corresponding tape.
        gradient_fn (callable): the gradient transform to use to differentiate
            the tape
        gradient_kwargs (dict): dictionary of keyword arguments to pass when
            determining the gradients of tapes

    Returns:
        tensor_like or None: Vector-Jacobian product. Returns ``None`` if the tape
        has no trainable parameters.

    **Example**

    Consider the following quantum tape with PyTorch parameters:

    .. code-block:: python

        import torch

        x = torch.tensor([[0.1, 0.2, 0.3],
                          [0.4, 0.5, 0.6]], requires_grad=True, dtype=torch.float64)

        ops = [
            qml.RX(x[0, 0], wires=0),
            qml.RY(x[0, 1], wires=1),
            qml.RZ(x[0, 2], wires=0),
            qml.CNOT(wires=[0, 1]),
            qml.RX(x[1, 0], wires=1),
            qml.RY(x[1, 1], wires=0),
            qml.RZ(x[1, 2], wires=1)
        ]
        measurements = [qml.expval(qml.Z(0)), qml.probs(wires=1)]
        tape = qml.tape.QuantumTape(ops, measurements)

    We can use the ``vjp`` function to compute the vector-Jacobian product,
    given a gradient-output vector ``dy``:

    >>> dy = torch.tensor([1., 1., 1.], dtype=torch.float64)
    >>> vjp_tapes, fn = qml.gradients.vjp(tape, dy, qml.gradients.param_shift)

    Note that ``dy`` has shape ``(3,)``, matching the output dimension of the
    tape (1 expectation and 2 probability values).

    Executing the VJP tapes, and applying the processing function:

    >>> dev = qml.device("default.qubit")
    >>> vjp = fn(qml.execute(vjp_tapes, dev, gradient_fn=qml.gradients.param_shift, interface="torch"))
    >>> vjp
    tensor([-1.1562e-01, -1.3862e-02, -9.0841e-03, -1.5214e-16, -4.8217e-01,
             2.1329e-17], dtype=torch.float64, grad_fn=<SumBackward1>)

    The output VJP is also differentiable with respect to the tape parameters:

    >>> cost = torch.sum(vjp)
    >>> cost.backward()
    >>> x.grad
    tensor([[-1.1025e+00, -2.0554e-01, -1.4917e-01],
            [-1.2490e-16, -9.1580e-01,  0.0000e+00]], dtype=torch.float64)
    """
    gradient_kwargs = gradient_kwargs or {}
    num_params = len(tape.trainable_params)

    if num_params == 0:
        # The tape has no trainable parameters; the VJP is simply ``None``.
        return [], lambda _, num=None: None

    try:
        if _all_close_to_zero(dy):
            # If the dy vector is zero, then the
            # corresponding element of the VJP will be zero,
            # and we can avoid a quantum computation.

            def func(_, num=None):  # pylint: disable=unused-argument
                res = qml.math.convert_like(np.zeros([num_params]), dy)
                multi = len(tape.measurements) > 1
                if multi:
                    multi_dy = dy[0]
                    res = qml.math.convert_like(res, multi_dy)
                    return qml.math.cast_like(res, multi_dy)
                return qml.math.cast_like(res, dy)

            return [], func
    except (AttributeError, TypeError, NotImplementedError):
        pass

    gradient_tapes, fn = gradient_fn(tape, **gradient_kwargs)

    def processing_fn(results, num=None):
        # postprocess results to compute the Jacobian
        jac = fn(results)
        multi = len(tape.measurements) > 1
        comp_vjp_fn = compute_vjp_multi if multi else compute_vjp_single

        if not tape.shots.has_partitioned_shots:
            return comp_vjp_fn(dy, jac, num=num)

        vjp_ = [comp_vjp_fn(dy_, jac_, num=num) for dy_, jac_ in zip(dy, jac)]
        return qml.math.sum(qml.math.stack(vjp_), 0)

    return gradient_tapes, processing_fn
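
# A hedged sketch of the zero-dy shortcut above: when every entry of dy is
# close to zero, no gradient tapes are generated and the processing function
# returns a zero vector without any quantum evaluation (the qml.Z observable
# follows the docstring examples):
#
# >>> tape = qml.tape.QuantumTape([qml.RX(0.5, wires=0)], [qml.expval(qml.Z(0))])
# >>> vjp_tapes, fn = vjp(tape, np.array(0.0), qml.gradients.param_shift)
# >>> vjp_tapes, fn(None)
# ([], array([0.]))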
# pylint: disable=too-many-arguments
def batch_vjp(tapes, dys, gradient_fn, reduction="append", gradient_kwargs=None):
    r"""Generate the gradient tapes and processing function required to compute
    the vector-Jacobian products of a batch of tapes.

    Consider a function :math:`\mathbf{f}(\mathbf{x})`. The Jacobian is given by

    .. math::

        \mathbf{J}_{\mathbf{f}}(\mathbf{x}) = \begin{pmatrix}
            \frac{\partial f_1}{\partial x_1} &\cdots &\frac{\partial f_1}{\partial x_n}\\
            \vdots &\ddots &\vdots\\
            \frac{\partial f_m}{\partial x_1} &\cdots &\frac{\partial f_m}{\partial x_n}\\
        \end{pmatrix}.

    During backpropagation, the chain rule is applied. For example, consider the
    cost function :math:`h = y \circ f: \mathbb{R}^n \rightarrow \mathbb{R}`,
    where :math:`y: \mathbb{R}^m \rightarrow \mathbb{R}`. The gradient is:

    .. math::

        \nabla h(\mathbf{x}) = \frac{\partial y}{\partial \mathbf{f}}
        \frac{\partial \mathbf{f}}{\partial \mathbf{x}}
        = \frac{\partial y}{\partial \mathbf{f}} \mathbf{J}_{\mathbf{f}}(\mathbf{x}).

    Denote :math:`d\mathbf{y} = \frac{\partial y}{\partial \mathbf{f}}`; we can
    write this in the form of a matrix multiplication:

    .. math::

        \left[\nabla h(\mathbf{x})\right]_{j} = \sum_{i=0}^m d\mathbf{y}_i ~ \mathbf{J}_{ij}.

    Thus, we can see that the gradient of the cost function is given by the
    so-called **vector-Jacobian product**; the product of the row-vector
    :math:`d\mathbf{y}`, representing the gradient of subsequent components of
    the cost function, and :math:`\mathbf{J}`, the Jacobian of the current node
    of interest.

    Args:
        tapes (Sequence[.QuantumTape]): sequence of quantum tapes to differentiate
        dys (Sequence[tensor_like]): Sequence of gradient-output vectors ``dy``.
            Must be the same length as ``tapes``. Each ``dy`` tensor should have
            shape matching the output shape of the corresponding tape.
        gradient_fn (callable): the gradient transform to use to differentiate
            the tapes
        reduction (str): Determines how the vector-Jacobian products are returned.
            If ``append``, then the output of the function will be of the form
            ``List[tensor_like]``, with each element corresponding to the VJP of each
            input tape. If ``extend``, then the output VJPs will be concatenated.
        gradient_kwargs (dict): dictionary of keyword arguments to pass when
            determining the gradients of tapes

    Returns:
        List[tensor_like or None]: list of vector-Jacobian products. ``None`` elements
        correspond to tapes with no trainable parameters.

    **Example**

    Consider the following Torch-compatible quantum tapes:

    .. code-block:: python

        import torch

        x = torch.tensor([[0.1, 0.2, 0.3],
                          [0.4, 0.5, 0.6]], requires_grad=True, dtype=torch.float64)

        ops = [
            qml.RX(x[0, 0], wires=0),
            qml.RY(x[0, 1], wires=1),
            qml.RZ(x[0, 2], wires=0),
            qml.CNOT(wires=[0, 1]),
            qml.RX(x[1, 0], wires=1),
            qml.RY(x[1, 1], wires=0),
            qml.RZ(x[1, 2], wires=1)
        ]
        measurements1 = [qml.expval(qml.Z(0)), qml.probs(wires=1)]
        tape1 = qml.tape.QuantumTape(ops, measurements1)

        measurements2 = [qml.expval(qml.Z(0) @ qml.Z(1))]
        tape2 = qml.tape.QuantumTape(ops, measurements2)

        tapes = [tape1, tape2]

    Both tapes share the same circuit ansatz, but have different measurement outputs.

    We can use the ``batch_vjp`` function to compute the vector-Jacobian product,
    given a list of gradient-output vectors ``dys`` per tape:

    >>> dys = [torch.tensor([1., 1., 1.], dtype=torch.float64),
    ...        torch.tensor([1.], dtype=torch.float64)]
    >>> vjp_tapes, fn = qml.gradients.batch_vjp(tapes, dys, qml.gradients.param_shift)

    Note that each ``dy`` has shape matching the output dimension of the tape
    (``tape1`` has 1 expectation and 2 probability values, i.e. 3 outputs, and
    ``tape2`` has 1 expectation value).

    Executing the VJP tapes, and applying the processing function:

    >>> dev = qml.device("default.qubit")
    >>> vjps = fn(qml.execute(vjp_tapes, dev, gradient_fn=qml.gradients.param_shift, interface="torch"))
    >>> vjps
    [tensor([-1.1562e-01, -1.3862e-02, -9.0841e-03, -1.5214e-16, -4.8217e-01,
              2.1329e-17], dtype=torch.float64, grad_fn=<SumBackward1>),
     tensor([ 1.7393e-01, -1.6412e-01, -5.3983e-03, -2.9366e-01, -4.0083e-01,
              2.1134e-17], dtype=torch.float64, grad_fn=<SqueezeBackward3>)]

    We have two VJPs; one per tape. Each has length equal to the number of
    trainable parameters on the tapes (6).

    The output VJPs are also differentiable with respect to the tape parameters:

    >>> cost = torch.sum(vjps[0] + vjps[1])
    >>> cost.backward()
    >>> x.grad
    tensor([[-0.4792, -0.9086, -0.2420],
            [-0.0930, -1.0772,  0.0000]], dtype=torch.float64)
    """
    gradient_kwargs = gradient_kwargs or {}
    reshape_info = []
    gradient_tapes = []
    processing_fns = []

    # Loop through the tapes and dys vector
    for tape, dy in zip(tapes, dys):
        g_tapes, fn = vjp(tape, dy, gradient_fn, gradient_kwargs=gradient_kwargs)
        reshape_info.append(len(g_tapes))
        processing_fns.append(fn)
        gradient_tapes.extend(g_tapes)

    def processing_fn(results, nums=None):
        vjps = []
        start = 0

        if nums is None:
            nums = [None] * len(tapes)

        for t_idx in range(len(tapes)):
            # extract the correct results from the flat list
            res_len = reshape_info[t_idx]
            res_t = results[start : start + res_len]
            start += res_len

            # postprocess results to compute the VJP
            vjp_ = processing_fns[t_idx](res_t, num=nums[t_idx])

            if vjp_ is None:
                if reduction == "append":
                    vjps.append(None)
                continue

            if isinstance(reduction, str):
                getattr(vjps, reduction)(vjp_)
            elif callable(reduction):
                reduction(vjps, vjp_)

        return vjps

    return gradient_tapes, processing_fn
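
# A hedged illustration of how a string ``reduction`` is dispatched inside
# processing_fn: it is looked up as a list method via getattr, so "append"
# keeps one VJP per tape while "extend" flattens the per-tape VJPs into a
# single list. Pure-Python sketch, not tied to any tape:
#
# >>> out = []
# >>> getattr(out, "append")([1.0, 2.0])
# >>> getattr(out, "extend")([3.0, 4.0])
# >>> out
# [[1.0, 2.0], 3.0, 4.0]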
