# Source code for pennylane.gradients.finite_difference

# Copyright 2018-2021 Xanadu Quantum Technologies Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module contains functions for computing the finite-difference gradient
of a quantum tape.
"""
# pylint: disable=protected-access,too-many-arguments,too-many-branches,too-many-statements
import functools
import warnings
from collections.abc import Sequence

import numpy as np
from scipy.special import factorial

import pennylane as qml

from .general_shift_rules import generate_shifted_tapes
from .gradient_transform import (
    choose_grad_methods,
    grad_method_validation,
    gradient_analysis,
)

@functools.lru_cache(maxsize=None)
def finite_diff_coeffs(n, approx_order, strategy):
    r"""Generate the finite difference shift values and corresponding
    term coefficients for a given derivative order, approximation accuracy,
    and strategy.

    Args:
        n (int): Positive integer specifying the order of the derivative. For example,
            ``n=1`` corresponds to the first derivative, ``n=2`` the second derivative, etc.
        approx_order (int): Positive integer referring to the approximation order of the
            returned coefficients, e.g., ``approx_order=1`` corresponds to the
            first-order approximation to the derivative.
        strategy (str): One of ``"forward"``, ``"center"``, or ``"backward"``.
            For the ``"forward"`` strategy, the finite-difference shifts occur at the points
            :math:`x_0, x_0+h, x_0+2h, \dots`, where :math:`h` is some small
            step size. The ``"backward"`` strategy is similar, but in
            reverse: :math:`x_0, x_0-h, x_0-2h, \dots`. Finally, the
            ``"center"`` strategy results in shifts symmetric around the
            unshifted point: :math:`\dots, x_0-2h, x_0-h, x_0, x_0+h, x_0+2h, \dots`.

    Returns:
        array[float]: A ``(2, N)`` array. The first row corresponds to the
        coefficients, and the second row corresponds to the shifts.

    Raises:
        ValueError: if ``n`` or ``approx_order`` is not a positive integer, if the
            strategy is unknown, or if the ``"center"`` strategy is requested with
            an odd approximation order.

    **Example**

    >>> finite_diff_coeffs(n=1, approx_order=1, strategy="forward")
    array([[-1.,  1.],
           [ 0.,  1.]])

    For example, this results in the linear combination:

    .. math:: \frac{-y(x_0) + y(x_0 + h)}{h}

    where :math:`h` is the finite-difference step size.

    More examples:

    >>> finite_diff_coeffs(n=1, approx_order=2, strategy="center")
    array([[-0.5,  0.5],
           [-1. ,  1. ]])
    >>> finite_diff_coeffs(n=2, approx_order=2, strategy="center")
    array([[-2.,  1.,  1.],
           [ 0., -1.,  1.]])

    **Details**

    Consider a function :math:`y(x)`. We wish to approximate the :math:`n`-th
    derivative at point :math:`x_0`, :math:`y^{(n)}(x_0)`, by sampling the function
    at :math:`N` distinct points:

    .. math:: y^{(n)}(x_0) \approx \sum_{i=1}^N c_i y(x_i)

    where :math:`c_i` are coefficients, and :math:`x_i = x_0 + s_i` are the points
    we sample the function at.

    Consider the Taylor expansion of :math:`y(x_i)` around the point :math:`x_0`:

    .. math::

        y^{(n)}(x_0) \approx \sum_{i=1}^N c_i y(x_i)
        &= \sum_{i=1}^N c_i \left[ y(x_0) + y'(x_0)(x_i-x_0)
           + \frac{1}{2} y''(x_0)(x_i-x_0)^2 + \cdots \right]\\
        & = \sum_{j=0}^m y^{(j)}(x_0)
            \left[\sum_{i=1}^N \frac{c_i s_i^j}{j!} + \mathcal{O}(s_i^m) \right],

    where :math:`s_i = x_i - x_0`. For this approximation to be satisfied, we must
    therefore have

    .. math::

        \sum_{i=1}^N s_i^j c_i = \begin{cases} j!, &j=n\\ 0, & j\neq n\end{cases}.

    Thus, to determine the coefficients :math:`c_i \in \{c_1, \dots, c_N\}` for
    particular shift values :math:`s_i \in \{s_1, \dots, s_N\}` and derivative
    order :math:`n`, we must solve this linear system of equations.
    """
    # Validate the type *before* the comparison so that non-numeric input
    # raises the documented ValueError rather than a TypeError from `<`.
    if not isinstance(n, int) or n < 1:
        raise ValueError("Derivative order n must be a positive integer.")

    if not isinstance(approx_order, int) or approx_order < 1:
        raise ValueError("Approximation order must be a positive integer.")

    # Minimum number of sample points needed for the requested derivative
    # order and approximation accuracy.
    num_points = approx_order + 2 * np.floor((n + 1) / 2) - 1
    N = num_points + 1 if n % 2 == 0 else num_points

    if strategy == "forward":
        shifts = np.arange(N, dtype=np.float64)

    elif strategy == "backward":
        shifts = np.arange(-N + 1, 1, dtype=np.float64)

    elif strategy == "center":
        if approx_order % 2 != 0:
            raise ValueError("Centered finite-difference requires an even order approximation.")

        N = num_points // 2
        shifts = np.arange(-N, N + 1, dtype=np.float64)

    else:
        raise ValueError(
            f"Unknown strategy {strategy}. Must be one of 'forward', 'backward', 'center'."
        )

    # Solve the Vandermonde system A @ coeffs = b described in the docstring:
    # row j of A contains s_i**j, and b is nonzero only at index n.
    A = shifts ** np.arange(len(shifts)).reshape(-1, 1)
    b = np.zeros_like(shifts)
    b[n] = factorial(n)
    coeffs = np.linalg.solve(A, b)

    coeffs_and_shifts = np.stack([coeffs, shifts])

    # remove all small coefficients and shifts
    coeffs_and_shifts[np.abs(coeffs_and_shifts) < 1e-10] = 0

    # remove columns where the coefficients are 0
    coeffs_and_shifts = coeffs_and_shifts[:, ~np.all(coeffs_and_shifts == 0, axis=0)]

    # Sort columns in ascending order according to abs(shift). The sort key
    # must be the *shift* row (row 1) only — argsort of the full 2D array
    # would produce a 2D index and a malformed result — and this ordering
    # guarantees that any zero-shift (unshifted) term appears first, which
    # downstream processing relies on.
    coeffs_and_shifts = coeffs_and_shifts[:, np.argsort(np.abs(coeffs_and_shifts[1]))]
    return coeffs_and_shifts

def _finite_diff_new(
    tape,
    argnum=None,
    h=1e-7,
    approx_order=1,
    n=1,
    strategy="forward",
    f0=None,
    validate_params=True,
):
    r"""Transform a QNode to compute the finite-difference gradient of all gate
    parameters with respect to its inputs. This function is adapted to the new return system.

    Args:
        tape (pennylane.QNode or .QuantumTape): quantum tape or QNode to differentiate
        argnum (int or list[int] or None): Trainable parameter indices to differentiate
            with respect to. If not provided, the derivatives with respect to all
            trainable parameters are returned.
        h (float): finite difference method step size
        approx_order (int): The approximation order of the finite-difference method to use.
        n (int): compute the :math:`n`-th derivative
        strategy (str): The strategy of the finite difference method. Must be one of
            ``"forward"``, ``"center"``, or ``"backward"``.
            For the ``"forward"`` strategy, the finite-difference shifts occur at the points
            :math:`x_0, x_0+h, x_0+2h, \dots`, where :math:`h` is some small
            stepsize. The ``"backward"`` strategy is similar, but in
            reverse: :math:`x_0, x_0-h, x_0-2h, \dots`. Finally, the
            ``"center"`` strategy results in shifts symmetric around the
            unshifted point: :math:`\dots, x_0-2h, x_0-h, x_0, x_0+h, x_0+2h, \dots`.
        f0 (tensor_like[float] or None): Output of the evaluated input tape. If provided,
            and the gradient recipe contains an unshifted term, this value is used,
            saving a quantum evaluation.
        validate_params (bool): Whether to validate the tape parameters or not. If ``True``,
            the ``Operation.grad_method`` attribute and the circuit structure will be analyzed
            to determine if the trainable parameters support the finite-difference method.
            If ``False``, the finite-difference method will be applied to all parameters.

    Returns:
        tuple[list[QuantumTape], function]: A tuple containing a list of generated tapes,
        in addition to a post-processing function to be applied to the evaluated tapes.
        The post-processing function returns a single tensor for a single measurement and
        single trainable parameter, and nested tuples otherwise.

    **Example**

    #TODO: Add example for new return type.
    """
    if argnum is None and not tape.trainable_params:
        warnings.warn(
            "Attempted to compute the gradient of a tape with no trainable parameters. "
            "If this is unintended, please mark trainable parameters in accordance with the "
            "chosen auto differentiation framework, or via the 'tape.trainable_params' property."
        )
        # No trainable parameters: return empty gradients matching the
        # measurement structure of the tape.
        if len(tape.measurements) == 1:
            return [], lambda _: qml.math.zeros([0])
        return [], lambda _: tuple(qml.math.zeros([0]) for _ in range(len(tape.measurements)))

    if validate_params:
        # Analyze the circuit structure (if not already done) to determine
        # which trainable parameters support numeric differentiation.
        if "grad_method" not in tape._par_info[0]:
            gradient_analysis(tape, grad_fn=finite_diff)
        diff_methods = grad_method_validation("numeric", tape)
    else:
        diff_methods = ["F" for i in tape.trainable_params]

    if all(g == "0" for g in diff_methods):
        # Every parameter has an identically-zero gradient; build zero
        # outputs of the correct per-measurement dimension without
        # generating any tapes.
        list_zeros = []

        for m in tape.measurements:
            # TODO: Update shape for CV variables
            if m.return_type is qml.measurements.Probability:
                dim = 2 ** len(m.wires)
            else:
                dim = 1

            sub_list_zeros = [qml.math.zeros(dim) for _ in range(len(tape.trainable_params))]
            sub_list_zeros = tuple(sub_list_zeros)

            list_zeros.append(sub_list_zeros)

        if len(tape.measurements) == 1:
            return [], lambda _: list_zeros[0]

        return [], lambda _: tuple(list_zeros)

    gradient_tapes = []
    shapes = []
    c0 = None

    coeffs, shifts = finite_diff_coeffs(n=n, approx_order=approx_order, strategy=strategy)

    if 0 in shifts:
        # Finite difference formula includes a term with zero shift.

        if f0 is None:
            # Ensure that the unshifted tape is appended
            # to the gradient tapes, if not already.
            gradient_tapes.append(tape)

        # Store the unshifted coefficient. We know that
        # it will always be the first coefficient due to processing.
        c0 = coeffs[0]
        shifts = shifts[1:]
        coeffs = coeffs[1:]

    method_map = choose_grad_methods(diff_methods, argnum)

    for i, _ in enumerate(tape.trainable_params):
        if i not in method_map or method_map[i] == "0":
            # parameter has zero gradient
            shapes.append(0)
            continue

        g_tapes = generate_shifted_tapes(tape, i, shifts * h)
        gradient_tapes.extend(g_tapes)
        shapes.append(len(g_tapes))

    def processing_fn(results):
        """Combine the evaluated tape results into the finite-difference
        Jacobian, shaped per the new return-type system."""
        grads = []
        # Skip the unshifted result if it was prepended to the tape list.
        start = 1 if c0 is not None and f0 is None else 0
        # `or` is unsafe here (f0 may be an array, where truthiness is
        # ambiguous), so compare against None explicitly.
        r0 = results[0] if f0 is None else f0

        output_dims = []
        # TODO: Update shape for CV variables
        for m in tape.measurements:
            if m.return_type is qml.measurements.Probability:
                output_dims.append(2 ** len(m.wires))
            else:
                output_dims.append(1)

        for s in shapes:

            if s == 0:
                # parameter has zero gradient
                if not isinstance(results[0], tuple):
                    g = qml.math.zeros_like(results[0])
                else:
                    g = []
                    for i in output_dims:
                        zero = qml.math.squeeze(qml.math.zeros(i))
                        g.append(zero)

                grads.append(g)
                continue

            res = results[start : start + s]
            start = start + s

            # compute the linear combination of results
            # and coefficients
            pre_grads = []

            if len(tape.measurements) == 1:
                res = qml.math.stack(res)
                c = qml.math.convert_like(coeffs, res)
                lin_comb = qml.math.tensordot(res, c, [[0], [0]])
                pre_grads.append(lin_comb)
            else:
                for i in range(len(tape.measurements)):
                    r = qml.math.stack([r[i] for r in res])
                    c = qml.math.convert_like(coeffs, r)
                    lin_comb = qml.math.tensordot(r, c, [[0], [0]])
                    pre_grads.append(lin_comb)

            # Add on the unshifted term
            if c0 is not None:
                if len(tape.measurements) == 1:
                    c = qml.math.convert_like(c0, r0)
                    pre_grads[0] = pre_grads[0] + c * r0
                else:
                    for i in range(len(tape.measurements)):
                        r_i = r0[i]
                        c = qml.math.convert_like(c0, r_i)
                        pre_grads[i] = pre_grads[i] + c * r_i

            # Divide out the step size h**n to complete the
            # finite-difference formula.
            coeff_div = qml.math.cast_like(
                qml.math.convert_like(1 / h**n, pre_grads[0]), pre_grads[0]
            )

            if len(tape.measurements) > 1:
                pre_grads = tuple(
                    qml.math.convert_like(i * coeff_div, coeff_div) for i in pre_grads
                )
            else:
                pre_grads = qml.math.convert_like(pre_grads[0] * coeff_div, coeff_div)

            grads.append(pre_grads)

        # Single measurement
        if len(tape.measurements) == 1:
            if len(tape.trainable_params) == 1:
                return grads[0]
            return tuple(grads)

        # Reordering to match the right shape for multiple measurements:
        # outer index is the measurement, inner index the trainable parameter.
        grads_reorder = [[0] * len(tape.trainable_params) for _ in range(len(tape.measurements))]
        for i in range(len(tape.measurements)):
            for j in range(len(tape.trainable_params)):
                grads_reorder[i][j] = grads[j][i]

        # To tuple
        if len(tape.trainable_params) == 1:
            grads_tuple = tuple(elem[0] for elem in grads_reorder)
        else:
            grads_tuple = tuple(tuple(elem) for elem in grads_reorder)

        return grads_tuple

    return gradient_tapes, processing_fn

def finite_diff(
tape,
argnum=None,
h=1e-7,
approx_order=1,
n=1,
strategy="forward",
f0=None,
validate_params=True,
):
r"""Transform a QNode to compute the finite-difference gradient of all gate
parameters with respect to its inputs.

Args:
qnode (pennylane.QNode or .QuantumTape): quantum tape or QNode to differentiate
argnum (int or list[int] or None): Trainable parameter indices to differentiate
with respect to. If not provided, the derivatives with respect to all
trainable parameters are returned.
h (float): finite difference method step size
approx_order (int): The approximation order of the finite-difference method to use.
n (int): compute the :math:n-th derivative
strategy (str): The strategy of the finite difference method. Must be one of
"forward", "center", or "backward".
For the "forward" strategy, the finite-difference shifts occur at the points
:math:x_0, x_0+h, x_0+2h,\dots, where :math:h is some small
stepsize. The "backwards" strategy is similar, but in
reverse: :math:x_0, x_0-h, x_0-2h, \dots. Finally, the
"center" strategy results in shifts symmetric around the
unshifted point: :math:\dots, x_0-2h, x_0-h, x_0, x_0+h, x_0+2h,\dots.
f0 (tensor_like[float] or None): Output of the evaluated input tape. If provided,
and the gradient recipe contains an unshifted term, this value is used,
saving a quantum evaluation.
validate_params (bool): Whether to validate the tape parameters or not. If True,
the Operation.grad_method attribute and the circuit structure will be analyzed
to determine if the trainable parameters support the finite-difference method.
If False, the finite-difference method will be applied to all parameters.

Returns:
tensor_like or tuple[list[QuantumTape], function]:

- If the input is a QNode, a tensor
representing the output Jacobian matrix of size (number_outputs, number_gate_parameters)
is returned.

- If the input is a tape, a tuple containing a list of generated tapes,
in addition to a post-processing function to be applied to the
evaluated tapes.

**Example**

This transform can be registered directly as the quantum gradient transform
to use during autodifferentiation:

>>> dev = qml.device("default.qubit", wires=2)
... def circuit(params):
...     qml.RX(params, wires=0)
...     qml.RY(params, wires=0)
...     qml.RX(params, wires=0)
...     return qml.expval(qml.PauliZ(0)), qml.var(qml.PauliZ(0))
>>> params = np.array([0.1, 0.2, 0.3], requires_grad=True)
>>> qml.jacobian(circuit)(params)
tensor([[-0.38751725, -0.18884792, -0.38355708],
[ 0.69916868,  0.34072432,  0.69202365]], requires_grad=True)

.. details::
:title: Usage Details

This gradient transform can also be applied directly to :class:QNode <pennylane.QNode> objects:

>>> @qml.qnode(dev)
... def circuit(params):
...     qml.RX(params, wires=0)
...     qml.RY(params, wires=0)
...     qml.RX(params, wires=0)
...     return qml.expval(qml.PauliZ(0)), qml.var(qml.PauliZ(0))
tensor([[-0.38751725, -0.18884792, -0.38355708],
[ 0.69916868,  0.34072432,  0.69202365]], requires_grad=True)

This quantum gradient transform can also be applied to low-level
:class:~.QuantumTape objects. This will result in no implicit quantum
device evaluation. Instead, the processed tapes, and post-processing
function, which together define the gradient are directly returned:

>>> with qml.tape.QuantumTape() as tape:
...     qml.RX(params, wires=0)
...     qml.RY(params, wires=0)
...     qml.RX(params, wires=0)
...     qml.expval(qml.PauliZ(0))
...     qml.var(qml.PauliZ(0))
[<QuantumTape: wires=, params=3>,
<QuantumTape: wires=, params=3>,
<QuantumTape: wires=, params=3>,
<QuantumTape: wires=, params=3>]

This can be useful if the underlying circuits representing the gradient
computation need to be analyzed.

The output tapes can then be evaluated and post-processed to retrieve

>>> dev = qml.device("default.qubit", wires=2)
>>> fn(qml.execute(gradient_tapes, dev, None))
[[-0.38751721 -0.18884787 -0.38355704]
[ 0.69916862  0.34072424  0.69202359]]
"""
if qml.active_return():
return _finite_diff_new(
tape,
argnum=argnum,
h=h,
approx_order=approx_order,
n=n,
strategy=strategy,
f0=f0,
validate_params=validate_params,
)

if argnum is None and not tape.trainable_params:
warnings.warn(
"Attempted to compute the gradient of a tape with no trainable parameters. "
"If this is unintended, please mark trainable parameters in accordance with the "
"chosen auto differentiation framework, or via the 'tape.trainable_params' property."
)
return [], lambda _: qml.math.zeros([tape.output_dim, 0])

if validate_params:
if "grad_method" not in tape._par_info:
diff_methods = grad_method_validation("numeric", tape)
else:
diff_methods = ["F" for i in tape.trainable_params]

if all(g == "0" for g in diff_methods):
return [], lambda _: np.zeros([tape.output_dim, len(tape.trainable_params)])

shapes = []
c0 = None

coeffs, shifts = finite_diff_coeffs(n=n, approx_order=approx_order, strategy=strategy)

if 0 in shifts:
# Finite difference formula includes a term with zero shift.

if f0 is None:
# Ensure that the unshifted tape is appended
# to the gradient tapes, if not already.

# Store the unshifted coefficient. We know that
# it will always be the first coefficient due to processing.
c0 = coeffs
shifts = shifts[1:]
coeffs = coeffs[1:]

method_map = choose_grad_methods(diff_methods, argnum)

for i, _ in enumerate(tape.trainable_params):
if i not in method_map or method_map[i] == "0":
# parameter has zero gradient
shapes.append(0)
continue

g_tapes = generate_shifted_tapes(tape, i, shifts * h)
shapes.append(len(g_tapes))

def processing_fn(results):
# HOTFIX: Apply the same squeezing as in qml.QNode to make the transform output consistent.
# pylint: disable=protected-access
if tape._qfunc_output is not None and not isinstance(tape._qfunc_output, Sequence):
results = [qml.math.squeeze(res) for res in results]

start = 1 if c0 is not None and f0 is None else 0
r0 = f0 or results

for s in shapes:

if s == 0:
# parameter has zero gradient
g = qml.math.zeros_like(results)
continue

res = results[start : start + s]
start = start + s

# compute the linear combination of results and coefficients
res = qml.math.stack(res)
g = sum(c * r for c, r in zip(coeffs, res))

if c0 is not None:
# add on the unshifted term
g = g + c0 * r0

# The following is for backwards compatibility; currently,
# the device stacks multiple measurement arrays, even if not the same
# size, resulting in a ragged array.
# In the future, we might want to change this so that only tuples
# of arrays are returned.
for i, g in enumerate(grads):
if hasattr(g, "dtype") and g.dtype is np.dtype("object"):
if qml.math.ndim(g) > 0: