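# Copyright 2018-2021 Xanadu Quantum Technologies Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.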
"""
This module contains functions for computing the parameter-shift gradient
of a qubit-based quantum tape.
"""
# pylint: disable=protected-access,too-many-arguments,too-many-statements
import warnings
from collections.abc import Sequence

import numpy as np

import pennylane as qml
from pennylane.measurements import MutualInfo, State, VnEntropy

from .finite_difference import finite_diff
from .general_shift_rules import (
_iterate_shift_rule,
frequencies_to_period,
generate_shifted_tapes,
process_shifts,
)
)

def _square_via_matrix(obs):
    """Build a new observable of the same class from the squared matrix of ``obs``,
    acting on the same wires."""
    return obs.__class__(obs.matrix() @ obs.matrix(), wires=obs.wires)


def _square_is_self(obs):
    """Return ``obs`` itself as its own square."""
    return obs


NONINVOLUTORY_OBS = {
    "Hermitian": _square_via_matrix,
    "SparseHamiltonian": _square_via_matrix,
    "Projector": _square_is_self,
}
"""Dict[str, callable]: mapping from a non-involutory observable name
to a callable that accepts an observable object, and returns the square
of that observable.
"""

def _square_observable(obs):
    """Returns the square of an observable.

    Args:
        obs: The observable to square. Either a ``qml.operation.Tensor`` or an
            observable whose ``name`` is a key of ``NONINVOLUTORY_OBS``.

    Returns:
        The squared observable. For a tensor, a new ``qml.operation.Tensor`` that
        contains only the squares of its non-involutory components.

    Raises:
        KeyError: If ``obs`` is not a tensor and ``obs.name`` is not a key
            of ``NONINVOLUTORY_OBS``.
    """
    if isinstance(obs, qml.operation.Tensor):
        # Observable is a tensor, we must consider its
        # component observables independently. Note that
        # we assume all component observables are on distinct wires.
        components_squared = []

        for comp in obs.obs:
            # Only the lookup decides whether a component is involutory. A KeyError
            # raised *inside* a squaring callable must not be mistaken for that.
            square_fn = NONINVOLUTORY_OBS.get(comp.name)
            if square_fn is None:
                # component is involutory and is left out of the squared tensor
                continue
            components_squared.append(square_fn(comp))

        return qml.operation.Tensor(*components_squared)

    return NONINVOLUTORY_OBS[obs.name](obs)

def _process_op_recipe(op, p_idx, order):
    """Process an existing recipe of an operation.

    Args:
        op: Operation whose ``grad_recipe`` entry for parameter ``p_idx`` is processed.
        p_idx (int): Index of the parameter within the operation.
        order (int): Order of the derivative. For ``order == 1`` the stored recipe is
            processed directly, otherwise the recipe is iterated ``order`` times.

    Returns:
        tensor_like or None: The processed recipe with columns
        ``[coefficient, multiplier, shift]``, or ``None`` if the operation does not
        store a recipe for this parameter.
    """
    # Look up the stored recipe first; everything below operates on it.
    recipe = op.grad_recipe[p_idx]
    if recipe is None:
        return None

    recipe = qml.math.array(recipe)
    if order == 1:
        return process_shifts(recipe, batch_duplicates=False)

    # Try to obtain the period of the operator frequencies for iteration of custom recipe
    try:
        period = frequencies_to_period(op.parameter_frequencies[p_idx])
    except qml.operation.ParameterFrequenciesUndefinedError:
        period = None

    # Iterate the custom recipe to obtain the second-order recipe
    if qml.math.allclose(recipe[:, 1], qml.math.ones_like(recipe[:, 1])):
        # If the multipliers are ones, we do not include them in the iteration
        # but keep track of them manually
        iter_c, iter_s = process_shifts(_iterate_shift_rule(recipe[:, ::2], order, period)).T
        return qml.math.stack([iter_c, qml.math.ones_like(iter_c), iter_s]).T

    return process_shifts(_iterate_shift_rule(recipe, order, period))

def _choose_recipe(argnum, idx, gradient_recipes, shifts, tape):
    """Obtain the gradient recipe for an indicated parameter from provided
    ``gradient_recipes``. If none is provided, use the recipe of the operation instead.

    Args:
        argnum (list[int]): Trainable parameter indices that are differentiated.
        idx (int): Trainable parameter index to choose the recipe for. Must be in ``argnum``.
        gradient_recipes (Sequence or None): One recipe (or ``None``) per entry of ``argnum``.
        shifts (Sequence or None): One collection of shift values per entry of ``argnum``.
        tape (.QuantumTape): Tape containing the operation to differentiate.

    Returns:
        tensor_like: The recipe to use for parameter ``idx``.

    Raises:
        ValueError: If ``idx`` is not contained in ``argnum``.
    """
    arg_idx = argnum.index(idx)
    # Recipes are aligned with ``argnum``; a missing collection means "no custom recipe",
    # mirroring the handling of ``shifts`` below.
    recipe = None if gradient_recipes is None else gradient_recipes[arg_idx]
    if recipe is not None:
        return process_shifts(np.array(recipe))

    op_shifts = None if shifts is None else shifts[arg_idx]
    return _get_operation_recipe(tape, idx, shifts=op_shifts)

def _extract_unshifted(recipe, at_least_one_unshifted, f0, gradient_tapes, tape):
    """Extract the unshifted term from a gradient recipe, if it is present.

    Args:
        recipe (array_like[float]): Recipe with rows ``[coefficient, multiplier, shift]``.
        at_least_one_unshifted (bool): Whether an unshifted term was already found
            for a previously treated recipe.
        f0 (tensor_like[float] or None): Precomputed output of the unshifted tape, if any.
        gradient_tapes (list): Tapes collected so far. The unshifted ``tape`` is inserted
            at the front **in place** if it is needed and not yet present.
        tape (.QuantumTape): The original, unshifted tape.

    Returns:
        array_like[float]: The reduced recipe without the unshifted term.
        bool: The updated flag whether an unshifted term was found for any of the recipes.
        float or None: The coefficient of the unshifted term. None if no such term was present.

    This assumes that there will be at most one unshifted term in the recipe (others simply are
    not extracted) and that it comes first if there is one.
    """
    first_c, first_m, first_s = recipe[0]
    # Extract zero-shift term if present (if so, it will always be the first)
    if first_s == 0 and first_m == 1:
        # Gradient recipe includes a term with zero shift.
        if not at_least_one_unshifted and f0 is None:
            # Put the unshifted tape in front of the gradient tapes, if not already
            # present; its result is read from position 0 during post-processing.
            gradient_tapes.insert(0, tape)

        # Store the unshifted coefficient. It is always the first coefficient due to processing
        unshifted_coeff = first_c
        at_least_one_unshifted = True
        recipe = recipe[1:]
    else:
        unshifted_coeff = None

    return recipe, at_least_one_unshifted, unshifted_coeff

def _evaluate_gradient(res, data, broadcast, r0, scalar_qfunc_output):
    """Use shifted tape evaluations and parameter-shift rule coefficients
    to evaluate a gradient result.

    Args:
        res: Results of the shifted tape(s) belonging to one trainable parameter.
        data (tuple): Entry of the form
            ``(num_tapes, coeffs, fn, unshifted_coeff, batch_size)``.
        broadcast (bool): Whether parameter broadcasting was used to create the tapes.
        r0: Result of the unshifted tape; only used if ``unshifted_coeff`` is not ``None``.
        scalar_qfunc_output (bool): Whether the original quantum function output is scalar.

    Returns:
        tensor_like: Linear combination of the results with the recipe coefficients.
    """
    _, coeffs, fn, unshifted_coeff, batch_size = data

    # individual post-processing of e.g. Hamiltonian grad tapes
    if fn is not None:
        res = fn(res)

    # compute the linear combination of results and coefficients
    axis = 0
    if not broadcast:
        res = qml.math.stack(res)
    elif (
        qml.math.get_interface(res[0]) != "torch"
        and batch_size is not None
        and not scalar_qfunc_output
    ):
        # If the original output is not scalar and broadcasting is used, the second axis
        # (index 1) needs to be contracted. For Torch, this is not true because the
        # output of the broadcasted tape is flat due to the behaviour of the Torch device.
        axis = 1
    g = qml.math.tensordot(res, qml.math.convert_like(coeffs, res), [[axis], [0]])

    if unshifted_coeff is not None:
        # add the contribution of the unshifted term
        g = g + unshifted_coeff * r0

    return g


def _get_operation_recipe(tape, t_idx, shifts, order=1):
    """Utility function to return the parameter-shift rule
    of the operation corresponding to trainable parameter
    t_idx on tape.

    Args:
        tape (.QuantumTape): Tape containing the operation to differentiate
        t_idx (int): Parameter index of the operation to differentiate within the tape
        shifts (Sequence[float or int]): Shift values to use if no static ``grad_recipe`` is
            provided by the operation to differentiate
        order (int): Order of the differentiation

    Returns:
        tensor_like: Recipe with columns ``[coefficient, multiplier, shift]``.

    Raises:
        NotImplementedError: If ``order`` is neither 1 nor 2.
        qml.operation.OperatorPropertyUndefined: If the operation provides neither a
            recipe nor parameter frequencies.

    This function performs multiple attempts to obtain the recipe:

    - If the operation has a custom :attr:`~.grad_recipe` defined, it is used.

    - If :attr:`.parameter_frequencies` yields a result, the frequencies are
      used to construct the general parameter-shift rule via
      :func:`.generate_shift_rule`.
      Note that by default, the generator is used to compute the parameter frequencies
      if they are not provided by a custom implementation.

    That is, the order of precedence is :meth:`~.grad_recipe`, custom
    :attr:`~.parameter_frequencies`, and finally :meth:`.generator` via the default
    implementation of the frequencies.

    If order is set to 2, the rule for the second-order derivative is obtained instead.
    """
    if order not in {1, 2}:
        raise NotImplementedError("_get_operation_recipe only is implemented for orders 1 and 2.")

    op, p_idx = tape.get_operation(t_idx)

    # Try to use the stored grad_recipe of the operation
    op_recipe = _process_op_recipe(op, p_idx, order)
    if op_recipe is not None:
        return op_recipe

    # Try to obtain frequencies, either via custom implementation or from generator eigvals
    try:
        frequencies = op.parameter_frequencies[p_idx]
    except qml.operation.ParameterFrequenciesUndefinedError as e:
        raise qml.operation.OperatorPropertyUndefined(
            f"The operation {op.name} does not have a grad_recipe, parameter_frequencies or "
            "a generator defined. No parameter shift rule can be applied."
        ) from e

    # Create shift rule from frequencies with given shifts
    coeffs, shifts = qml.gradients.generate_shift_rule(frequencies, shifts=shifts, order=order).T
    # The generated shift rules do not include a rescaling of the parameter, only shifts.
    mults = np.ones_like(coeffs)

    return qml.math.stack([coeffs, mults, shifts]).T


def expval_param_shift(
    tape, argnum=None, shifts=None, gradient_recipes=None, f0=None, broadcast=False
):
    r"""Generate the parameter-shift tapes and postprocessing methods required
    to compute the gradient of a gate parameter with respect to an
    expectation value.

    Args:
        tape (.QuantumTape): quantum tape to differentiate
        argnum (int or list[int] or None): Trainable parameter indices to differentiate
            with respect to. If not provided, the derivatives with respect to all
            trainable indices are returned.
        shifts (list[tuple[int or float]]): List containing tuples of shift values.
            If provided, one tuple of shifts should be given per trainable parameter
            and the tuple should match the number of frequencies for that parameter.
            If unspecified, equidistant shifts are assumed.
        gradient_recipes (tuple(list[list[float]] or None)): List of gradient recipes
            for the parameter-shift method. One gradient recipe must be provided
            per trainable parameter.
        f0 (tensor_like[float] or None): Output of the evaluated input tape. If provided,
            and the gradient recipe contains an unshifted term, this value is used,
            saving a quantum evaluation.
        broadcast (bool): Whether or not to use parameter broadcasting to create
            a single broadcasted tape per operation instead of one tape per shift angle.

    Returns:
        tuple[list[QuantumTape], function]: A tuple containing a
        list of generated tapes, in addition to a post-processing
        function to be applied to the results of the evaluated tapes.
    """
    argnum = argnum or tape.trainable_params
    if gradient_recipes is None:
        # No custom recipes: fall back to the recipe of each operation.
        gradient_recipes = [None] * len(argnum)

    gradient_tapes = []
    # Each entry for gradient_data will be a tuple with entries
    # (num_tapes, coeffs, fn, unshifted_coeff, batch_size)
    gradient_data = []
    # Keep track of whether there is at least one unshifted term in all the parameter-shift rules
    at_least_one_unshifted = False

    for idx, _ in enumerate(tape.trainable_params):

        if idx not in argnum:
            # parameter has zero gradient
            gradient_data.append((0, [], None, None, 0))
            continue

        op, _ = tape.get_operation(idx)

        if op.name == "Hamiltonian":
            # operation is a Hamiltonian
            if op.return_type is not qml.measurements.Expectation:
                raise ValueError(
                    "Can only differentiate Hamiltonian "
                    f"coefficients for expectations, not {op.return_type.value}"
                )

            g_tapes, h_fn = qml.gradients.hamiltonian_grad(tape, idx)
            # hamiltonian_grad always returns a list with a single tape
            gradient_tapes.extend(g_tapes)
            gradient_data.append((1, np.array([1.0]), h_fn, None, g_tapes[0].batch_size))
            continue

        recipe = _choose_recipe(argnum, idx, gradient_recipes, shifts, tape)
        recipe, at_least_one_unshifted, unshifted_coeff = _extract_unshifted(
            recipe, at_least_one_unshifted, f0, gradient_tapes, tape
        )
        coeffs, multipliers, op_shifts = recipe.T

        g_tapes = generate_shifted_tapes(tape, idx, op_shifts, multipliers, broadcast)
        gradient_tapes.extend(g_tapes)
        # If broadcast=True, g_tapes only contains one tape. If broadcast=False, all returned
        # tapes will have the same batch_size=None. Thus we only use g_tapes[0].batch_size here.
        gradient_data.append((len(g_tapes), coeffs, None, unshifted_coeff, g_tapes[0].batch_size))

    def processing_fn(results):
        # Apply the same squeezing as in qml.QNode to make the transform output consistent.
        # pylint: disable=protected-access
        scalar_qfunc_output = tape._qfunc_output is not None and not isinstance(
            tape._qfunc_output, Sequence
        )
        if scalar_qfunc_output:
            results = [qml.math.squeeze(res) for res in results]

        grads = []
        start = 1 if at_least_one_unshifted and f0 is None else 0
        # Explicit None check: ``f0 or ...`` would discard a zero-valued ``f0`` and
        # raise for arrays with more than one element.
        r0 = results[0] if f0 is None else f0

        for data in gradient_data:

            num_tapes, *_, batch_size = data
            if num_tapes == 0:
                # parameter has zero gradient. We don't know the output shape yet, so just memorize
                # that this gradient will be set to zero, via grad = None
                grads.append(None)
                continue

            res = results[start : start + num_tapes] if batch_size is None else results[start]
            start = start + num_tapes

            g = _evaluate_gradient(res, data, broadcast, r0, scalar_qfunc_output)
            grads.append(g)

        # This clause will be hit at least once (because otherwise all gradients would have
        # been zero), providing a representative for a zero gradient to emulate its type/shape.
        zero_rep = qml.math.zeros_like(g)

        for i, g in enumerate(grads):
            # Fill in zero-valued gradients
            if g is None:
                grads[i] = zero_rep
            # The following is for backwards compatibility; currently, the device stacks multiple
            # measurement arrays, even if not the same size, resulting in a ragged array.
            # In the future, we might want to change this so that only tuples of arrays are returned.
            if getattr(g, "dtype", None) is np.dtype("object") and qml.math.ndim(g) > 0:
                grads[i] = qml.math.hstack(g)

        return qml.math.T(qml.math.stack(grads))

    return gradient_tapes, processing_fn

def var_param_shift(
    tape, argnum=None, shifts=None, gradient_recipes=None, f0=None, broadcast=False
):
    r"""Generate the parameter-shift tapes and postprocessing methods required
    to compute the gradient of a gate parameter with respect to a
    variance value.

    Args:
        tape (.QuantumTape): quantum tape to differentiate
        argnum (int or list[int] or None): Trainable parameter indices to differentiate
            with respect to. If not provided, the derivative with respect to all
            trainable indices are returned.
        shifts (list[tuple[int or float]]): List containing tuples of shift values.
            If provided, one tuple of shifts should be given per trainable parameter
            and the tuple should match the number of frequencies for that parameter.
            If unspecified, equidistant shifts are assumed.
        gradient_recipes (tuple(list[list[float]] or None)): List of gradient recipes
            for the parameter-shift method. One gradient recipe must be provided
            per trainable parameter.
        f0 (tensor_like[float] or None): Output of the evaluated input tape. If provided,
            and the gradient recipe contains an unshifted term, this value is used,
            saving a quantum evaluation.
        broadcast (bool): Whether or not to use parameter broadcasting to create
            a single broadcasted tape per operation instead of one tape per shift angle.

    Returns:
        tuple[list[QuantumTape], function]: A tuple containing a
        list of generated tapes, in addition to a post-processing
        function to be applied to the results of the evaluated tapes.
    """
    argnum = argnum or tape.trainable_params

    # Determine the locations of any variance measurements in the measurement queue.
    var_mask = [m.return_type is qml.measurements.Variance for m in tape.measurements]
    var_idx = np.where(var_mask)[0]

    # Get <A>, the expectation value of the tape with unshifted parameters.
    expval_tape = tape.copy(copy_operations=True)

    # The expectation value tape comes first; post-processing reads it as results[0].
    gradient_tapes = [expval_tape]

    # Convert all variance measurements on the tape into expectation values
    for i in var_idx:
        obs = expval_tape._measurements[i].obs
        expval_tape._measurements[i] = qml.measurements.MeasurementProcess(
            qml.measurements.Expectation, obs=obs
        )

    # evaluate the analytic derivative of <A>
    # NOTE(review): ``f0`` is forwarded unchanged although it is documented as the output
    # of the input (variance) tape - confirm this is intended for recipes with unshifted terms.
    pdA_tapes, pdA_fn = expval_param_shift(
        expval_tape, argnum, shifts, gradient_recipes, f0, broadcast
    )
    gradient_tapes.extend(pdA_tapes)

    # Store the number of first derivative tapes, so that we know
    # the number of results to post-process later.
    tape_boundary = len(pdA_tapes) + 1

    # If there are non-involutory observables A present, we must compute d<A^2>/dp.
    # Get the indices in the measurement queue of all non-involutory
    # observables.
    non_involutory = []

    for i in var_idx:
        obs_name = tape.observables[i].name

        if isinstance(obs_name, list):
            # Observable is a tensor product, we must investigate all constituent observables.
            if any(name in NONINVOLUTORY_OBS for name in obs_name):
                non_involutory.append(i)

        elif obs_name in NONINVOLUTORY_OBS:
            non_involutory.append(i)

    # For involutory observables (A^2 = I) we have d<A^2>/dp = 0.
    involutory = set(var_idx) - set(non_involutory)

    if non_involutory:
        expval_sq_tape = tape.copy(copy_operations=True)

        for i in non_involutory:
            # We need to calculate d<A^2>/dp; to do so, we replace the
            # non-involutory observables A in the queue with A^2.
            obs = _square_observable(expval_sq_tape._measurements[i].obs)
            expval_sq_tape._measurements[i] = qml.measurements.MeasurementProcess(
                qml.measurements.Expectation, obs=obs
            )

        # Non-involutory observables are present; the partial derivative of <A^2>
        # may be non-zero. Here, we calculate the analytic derivatives of the <A^2>
        # observables.
        pdA2_tapes, pdA2_fn = expval_param_shift(
            expval_sq_tape, argnum, shifts, gradient_recipes, f0, broadcast
        )
        gradient_tapes.extend(pdA2_tapes)

    def processing_fn(results):
        # HOTFIX: Apply the same squeezing as in qml.QNode to make the transform output consistent.
        # pylint: disable=protected-access
        if tape._qfunc_output is not None and not isinstance(tape._qfunc_output, Sequence):
            results = [qml.math.squeeze(res) for res in results]

        # We need to expand the dimensions of the variance mask,
        # and convert it to be the same type as the results.
        res = results[0]
        ragged = getattr(results[0], "dtype", None) is np.dtype("object")

        mask = []
        for m, r in zip(var_mask, qml.math.atleast_1d(results[0])):
            array_func = np.ones if m else np.zeros
            shape = qml.math.shape(r)
            mask.append(array_func(shape, dtype=bool))

        if ragged and qml.math.ndim(res) > 0:
            res = qml.math.hstack(res)
            mask = qml.math.hstack(mask)

        # <A> at the unshifted parameters, with a trailing axis for the parameter dimension
        expval_unshifted = qml.math.expand_dims(res, -1)
        mask = qml.math.convert_like(
            qml.math.reshape(mask, qml.math.shape(expval_unshifted)), res
        )

        pdA = pdA_fn(results[1:tape_boundary])
        pdA2 = 0

        if non_involutory:
            # compute the second derivative of non-involutory observables
            pdA2 = pdA2_fn(results[tape_boundary:])

            if involutory:
                # if involutory observables are present, ensure they have zero gradient.
                #
                # For the pdA2_tapes, we have replaced non-involutory
                # observables with their square (A -> A^2). However,
                # involutory observables have been left as-is (A), and have
                # not been replaced by their square (A^2 = I). As a result,
                # components of the gradient vector will not be correct. We
                # need to replace the gradient value with 0 (the known,
                # correct gradient for involutory variables).

                m = [tape.observables[i].name not in NONINVOLUTORY_OBS for i in var_idx]
                m = qml.math.convert_like(m, pdA2)
                pdA2 = qml.math.where(qml.math.reshape(m, [-1, 1]), 0, pdA2)

        # return d(var(A))/dp = d<A^2>/dp -2 * <A> * d<A>/dp for the variances (mask==True)
        # d<A>/dp for plain expectations (mask==False)
        return qml.math.where(mask, pdA2 - 2 * expval_unshifted * pdA, pdA)

    return gradient_tapes, processing_fn

def param_shift(
tape,
argnum=None,
shifts=None,
fallback_fn=finite_diff,
f0=None,
):
r"""Transform a QNode to compute the parameter-shift gradient of all gate
parameters with respect to its inputs.

Args:
qnode (pennylane.QNode or .QuantumTape): quantum tape or QNode to differentiate
argnum (int or list[int] or None): Trainable parameter indices to differentiate
with respect to. If not provided, the derivative with respect to all
trainable indices are returned.
shifts (list[tuple[int or float]]): List containing tuples of shift values.
If provided, one tuple of shifts should be given per trainable parameter
and the tuple should match the number of frequencies for that parameter.
If unspecified, equidistant shifts are assumed.
for the parameter-shift method. One gradient recipe must be provided
per trainable parameter.

This is a tuple with one nested list per parameter. For
parameter :math:\phi_k, the nested list contains elements of the form
:math:[c_i, a_i, s_i] where :math:i is the index of the
term, resulting in a gradient recipe of

.. math:: \frac{\partial}{\partial\phi_k}f = \sum_{i} c_i f(a_i \phi_k + s_i).

If None, the default gradient recipe containing the two terms
:math:[c_0, a_0, s_0]=[1/2, 1, \pi/2] and :math:[c_1, a_1,
s_1]=[-1/2, 1, -\pi/2] is assumed for every parameter.
fallback_fn (None or Callable): a fallback gradient function to use for
any parameters that do not support the parameter-shift rule.
f0 (tensor_like[float] or None): Output of the evaluated input tape. If provided,
and the gradient recipe contains an unshifted term, this value is used,
saving a quantum evaluation.
broadcast (bool): Whether or not to use parameter broadcasting to create the
a single broadcasted tape per operation instead of one tape per shift angle.

Returns:
tensor_like or tuple[list[QuantumTape], function]:

- If the input is a QNode, a tensor
representing the output Jacobian matrix of size (number_outputs, number_gate_parameters)
is returned.

- If the input is a tape, a tuple containing a list of generated tapes,
in addition to a post-processing function to be applied to the
evaluated tapes.

For a variational evolution :math:U(\mathbf{p}) \vert 0\rangle with
:math:N parameters :math:\mathbf{p},
consider the expectation value of an observable :math:O:

.. math::

f(\mathbf{p})  = \langle \hat{O} \rangle(\mathbf{p}) = \langle 0 \vert
U(\mathbf{p})^\dagger \hat{O} U(\mathbf{p}) \vert 0\rangle.

The gradient of this expectation value can be calculated via the parameter-shift rule:

.. math::

\frac{\partial f}{\partial \mathbf{p}} = \sum_{\mu=1}^{2R}
f\left(\mathbf{p}+\frac{2\mu-1}{2R}\pi\right)
\frac{(-1)^{\mu-1}}{4R\sin^2\left(\frac{2\mu-1}{4R}\pi\right)}

Here, :math:R is the number of frequencies with which the parameter :math:\mathbf{p}
enters the function :math:f via the operation :math:U, and we assumed that these
frequencies are equidistant.
For more general shift rules, both regarding the shifts and the frequencies, and
for more technical details, see
Vidal and Theis (2018) <https://arxiv.org/abs/1812.06323>_ and
Wierichs et al. (2022) <https://doi.org/10.22331/q-2022-03-30-677>_.

For a variational evolution :math:U(\mathbf{p}) \vert 0\rangle with
:math:N parameters :math:\mathbf{p},
consider the variance of an observable :math:O:

.. math::

g(\mathbf{p})=\langle \hat{O}^2 \rangle (\mathbf{p}) - [\langle \hat{O}
\rangle(\mathbf{p})]^2.

We can relate this directly to the parameter-shift rule by noting that

.. math::

\frac{\partial g}{\partial \mathbf{p}}= \frac{\partial}{\partial
\mathbf{p}} \langle \hat{O}^2 \rangle (\mathbf{p})
- 2 f(\mathbf{p}) \frac{\partial f}{\partial \mathbf{p}}.

The derivatives in the expression on the right hand side can be computed via
the shift rule as above, allowing for the computation of the variance derivative.

In the case where :math:O is involutory (:math:\hat{O}^2 = I), the first
term in the above expression vanishes, and we are simply left with

.. math::

\frac{\partial g}{\partial \mathbf{p}} = - 2 f(\mathbf{p})
\frac{\partial f}{\partial \mathbf{p}}.

**Example**

This transform can be registered directly as the quantum gradient transform
to use during autodifferentiation:

>>> dev = qml.device("default.qubit", wires=2)
... def circuit(params):
...     qml.RX(params[0], wires=0)
...     qml.RY(params[1], wires=0)
...     qml.RX(params[2], wires=0)
...     return qml.expval(qml.PauliZ(0)), qml.var(qml.PauliZ(0))
>>> params = np.array([0.1, 0.2, 0.3], requires_grad=True)
>>> qml.jacobian(circuit)(params)
tensor([[-0.38751725, -0.18884792, -0.38355708],

.. note::

param_shift performs multiple attempts to obtain the gradient recipes for
each operation:

- If an operation has a custom :attr:~.operation.Operation.grad_recipe defined,
it is used.

- If :attr:~.operation.Operation.parameter_frequencies yields a result, the frequencies
are used to construct the general parameter-shift rule via
:func:.generate_shift_rule.
Note that by default, the generator is used to compute the parameter frequencies
if they are not provided via a custom implementation.

That is, the order of precedence is :attr:~.operation.Operation.grad_recipe, custom
:attr:~.operation.Operation.parameter_frequencies, and finally
:meth:~.operation.Operation.generator via the default implementation of the frequencies.

.. warning::

Note that using parameter broadcasting via broadcast=True is not supported for tapes
with multiple return values or for evaluations with shot vectors.
As the option broadcast=True adds a broadcasting dimension, it is not compatible
Finally, operations with trainable parameters are required to support broadcasting.
One way of checking this is the Attribute supports_broadcasting:

True

.. details::
:title: Usage Details

This gradient transform can be applied directly to :class:QNode <pennylane.QNode> objects:

>>> @qml.qnode(dev)
... def circuit(params):
...     qml.RX(params[0], wires=0)
...     qml.RY(params[1], wires=0)
...     qml.RX(params[2], wires=0)
...     return qml.expval(qml.PauliZ(0)), qml.var(qml.PauliZ(0))
tensor([[-0.38751725, -0.18884792, -0.38355708],

This quantum gradient transform can also be applied to low-level
:class:~.QuantumTape objects. This will result in no implicit quantum
device evaluation. Instead, the processed tapes, and post-processing
function, which together define the gradient are directly returned:

>>> with qml.tape.QuantumTape() as tape:
...     qml.RX(params[0], wires=0)
...     qml.RY(params[1], wires=0)
...     qml.RX(params[2], wires=0)
...     qml.expval(qml.PauliZ(0))
...     qml.var(qml.PauliZ(0))
[<QuantumTape: wires=[0, 1], params=3>,
<QuantumTape: wires=[0, 1], params=3>,
<QuantumTape: wires=[0, 1], params=3>,
<QuantumTape: wires=[0, 1], params=3>,
<QuantumTape: wires=[0, 1], params=3>,
<QuantumTape: wires=[0, 1], params=3>]

This can be useful if the underlying circuits representing the gradient
computation need to be analyzed.

The output tapes can then be evaluated and post-processed to retrieve

>>> dev = qml.device("default.qubit", wires=2)
[[-0.38751721 -0.18884787 -0.38355704]
[ 0.69916862  0.34072424  0.69202359]]

When setting the keyword argument broadcast to True, the shifted
circuit evaluations for each operation are batched together, resulting in

>>> params = np.array([0.1, 0.2, 0.3], requires_grad=True)
>>> with qml.tape.QuantumTape() as tape:
...     qml.RX(params[0], wires=0)
...     qml.RY(params[1], wires=0)
...     qml.RX(params[2], wires=0)
...     qml.expval(qml.PauliZ(0))
3
>>> [t.batch_size for t in gradient_tapes]
[2, 2, 2]

The postprocessing function will know that broadcasting is used and handle
the results accordingly:
array([[-0.3875172 , -0.18884787, -0.38355704]])

An advantage of using broadcast=True is a speedup:

>>> number = 100
>>> timeit.timeit(serial_call, globals=globals(), number=number) / number
0.020183045039993887
>>> timeit.timeit(broadcasted_call, globals=globals(), number=number) / number
0.01244492811998498

This speedup grows with the number of shifts and qubits until all preprocessing and
postprocessing overhead becomes negligible. While it will depend strongly on the details
of the circuit, at least a small improvement can be expected in most cases.
Note that broadcast=True requires additional memory by a factor of the largest
batch_size of the created tapes.
"""

if any(m.return_type in [State, VnEntropy, MutualInfo] for m in tape.measurements):
raise ValueError(
"Computing the gradient of circuits that return the state is not supported."
)

if broadcast and len(tape.measurements) > 1:
raise NotImplementedError(
"Broadcasting with multiple measurements is not supported yet. "
)

if argnum is None and not tape.trainable_params:
warnings.warn(
"Attempted to compute the gradient of a tape with no trainable parameters. "
"If this is unintended, please mark trainable parameters in accordance with the "
"chosen auto differentiation framework, or via the 'tape.trainable_params' property."
)
return [], lambda _: np.zeros((tape.output_dim, 0))

method = "analytic" if fallback_fn is None else "best"

if all(g == "0" for g in diff_methods):
return [], lambda _: np.zeros([tape.output_dim, len(tape.trainable_params)])

# If there are unsupported operations, call the fallback gradient function
unsupported_params = {idx for idx, g in method_map.items() if g == "F"}
argnum = [i for i, dm in method_map.items() if dm == "A"]

if unsupported_params:
if not argnum:
return fallback_fn(tape)

g_tapes, fallback_proc_fn = fallback_fn(tape, argnum=unsupported_params)
fallback_len = len(g_tapes)

# remove finite difference parameters from the method map
method_map = {t_idx: dm for t_idx, dm in method_map.items() if dm != "F"}

if any(m.return_type is qml.measurements.Variance for m in tape.measurements):
else:

if unsupported_params:
# If there are unsupported parameters, we must process
# the quantum results separately, once for the fallback
# function and once for the parameter-shift rule, and recombine.

def processing_fn(results):