Source code for pennylane.gradients.pulse_gradient_odegen
# Copyright 2023 Xanadu Quantum Technologies Inc.# Licensed under the Apache License, Version 2.0 (the "License");# you may not use this file except in compliance with the License.# You may obtain a copy of the License at# http://www.apache.org/licenses/LICENSE-2.0# Unless required by applicable law or agreed to in writing, software# distributed under the License is distributed on an "AS IS" BASIS,# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.# See the License for the specific language governing permissions and# limitations under the License."""This module contains functions for computing the pulse generatorparameter-shift gradient of pulse sequences in a qubit-based quantum tape."""fromfunctoolsimportpartialimportnumpyasnpimportpennylaneasqmlfrompennylaneimporttransformfrompennylane.ops.qubit.special_unitaryimport_pauli_decompose,pauli_basis_stringsfrompennylane.pulseimportParametrizedEvolutionfrompennylane.tapeimportQuantumScript,QuantumScriptBatchfrompennylane.typingimportPostprocessingFnfrom.gradient_transformimport(_all_zero_grad,_no_trainable_grad,assert_no_state_returns,assert_no_trainable_tape_batching,assert_no_variance,choose_trainable_params,find_and_validate_gradient_methods,reorder_grads,)from.parameter_shiftimport_make_zero_repfrom.pulse_gradientimport_assert_has_jax,raise_pulse_diff_on_qnodetry:importjaxexceptImportError:# Handling the case where JAX is not installed is done via _assert_has_jaxpassdef_one_parameter_generators(op):r"""Compute the effective generators :math:`\{\Omega_k\}` of one-parameter groups that reproduce the partial derivatives of a parametrized evolution. In particular, compute :math:`U` and :math:`\partial U / \partial \theta_k` and recombine them into :math:`\Omega_k = U^\dagger \partial U / \partial \theta_k` Args: op (`~.ParametrizedEvolution`): Parametrized evolution for which to compute the generator Returns: tuple[tensor_like]: The generators for one-parameter groups that reproduce the partial derivatives of the parametrized evolution. The ``k``\ th entry of the returned ``tuple`` has the shape ``(*par_shape, 2**N, 2**N)`` where ``N`` is the number of qubits the evolution acts on and ``par_shape`` is the shape of the ``k``\ th parameter of the evolution. The effective generator can be computed from the derivative of the unitary matrix corresponding to the full time evolution of a pulse: .. math:: \Omega_k = U(\theta)^\dagger \frac{\partial}{\partial \theta_k} U(\theta) Here :math:`U(\theta)` is the unitary matrix of the time evolution due to the pulse and :math:`\theta` are the variational parameters of the pulse. See the documentation of pulse_odegen for more details and a mathematical derivation. """def_compute_matrix(op_data):"""Parametrized computation of the matrix for the given pulse ``op``."""returnop(op_data,t=op.t,**op.odeint_kwargs).matrix()def_compute_matrix_split(op_data):"""Parametrized computation of the matrix for the given pulse ``op``. Return the real and imaginary parts separately."""mat=_compute_matrix(op_data)returnmat.real,mat.imag# Compute the Jacobian of _compute_matrix, giving the Jacobian of the real and imag parts# The output is a tuple, with one entry per parameter, each of which has the axes# (mat_dim, mat_dim, *parameter_shape)jac_real,jac_imag=jax.jacobian(_compute_matrix_split)(op.data)# Compute the matrix of the pulse itself and conjugate it. Skip the transposition of the adjoint# The output has the shape (mat_dim, mat_dim)U_dagger=qml.math.conj(_compute_matrix([qml.math.detach(d)fordinop.data]))# Compute U^\dagger @ \partial U / \partial \theta_k# For each entry ``j`` in the tuple ``jac``,# 1. Contract U_dagger with j along the axes [0], [0]. This effectively transposes# U_dagger, which we skipped above.# 2. After contraction, the axes are (mat_dim, mat_dim, *parameter_shape).# 3. Move the first two axis to the last two positions.moveax=partial(qml.math.moveaxis,source=(0,1),destination=(-2,-1))returntuple(moveax(qml.math.tensordot(U_dagger,j_r+1j*j_i,axes=[[0],[0]]))forj_r,j_iinzip(jac_real,jac_imag))def_one_parameter_paulirot_coeffs(generators,num_wires):r"""Compute the Pauli coefficients of effective generators. The coefficients correspond to the decomposition into (rescaled) Pauli word generators as used by ``PauliRot`` gates. Args: generators (tuple[tensor_like]): Generators of the one-parameter groups that reproduce the partial derivatives of the parametrized evolution of interest. num_wires (int): Number of wires the parametrized evolution acts on. Returns: tuple[tensor_like]: Coefficients of the provided generators in the Pauli basis, modified by a factor of ``2j`` (see warning below). .. warning:: This function includes a prefactor ``2j`` in its output compared to the "standard" Pauli basis coefficients. That is, for the effective generator :math:`\frac{1}{3}iX`, which has the Pauli basis coefficient :math:`\frac{1}{3}i`, this function will compute the number :math:`-\frac{2}{3}`. This is in order to accomodate the use of ``qml.PauliRot`` in the pulse generator differentiation pipeline. """# The generators are skew-Hermitian. Therefore _pauli_decompose will return a purely# imaginary tensor. Multiplying with 2i results in a real-valued tensor, which# we prefer over a complex-valued tensor with vanishing imaginary partreturntuple(qml.math.real(2j*_pauli_decompose(g,num_wires))forgingenerators)def_nonzero_coeffs_and_words(coefficients,num_wires,atol=1e-8):"""Given a tuple of coefficient tensors with a common first axis size :math:`4^N-1`, where :math:`N` is the number of wires, filter out those indices for the first axis for which the sliced coefficients are not all zero. Return these coefficients, as well as the corresponding Pauli word strings on :math:`N` wires. Args: coefficients(tuple[tensor_like]): Coefficients to filter. num_wires (int): Number of qubits the generators act on. atol (float): absolute tolerance used to determine whether a tensor is zero. """words=pauli_basis_strings(num_wires)new_coefficients=[[]for_incoefficients]new_words=[]forword,*coeffsinzip(words,*coefficients):ifall(qml.math.allclose(c,0.0,atol=atol,rtol=0.0)forcincoeffs):continuenew_words.append(word)fornew_coeffs,cinzip(new_coefficients,coeffs):new_coeffs.append(c)returnnew_coefficients,new_wordsdef_insert_op(tape,ops,op_idx):"""Create new tapes, each with an additional operation from a provided list inserted at the specified position. Creates as many new tapes as there are operations provided. Args: tape (`~.QuantumTape`): Original tape. ops (list[qml.Operation]): Operations to insert (individually) at ``op_idx`` to produce a new tape each. op_idx (int): Index at which to insert the operations in ``ops``. Returns: list[`~.QuantumScript`]: new tapes with inserted operations, one tape per operation in ``ops``. """ops_pre=tape.operations[:op_idx]ops_post=tape.operations[op_idx:]return[qml.tape.QuantumScript(ops_pre+[insert]+ops_post,tape.measurements,shots=tape.shots)forinsertinops]def_generate_tapes_and_coeffs(tape,idx,atol,cache):"""Compute the modified tapes and coefficients required to compute the pulse generator derivative of a tape with respect to an indicated trainable parameter. Args: tape (`~.QuantumTape`): Tape to differentiate. idx (int): Index (referring to ``tape.trainable_parameters``) of the parameter with respect to which to differentiate. atol (float): absolute tolerance used to determine whether a coefficient is zero. cache (dict): Caching dictionary that allows to skip adding duplicate modified tapes. Returns: list[`~.QuantumScript`]: Modified tapes to be added to the pulse generator differentiation tapes. It is an empty list if modified tapes were already created for another parameter of the pulse of interest. tuple[int, int, tensor_like]: Gradient computation data, consisting of the start and end indices into the total list of tapes as well as the coefficients that need to be contracted with the corresponding results to obtain the partial derivative with respect to the indicated trainable parameter. dict: Input ``cache`` dictionary. If the cache lookup missed, the cache is extended by one entry and its entry ``"total_num_tapes"`` is increased by the number of created tapes. """# ``op`` is the operation into which the trainable tape parameter with index ``idx`` feeds.# ``op_idx`` is the index of ``op`` within all tape operations. ``term_idx`` is the index# of the trainable tape parameter within ``op``.op,op_idx,term_idx=tape.get_operation(idx)ifop_idxincache:# operation was analyzed and tapes were added before. Get tape indices and coefficients.start,end,all_coeffs=cache[op_idx]return[],(start,end,all_coeffs[term_idx]),cacheifnotisinstance(op,ParametrizedEvolution):# only ParametrizedEvolution can be treated with this gradient transformraiseValueError("pulse_odegen does not support differentiating parameters of "f"other operations than pulses, but received operation {op}.")num_wires=len(op.wires)# Compute the effective generators of the given pulse _for all parameters_.generators=_one_parameter_generators(op)# Obtain one-parameter-group coefficients in Pauli basis and filter for non-zero coeffsall_coeffs=_one_parameter_paulirot_coeffs(generators,num_wires)all_coeffs,pauli_words=_nonzero_coeffs_and_words(all_coeffs,num_wires,atol)# create PauliRot gates for each Pauli word (with a non-zero coefficient) and for both shiftspauli_rots=[qml.PauliRot(angle,word,wires=op.wires)forwordinpauli_wordsforanglein[np.pi/2,-np.pi/2]]# create tapes with the above PauliRot gates inserted, one per tapetapes=_insert_op(tape,pauli_rots,op_idx)# get the previous total number of tapes from the cache and determine start and end indicesend=(start:=cache["total_num_tapes"])+(num_tapes:=len(tapes))# store the tape indices and coefficients for this operation, to be retrieved from the cache# for other parameters of the same operation, see first if clause above.cache[op_idx]=(start,end,all_coeffs)# increment total number of tapescache["total_num_tapes"]+=num_tapes# Return the start and end pointer for the created tapes/the corresponding results.# Also return the coefficients for the current trainable parameter of the pulsereturntapes,(start,end,all_coeffs[term_idx]),cachedef_parshift_and_contract(results,coeffs,single_measure,single_shot_entry):"""Compute parameter-shift tape derivatives and contract them with coefficients. Args: results (list[tensor_like]): Tape execution results to be processed. coeffs (list[tensor_like]): Coefficients to be contracted. single_measure (bool): whether the tape execution results contain single measurements. single_shot_entry (bool): whether the tape execution results were obtained with a single shots setting. Returns: tensor_like or tuple[tensor_like] or tuple[tuple[tensor_like]]: contraction between the parameter-shift derivative computed from ``results`` and the ``coeffs``. """def_parshift_and_contract_single(res_list,coeffs):"""Execute the standard parameter-shift rule on a list of results and contract with Pauli basis coefficients."""psr_deriv=((res:=qml.math.stack(res_list))[::2]-res[1::2])/2returnqml.math.tensordot(psr_deriv,coeffs,axes=[[0],[0]])ifsingle_measureandsingle_shot_entry:# single measurement and single shot entryreturn_parshift_and_contract_single(results,qml.math.stack(coeffs))ifsingle_measureorsingle_shot_entry:# single measurement or single shot entry, but not bothreturntuple(_parshift_and_contract_single(r,qml.math.stack(coeffs))forrinzip(*results))returntuple(tuple(_parshift_and_contract_single(_r,qml.math.stack(coeffs))for_rinzip(*r))forrinzip(*results))def_expval_pulse_odegen(tape,argnum,atol):"""Compute the pulse generator parameter-shift rule for a quantum circuit that returns expectation values of observables. Args: tape (`~.QuantumTape`): Quantum circuit to be differentiated with the pulse generator parameter-shift rule. argnum (int or list[int] or None): Trainable tape parameter indices to differentiate with respect to. If not provided, the derivatives with respect to all trainable parameters are returned. Note that the indices are with respect to the list of trainable parameters. atol (float): absolute tolerance used to determine vanishing contributions. Returns: tuple[list[QuantumTape], function]: A tuple containing a list of generated tapes, together with a post-processing function to be applied to the results of the evaluated tapes in order to obtain the Jacobian. """argnum=argnumortape.trainable_params# Initialize a cache that will store the following:# 1. a single entry ``str: int`` that memorizes the number of tapes that# were created overall thus far. It is initialized in the following line# 2. one entry per ``ParametrizedEvolution`` that contains at least one trainable# parameter of the tape that also is marked as trainable as per ``argnum``.# These entries have the format ``int: (int, int, tuple[tensor_like])``. The key is the# index for the pulse within the tape operations. The first two entries of the value# are a "start" and an "end" pointer, referencing which tapes in ``gradient_tapes``# have been created for the pulse.# Correspondingly, these pointers define the sub-list of results that should be used# for the given pulse in the postprocessing function below.# The third entry in the value tuple contains the coefficients of the effective# generators of the given pulse. These are given in a tuple, with one entry per# parameter of the pulse.# For details on these coefficients, see _one_parameter_paulirot_coeffs.cache={"total_num_tapes":0}# ``gradient_data`` will contain tuples ``(int, int, tensor_like)``, with the first two# entries being the start/end pointers explained above, and the third entry being the# coefficients tensor for one particular parameter of a pulse (one entry of the coefficients# tuple in the corresponding ``cache`` entry).gradient_data=[]gradient_tapes=[]tape_params=tape.get_parameters()foridx,paraminenumerate(tape_params):shape=qml.math.shape(param)ifidxnotinargnum:# Trainable parameters that are de-selected by ``argnum`` receive a vanishing# gradient entry.# Indicate that there are no tapes for this parameter by setting start==endgradient_data.append((0,0,None,shape))continue# Access the pulse in the tape into which the current trainable parameter# feeds. If this pulse is analyzed for the first time, compute its effective# generators and their Pauli basis coefficients for _all_ parameters of the pulse,# and create the tapes of the modified cost function for all occurring Pauli words.# If the pulse has been analyzed before, retrieve the tape/results pointers of# the pulse and the coefficients belonging to the current parameter from the cache,# but do not add create any additional tapes.tapes,data,cache=_generate_tapes_and_coeffs(tape,idx,atol,cache)gradient_data.append((*data,shape))gradient_tapes.extend(tapes)# Extract some specifications about the original tape, required for output formattingnum_measurements=len(tape.measurements)single_measure=num_measurements==1num_params=len(tape.trainable_params)partitioned_shots=tape.shots.has_partitioned_shotstape_specs=(single_measure,num_params,num_measurements,tape.shots)defprocessing_fn(results):"""Post-process the results of the parameter-shifted tapes for ``pulse_odegen`` into the gradient."""grads=[]zero_parshapes=[]# Iterate over gradient_data, which contains one entry for each of the trainable parameters in argnumforstart,end,coeffs,par_shapeingradient_data:# If start and end pointer are equal, no tapes contribute and we get a vanishing# gradient for this parameter. For this, add an entry ``None``, which will# be replaced by the appropriately formatted zero later on. Memorize the parameter# shape for said formatting.ifstart==end:grads.append(None)zero_parshapes.append(par_shape)continue# Extract the results corresponding to the start and end pointer of the pulse# to which the current parameter belongs.res=results[start:end]# Apply the parameter-shift rule (respecting the tape output formatting)# and contract the result with the coefficients of the effective generators# in the Pauli basis. This computes the partial derivative.g=_parshift_and_contract(res,coeffs,single_measure,notpartitioned_shots)grads.append(g)# Memorize the parameter shape for the nonzero gradient entrynonzero_parshape=par_shape# g will have been defined at least once (because otherwise all gradients would have# been zero), providing a representative for a zero gradient to emulate its type/shape.# This shape needs to be modified because pulse parameters may differ in shape.zero_parshapes=iter(zero_parshapes)fori,_ginenumerate(grads):if_gisNone:par_shapes=(nonzero_parshape,next(zero_parshapes))# Make zero representative gradient entry, adapting the shapezero_rep=_make_zero_rep(g,single_measure,partitioned_shots,par_shapes)# Fill in zero-valued gradient entrygrads[i]=zero_rep# Reformat the flat list of gradients into an output that fits the original tape specs.returnreorder_grads(grads,tape_specs)returngradient_tapes,processing_fn
[docs]@partial(transform,final_transform=True)defpulse_odegen(tape:QuantumScript,argnum=None,atol=1e-7)->tuple[QuantumScriptBatch,PostprocessingFn]:r"""Transform a circuit to compute the pulse generator parameter-shift gradient of pulses in a pulse program with respect to their inputs. This method combines automatic differentiation of few-qubit operations with hardware-compatible shift rules. It allows for the evaluation of parameter-shift gradients for many-qubit pulse programs on hardware, with the limitation that the individual pulses must be acting on a sufficiently small number of qubits. For this differentiation method, the unitary matrix :math:`U` of a pulse gate and its derivative :math:`\partial_k U` are computed classically with an autodiff framework. From :math:`\partial_k U` and :math:`U` we can deduce the so-called effective generators :math:`\Omega_{k}` assuming the form .. math:: \partial_k U = U \Omega_k. These effective generators are then decomposed into the Pauli basis and the standard parameter-shift rule is used to evaluate the derivatives of the pulse program in this basis. To this end, shifted ``PauliRot`` operations are inserted in the program. Finally, the Pauli basis derivatives are recombined into the gradient of the pulse program with respect to its original parameters. See the theoretical background section below for more details. Args: tape (QuantumTape): quantum circuit to differentiate argnum (int or list[int] or None): Trainable tape parameter indices to differentiate with respect to. If not provided, the derivatives with respect to all trainable parameters are returned. Note that the indices are with respect to the list of trainable parameters. atol (float): Precision parameter used to truncate the Pauli basis coefficients of the effective generators. Coefficients ``x`` satisfying ``qml.math.isclose(x, 0., atol=atol, rtol=0) == True`` are neglected. Returns: tuple[List[QuantumTape], function]: The transformed circuit as described in :func:`qml.transform <pennylane.transform>`. Executing this circuit will provide the Jacobian in the form of a tensor, a tuple, or a nested tuple depending upon the nesting structure of measurements in the original circuit. .. note:: This function requires the JAX interface and does not work with other autodiff interfaces commonly encountered with PennyLane. In addition, this transform is only JIT-compatible with pulses that only have scalar parameters. .. warning:: This transform may not be applied directly to QNodes. Use JAX entrypoints (``jax.grad``, ``jax.jacobian``, ...) instead or apply the transform on the tape level. Also see the examples below. **Example** Consider the parametrized Hamiltonian :math:`\theta_0 Y_{0}+f(\boldsymbol{\theta_1}, t) Y_{1} + \theta_2 Z_{0}X_{1}` with parameters :math:`\theta_0 = \frac{1}{5}`, :math:`\boldsymbol{\theta_1}=\left(\frac{3}{5}, \frac{1}{5}\right)^{T}` and :math:`\theta_2 = \frac{2}{5}`, the time-dependent function :math:`f(\boldsymbol{\theta_1}, t) = \theta_{1,0} t + \theta_{1,1}` as well as a time interval :math:`t=\left[\frac{1}{10}, \frac{9}{10}\right]`. .. code-block:: python from jax import numpy as jnp jax.config.update("jax_enable_x64", True) H = ( qml.pulse.constant * qml.Y(0) + jnp.polyval * qml.Y(1) + qml.pulse.constant * (qml.Z(0) @ qml.X(1)) ) params = [jnp.array(0.2), jnp.array([0.6, 0.2]), jnp.array(0.4)] t = [0.1, 0.9] For simplicity, consider a pulse program consisting of this single pulse and a measurement of the expectation value of :math:`X_{0}`. .. code-block:: python dev = qml.device("default.qubit") @qml.qnode(dev, interface="jax", diff_method=qml.gradients.pulse_odegen) def circuit(params): op = qml.evolve(H)(params, t) return qml.expval(qml.X(0)) We registered the ``QNode`` to be differentiated with the ``pulse_odegen`` method. This allows us to simply differentiate it with ``jax.grad``, which internally makes use of the pulse generator parameter-shift method. >>> jax.grad(circuit)(params) [Array(1.41897932, dtype=float64, weak_type=True), Array([0.00164913, 0.00284788], dtype=float64), Array(-0.09984584, dtype=float64, weak_type=True)] Alternatively, we may apply the transform to the tape of the pulse program, obtaining the tapes with inserted ``PauliRot`` gates together with the post-processing function: >>> tape = qml.workflow.construct_tape(circuit)(params) # Build the tape of the circuit. >>> tape.trainable_params = [0, 1, 2] >>> tapes, fun = qml.gradients.pulse_odegen(tape, argnum=[0, 1, 2]) >>> len(tapes) 12 Why are there :math:`12` tapes? Consider the terms in the time-dependent pulse Hamiltonian: :math:`\{Y_0, Y_1, Z_0X_1\}`. Via the Lie bracket, which is just the standard matrix commutator, they generate an algebra, the so-called *dynamical Lie algebra (DLA)* of the pulse. In order to find all Pauli words that occur in the DLA, we need to (recursively) calculate all possible commutators between the three words above and their commutators. For the three words above, we obtain three additional words: .. math:: [Y_0, Z_0X_1] &\propto X_0X_1 \\ [Y_1, Z_0X_1] &\propto Z_0Z_1 \\ [Y_0, Z_0Z_1] &\propto X_0Z_1 All other commutators result in expressions proportional to one of the six Pauli words. For each of these six words, we need to compute the standard parameter-shift rule requiring two shifted circuits, which yields :math:`12` tapes. We may inspect one of the tapes, which differs from the original tape by the inserted rotation gate ``"RIY"``, i.e. a ``PauliRot(np.pi/2, "IY", wires=[0, 1])`` gate. Note that the order of the tapes follows lexicographical ordering of the inserted Pauli rotations, so that :math:`Y_1` is the first of the six words. >>> print(qml.drawer.tape_text(tapes[0])) 0: ─╭RIY─╭ParametrizedEvolution─┤ <X> 1: ─╰RIY─╰ParametrizedEvolution─┤ Executing the tapes and applying the post-processing function to the results then yields the gradient: >>> fun(qml.execute(tapes, dev)) (Array(1.41897932, dtype=float64), Array([0.00164913, 0.00284788], dtype=float64), Array(-0.09984584, dtype=float64)) .. note:: For pulse Hamiltonians with complex generating terms and few parameters, the decomposition approach taken in this method may incur more (quantum and classical) computational cost than strictly necessary. .. details:: :title: Theoretical background :href: theory The pulse generator parameter-shift gradient method makes use of the *effective generator* of a pulse for given parameters and duration. Consider the parametrized Hamiltonian .. math:: H(\boldsymbol{\theta}, t) = \sum_{k=1}^K f_k(\boldsymbol{\theta}, t) H_k where the Hamiltonian terms :math:`\{H_k\}` are constant and the :math:`\{f_k\}` are parametrized time-dependent functions depending on the parameters :math:`\boldsymbol{\theta}`. The unitary time evolution operator associated with :math:`H` is the solution to the Schrödinger equation .. math:: \frac{\mathrm{d} U}{\mathrm{d} t}(t) = -i H(\boldsymbol{\theta}, t) U(t), \quad U(0) = \mathbb{I} For a fixed time interval :math:`[t_0, t_1]`, we associate a matrix function :math:`U(\boldsymbol{\theta})` with the unitary evolution. To compute the pulse generator parameter-shift gradient, we are interested in the partial derivatives of this matrix function, usually with respect to the parameters :math:`\boldsymbol{\theta}`. Provided that :math:`H` does not act on too many qubits, or that we have an alternative sparse representation of :math:`U(\boldsymbol{\theta})`, we may compute these partial derivatives .. math:: \frac{\partial U(\boldsymbol{\theta})}{\partial \theta_{k}} classically via automatic differentiation, where :math:`\theta_{k}` is the :math:`k`\ -th (scalar) parameter in :math:`\boldsymbol{\theta}`. Now, due to the compactness of the groups :math:`\mathrm{SU}(N)`\ , we know that for each :math:`\theta_{k}` there is an *effective generator* :math:`\Omega_{k}` such that .. math:: \frac{\partial U(\boldsymbol{\theta})}{\partial \theta_{k}} = U(\boldsymbol{\theta})\Omega_{k}. Given that we can compute the left-hand side expression as well as the matrix for :math:`U` itself, we can compute :math:`\Omega_{k}` for all parameters :math:`\theta_{k}`. In addition, we may decompose these generators into Pauli words: .. math:: \Omega_{k} = \sum_{\ell=1}^{L} \omega_{k}^{(\ell)} P_{\ell} The coefficients :math:`\omega_{k}^{(\ell)}` of the generators can be computed by decomposing the anti-Hermitian matrix :math:`\Omega_{k}` into the Pauli basis and only keeping the non-vanishing terms. This is possible via a tensor contraction with the full Pauli basis (or alternative, more efficient methods): .. math:: \omega_{k}^{(\ell)} = \frac{1}{2^N}\mathrm{Tr}\left[P_\ell \Omega_{k}\right] where :math:`N` is the number of qubits and :math:`\ell = 1, .. , L` the Pauli word index. The number of non-zero Pauli words :math:`L` is typically equal to the dimension of the dynamical Lie algebra (can be lower if coefficients happen to be zero) and at most :math:`4^N-1`. Thus far, we discussed the derivative of the time evolution, or pulse. Now, consider an objective function that is based on measuring an expectation value after executing a pulse program: .. math:: C(\boldsymbol{\theta})= \langle\psi_0|U(\boldsymbol{\theta})^\dagger B U(\boldsymbol{\theta}) |\psi_0\rangle Using the derivative of :math:`U` and the decomposition of the effective generator :math:`\Omega_k` above, we calculate the partial derivative of :math:`C`: .. math:: \frac{\partial C}{\partial \theta_{k}} (\boldsymbol{\theta})&= \langle\psi_0|\left[U^\dagger B U, \Omega_{k}\right]|\psi_0\rangle\\ &=\sum_{\ell=1}^L \omega_{k}^{(\ell)} \langle\psi_0|\left[U^\dagger B U, P_\ell \right]|\psi_0\rangle\\ &=\sum_{\ell=1}^L \tilde\omega_{k}^{(\ell)} \langle\psi_0|\left[U^\dagger B U, -\frac{i}{2}P_\ell \right]|\psi_0\rangle\\ &=\sum_{\ell=1}^L \tilde\omega_{k}^{(\ell)} \frac{\mathrm{d}}{\mathrm{d}x} \langle\psi_0|\exp\left(i\frac{x}{2}P_\ell \right)U^\dagger B U\exp\left(-i\frac{x}{2}P_\ell \right)|\psi_0\rangle\large|_{x=0}\\ &=\sum_{\ell=1}^L \tilde\omega_{k}^{(\ell)} \frac{\mathrm{d}}{\mathrm{d}x} C_\ell(x)\large|_{x=0} where we skipped the argument :math:`\boldsymbol{\theta}` of :math:`U` for readability and introduced the modified coefficients :math:`\tilde\omega_{k}^{(\ell)}=2i\omega_{k}^{(\ell)}`. In the second to last step, we rewrote the commutator of :math:`U^\dagger BU` and :math:`\frac{i}{2}P_\ell` as the derivative (at zero) of a modified cost function :math:`C_\ell(x)` that executes a Pauli rotation about :math:`-i\frac{x}{2}P_\ell` before the parametrized time evolution. Here, the variable :math:`x` is just a convenient way to write the modified cost function. Note that its derivative with respect to :math:`x` can be computed with the standard two-term parameter-shift rule for Pauli rotation gates, i.e. .. math:: \frac{\mathrm{d}}{\mathrm{d}x} C_\ell(x) {\large|}_{x=0} = \frac{1}{2} \left(C_\ell(\pi/2) - C_\ell(-\pi/2)\right) with :math:`C_\ell(x) = \langle\psi_0|e^{i\frac{x}{2}P_\ell} U^\dagger B U e^{-i\frac{x}{2}P_\ell} |\psi_0\rangle`. **Caching** Considering the derivation above, we notice that the same modified cost function :math:`C_\ell(x)` may appear in the derivatives of distinct parameters :math:`\theta_k` and :math:`\theta_m`, because they are shared by two terms in the pulse Hamiltonian. In order to not evaluate the same modified quantum circuit derivatives multiple times, we use an internal cache that avoids repeated creation of the same parameter-shifted circuits. In addition, all modified cost functions :math:`C_\ell` that would be multiplied with a vanishing coefficient :math:`\tilde\omega_{k}^{(\ell)}` *for all* :math:`k` are skipped altogether. This approach requires a few additional classical coprocessing steps but allows us to save quantum resources in many relevant pulse programs. """transform_name="pulse generator parameter-shift"_assert_has_jax(transform_name)assert_no_state_returns(tape.measurements,transform_name)assert_no_variance(tape.measurements,transform_name)assert_no_trainable_tape_batching(tape,transform_name)ifargnumisNoneandnottape.trainable_params:return_no_trainable_grad(tape)trainable_params=choose_trainable_params(tape,argnum)diff_methods=find_and_validate_gradient_methods(tape,"analytic",trainable_params)ifall(g=="0"forgindiff_methods.values()):return_all_zero_grad(tape)argnum=[ifori,dmindiff_methods.items()ifdm=="A"]return_expval_pulse_odegen(tape,argnum,atol)
@pulse_odegen.custom_qnode_transformdefpulse_odegen_qnode_wrapper(self,qnode,targs,tkwargs):"""A custom QNode wrapper for the gradient transform :func:`~.pulse_odegen`. It raises an error, so that applying ``pulse_odegen`` to a ``QNode`` directly is not supported. """# pylint:disable=unused-argumenttransform_name="pulse generator parameter-shift"raise_pulse_diff_on_qnode(transform_name)