Source code for pennylane.gradients.finite_difference
# Copyright 2018-2021 Xanadu Quantum Technologies Inc.# Licensed under the Apache License, Version 2.0 (the "License");# you may not use this file except in compliance with the License.# You may obtain a copy of the License at# http://www.apache.org/licenses/LICENSE-2.0# Unless required by applicable law or agreed to in writing, software# distributed under the License is distributed on an "AS IS" BASIS,# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.# See the License for the specific language governing permissions and# limitations under the License."""This module contains functions for computing the finite-difference gradientof a quantum tape."""importfunctoolsfromfunctoolsimportpartial# pylint: disable=protected-access,too-many-arguments,too-many-branches,too-many-statements,unused-argumentfromwarningsimportwarnimportnumpyasnpfromscipy.linalgimportsolveaslinalg_solvefromscipy.specialimportfactorialimportpennylaneasqmlfrompennylaneimporttransformfrompennylane.gradients.gradient_transformimport_contract_qjac_with_cjacfrompennylane.measurementsimportProbabilityMPfrompennylane.tapeimportQuantumScript,QuantumScriptBatchfrompennylane.typingimportPostprocessingFnfrom.general_shift_rulesimportgenerate_shifted_tapesfrom.gradient_transformimport(_all_zero_grad,_no_trainable_grad,assert_no_trainable_tape_batching,choose_trainable_params,find_and_validate_gradient_methods,)
[docs]@functools.lru_cache(maxsize=None)deffinite_diff_coeffs(n,approx_order,strategy):r"""Generate the finite difference shift values and corresponding term coefficients for a given derivative order, approximation accuracy, and strategy. Args: n (int): Positive integer specifying the order of the derivative. For example, ``n=1`` corresponds to the first derivative, ``n=2`` the second derivative, etc. approx_order (int): Positive integer referring to the approximation order of the returned coefficients, e.g., ``approx_order=1`` corresponds to the first-order approximation to the derivative. strategy (str): One of ``"forward"``, ``"center"``, or ``"backward"``. For the ``"forward"`` strategy, the finite-difference shifts occur at the points :math:`x_0, x_0+h, x_0+2h,\dots`, where :math:`h` is some small step size. The ``"backwards"`` strategy is similar, but in reverse: :math:`x_0, x_0-h, x_0-2h, \dots`. Finally, the ``"center"`` strategy results in shifts symmetric around the unshifted point: :math:`\dots, x_0-2h, x_0-h, x_0, x_0+h, x_0+2h,\dots`. Returns: array[float]: A ``(2, N)`` array. The first row corresponds to the coefficients, and the second row corresponds to the shifts. **Example** >>> finite_diff_coeffs(n=1, approx_order=1, strategy="forward") array([[-1., 1.], [ 0., 1.]]) For example, this results in the linear combination: .. math:: \frac{-y(x_0) + y(x_0 + h)}{h} where :math:`h` is the finite-difference step size. More examples: >>> finite_diff_coeffs(n=1, approx_order=2, strategy="center") array([[-0.5, 0.5], [-1. , 1. ]]) >>> finite_diff_coeffs(n=2, approx_order=2, strategy="center") array([[-2., 1., 1.], [ 0., -1., 1.]]) **Details** Consider a function :math:`y(x)`. We wish to approximate the :math:`n`-th derivative at point :math:`x_0`, :math:`y^{(n)}(x_0)`, by sampling the function at :math:`N<n` distinct points: .. math:: y^{(n)}(x_0) \approx \sum_{i=1}^N c_i y(x_i) where :math:`c_i` are coefficients, and :math:`x_i=x_0 + s_i` are the points we sample the function at. Consider the Taylor expansion of :math:`y(x_i)` around the point :math:`x_0`: .. math:: y^{(n)}(x_0) \approx \sum_{i=1}^N c_i y(x_i) &= \sum_{i=1}^N c_i \left[ y(x_0) + y'(x_0)(x_i-x_0) + \frac{1}{2} y''(x_0)(x_i-x_0)^2 + \cdots \right]\\ & = \sum_{j=0}^m y^{(j)}(x_0) \left[\sum_{i=1}^N \frac{c_i s_i^j}{j!} + \mathcal{O}(s_i^m) \right], where :math:`s_i = x_i-x_0`. For this approximation to be satisfied, we must therefore have .. math:: \sum_{i=1}^N s_i^j c_i = \begin{cases} j!, &j=n\\ 0, & j\neq n\end{cases}. Thus, to determine the coefficients :math:`c_i \in \{c_1, \dots, c_N\}` for particular shift values :math:`s_i \in \{s_1, \dots, s_N\}` and derivative order :math:`n`, we must solve this linear system of equations. """ifn<1ornotisinstance(n,int):raiseValueError("Derivative order n must be a positive integer.")ifapprox_order<1ornotisinstance(approx_order,int):raiseValueError("Approximation order must be a positive integer.")num_points=approx_order+2*np.floor((n+1)/2)-1N=num_points+1ifn%2==0elsenum_pointsifstrategy=="forward":shifts=np.arange(N,dtype=np.float64)elifstrategy=="backward":shifts=np.arange(-N+1,1,dtype=np.float64)elifstrategy=="center":ifapprox_order%2!=0:raiseValueError("Centered finite-difference requires an even order approximation.")N=num_points//2shifts=np.arange(-N,N+1,dtype=np.float64)else:raiseValueError(f"Unknown strategy {strategy}. Must be one of 'forward', 'backward', 'center'.")# solve for the coefficientsA=shifts**np.arange(len(shifts)).reshape(-1,1)b=np.zeros_like(shifts)b[n]=factorial(n)# Note: using np.linalg.solve instead of scipy.linalg.solve can cause a bus error when this# is inside a tf.py_function inside a tf.function, as occurs with the tensorflow-autograph interface# Bus errors were potentially specific to the M1 Mac. Change with caution.coeffs=linalg_solve(A,b)coeffs_and_shifts=np.stack([coeffs,shifts])# remove all small coefficients and shiftscoeffs_and_shifts[np.abs(coeffs_and_shifts)<1e-10]=0# remove columns where the coefficients are 0coeffs_and_shifts=coeffs_and_shifts[:,~np.all(coeffs_and_shifts==0,axis=0)]# sort columns in ascending order according to abs(shift)coeffs_and_shifts=coeffs_and_shifts[:,np.argsort(np.abs(coeffs_and_shifts)[1])]returncoeffs_and_shifts
def_processing_fn(results,shots,single_shot_batch_fn):ifnotshots.has_partitioned_shots:returnsingle_shot_batch_fn(results)grads_tuple=[]foridxinrange(shots.num_copies):res=[tape_res[idx]fortape_resinresults]g_tuple=single_shot_batch_fn(res)grads_tuple.append(g_tuple)returntuple(grads_tuple)def_finite_diff_stopping_condition(op)->bool:ifnotop.has_decomposition:# let things without decompositions through without error# error will happen when calculating shifted tapes for finite diffreturnTrueifisinstance(op,qml.operation.Operator)andany(qml.math.requires_grad(p)forpinop.data):returnop.grad_methodisnotNonereturnTruedef_expand_transform_finite_diff(tape:QuantumScript,argnum=None,h=1e-7,approx_order=1,n=1,strategy="forward",f0=None,validate_params=True,)->tuple[QuantumScriptBatch,PostprocessingFn]:"""Expand function to be applied before finite difference."""[new_tape],postprocessing=qml.devices.preprocess.decompose(tape,stopping_condition=_finite_diff_stopping_condition,skip_initial_state_prep=False,name="finite_diff",error=qml.operation.DecompositionUndefinedError,)ifnew_tapeistape:return[tape],postprocessingparams=new_tape.get_parameters(trainable_only=False)new_tape.trainable_params=qml.math.get_trainable_indices(params)return[new_tape],postprocessing
[docs]@partial(transform,expand_transform=_expand_transform_finite_diff,classical_cotransform=_contract_qjac_with_cjac,final_transform=True,)deffinite_diff(tape:QuantumScript,argnum=None,h=1e-7,approx_order=1,n=1,strategy="forward",f0=None,validate_params=True,)->tuple[QuantumScriptBatch,PostprocessingFn]:r"""Transform a circuit to compute the finite-difference gradient of all gate parameters with respect to its inputs. Args: tape (QNode or QuantumTape): quantum circuit to differentiate argnum (int or list[int] or None): Trainable parameter indices to differentiate with respect to. If not provided, the derivatives with respect to all trainable parameters are returned. Note that the indices are with respect to the list of trainable parameters. h (float): finite difference method step size approx_order (int): The approximation order of the finite-difference method to use. n (int): compute the :math:`n`-th derivative strategy (str): The strategy of the finite difference method. Must be one of ``"forward"``, ``"center"``, or ``"backward"``. For the ``"forward"`` strategy, the finite-difference shifts occur at the points :math:`x_0, x_0+h, x_0+2h,\dots`, where :math:`h` is some small stepsize. The ``"backwards"`` strategy is similar, but in reverse: :math:`x_0, x_0-h, x_0-2h, \dots`. Finally, the ``"center"`` strategy results in shifts symmetric around the unshifted point: :math:`\dots, x_0-2h, x_0-h, x_0, x_0+h, x_0+2h,\dots`. f0 (tensor_like[float] or None): Output of the evaluated input tape. If provided, and the gradient recipe contains an unshifted term, this value is used, saving a quantum evaluation. validate_params (bool): Whether to validate the tape parameters or not. If ``True``, the ``Operation.grad_method`` attribute and the circuit structure will be analyzed to determine if the trainable parameters support the finite-difference method. If ``False``, the finite-difference method will be applied to all parameters. Returns: qnode (QNode) or tuple[List[QuantumTape], function]: The transformed circuit as described in :func:`qml.transform <pennylane.transform>`. Executing this circuit will provide the Jacobian in the form of a tensor, a tuple, or a nested tuple depending upon the nesting structure of measurements in the original circuit. **Example** This transform can be registered directly as the quantum gradient transform to use during autodifferentiation: >>> dev = qml.device("default.qubit") >>> @qml.qnode(dev, interface="autograd", diff_method="finite-diff") ... def circuit(params): ... qml.RX(params[0], wires=0) ... qml.RY(params[1], wires=0) ... qml.RX(params[2], wires=0) ... return qml.expval(qml.Z(0)) >>> params = np.array([0.1, 0.2, 0.3], requires_grad=True) >>> qml.jacobian(circuit)(params) array([-0.38751725, -0.18884792, -0.38355708]) When differentiating QNodes with multiple measurements using Autograd or TensorFlow, the outputs of the QNode first need to be stacked. The reason is that those two frameworks only allow differentiating functions with array or tensor outputs, instead of functions that output sequences. In contrast, Jax and Torch require no additional post-processing. >>> import jax >>> dev = qml.device("default.qubit") >>> @qml.qnode(dev, interface="jax", diff_method="finite-diff") ... def circuit(params): ... qml.RX(params[0], wires=0) ... qml.RY(params[1], wires=0) ... qml.RX(params[2], wires=0) ... return qml.expval(qml.Z(0)), qml.var(qml.Z(0)) >>> params = jax.numpy.array([0.1, 0.2, 0.3]) >>> jax.jacobian(circuit)(params) (Array([-0.38751727, -0.18884793, -0.3835571 ], dtype=float32), Array([0.6991687 , 0.34072432, 0.6920237 ], dtype=float32)) .. details:: :title: Usage Details This gradient transform can be applied directly to :class:`QNode <pennylane.QNode>` objects. However, for performance reasons, we recommend providing the gradient transform as the ``diff_method`` argument of the QNode decorator, and differentiating with your preferred machine learning framework. >>> @qml.qnode(dev) ... def circuit(params): ... qml.RX(params[0], wires=0) ... qml.RY(params[1], wires=0) ... qml.RX(params[2], wires=0) ... return qml.expval(qml.Z(0)), qml.var(qml.Z(0)) >>> params = np.array([0.1, 0.2, 0.3], requires_grad=True) >>> qml.gradients.finite_diff(circuit)(params) (tensor([-0.38751724, -0.18884792, -0.38355708], requires_grad=True), tensor([0.69916868, 0.34072432, 0.69202365], requires_grad=True)) This quantum gradient transform can also be applied to low-level :class:`~.QuantumTape` objects. This will result in no implicit quantum device evaluation. Instead, the processed tapes, and post-processing function, which together define the gradient are directly returned: >>> ops = [qml.RX(p, wires=0) for p in params] >>> measurements = [qml.expval(qml.Z(0)), qml.var(qml.Z(0))] >>> tape = qml.tape.QuantumTape(ops, measurements) >>> gradient_tapes, fn = qml.gradients.finite_diff(tape) >>> gradient_tapes [<QuantumTape: wires=[0], params=3>, <QuantumScript: wires=[0], params=3>, <QuantumScript: wires=[0], params=3>, <QuantumScript: wires=[0], params=3>] This can be useful if the underlying circuits representing the gradient computation need to be analyzed. Note that ``argnum`` refers to the index of a parameter within the list of trainable parameters. For example, if we have: >>> tape = qml.tape.QuantumScript( ... [qml.RX(1.2, wires=0), qml.RY(2.3, wires=0), qml.RZ(3.4, wires=0)], ... [qml.expval(qml.Z(0))], ... trainable_params = [1, 2] ... ) >>> qml.gradients.finite_diff(tape, argnum=1) The code above will differentiate the third parameter rather than the second. The output tapes can then be evaluated and post-processed to retrieve the gradient: >>> dev = qml.device("default.qubit") >>> fn(qml.execute(gradient_tapes, dev, None)) ((tensor(-0.56464251, requires_grad=True), tensor(-0.56464251, requires_grad=True), tensor(-0.56464251, requires_grad=True)), (tensor(0.93203912, requires_grad=True), tensor(0.93203912, requires_grad=True), tensor(0.93203912, requires_grad=True))) This gradient transform is compatible with devices that use shot vectors for execution. >>> shots = (10, 100, 1000) >>> dev = qml.device("default.qubit", shots=shots) >>> @qml.qnode(dev) ... def circuit(params): ... qml.RX(params[0], wires=0) ... qml.RY(params[1], wires=0) ... qml.RX(params[2], wires=0) ... return qml.expval(qml.Z(0)), qml.var(qml.Z(0)) >>> params = np.array([0.1, 0.2, 0.3], requires_grad=True) >>> qml.gradients.finite_diff(circuit, h=0.1)(params) ((array([-2., -2., 0.]), array([3.6, 3.6, 0. ])), (array([1. , 0.2, 0.4]), array([-1.78 , -0.34 , -0.688])), (array([-0.9 , -0.22, -0.48]), array([1.5498 , 0.3938 , 0.84672]))) The outermost tuple contains results corresponding to each element of the shot vector. """transform_name="finite difference"assert_no_trainable_tape_batching(tape,transform_name)ifany(qml.math.get_dtype_name(p)=="float32"forpintape.get_parameters()):warn("Finite differences with float32 detected. Answers may be inaccurate. float64 is recommended.",UserWarning,)number_parameters=len(tape.trainable_params)number_measurements=len(tape.measurements)ifargnumisNoneandnottape.trainable_params:return_no_trainable_grad(tape)trainable_params=choose_trainable_params(tape,argnum)diff_methods=(find_and_validate_gradient_methods(tape,"numeric",trainable_params)ifvalidate_paramselse{idx:"F"foridxintrainable_params})ifall(g=="0"forgindiff_methods.values()):return_all_zero_grad(tape)gradient_tapes=[]shapes=[]c0=Nonecoeffs,shifts=finite_diff_coeffs(n=n,approx_order=approx_order,strategy=strategy)if0inshifts:# Finite difference formula includes a term with zero shift.iff0isNone:# Ensure that the unshifted tape is appended# to the gradient tapes, if not already.gradient_tapes.append(tape)# Store the unshifted coefficient. We know that# it will always be the first coefficient due to processing.c0=coeffs[0]shifts=shifts[1:]coeffs=coeffs[1:]fori,_inenumerate(tape.trainable_params):ifinotindiff_methodsordiff_methods[i]=="0":# parameter has zero gradientshapes.append(0)continueg_tapes=generate_shifted_tapes(tape,i,shifts*h)gradient_tapes.extend(g_tapes)shapes.append(len(g_tapes))def_single_shot_batch_result(results):"""Auxiliary function for post-processing one batch of results corresponding to finite shots or a single component of a shot vector"""grads=[]start=1ifc0isnotNoneandf0isNoneelse0r0=f0orresults[0]output_dims=[]# TODO: Update shape for CV variablesformintape.measurements:ifisinstance(m,ProbabilityMP):output_dims.append(2**len(m.wires))else:output_dims.append(1)forsinshapes:ifs==0:# parameter has zero gradientifnotisinstance(results[0],tuple):g=qml.math.zeros_like(results[0])else:g=[]foriinoutput_dims:zero=qml.math.squeeze(qml.math.zeros(i))g.append(zero)grads.append(g)continueres=results[start:start+s]start=start+s# compute the linear combination of results# and coefficientspre_grads=[]ifnumber_measurements==1:res=qml.math.stack(res)c=qml.math.convert_like(coeffs,res)lin_comb=qml.math.tensordot(res,c,[[0],[0]])pre_grads.append(lin_comb)else:foriinrange(number_measurements):r=qml.math.stack([r[i]forrinres])c=qml.math.convert_like(coeffs,r)lin_comb=qml.math.tensordot(r,c,[[0],[0]])pre_grads.append(lin_comb)# Add on the unshifted termifc0isnotNone:ifnumber_measurements==1:c=qml.math.convert_like(c0,r0)pre_grads=[pre_grads[0]+r0*c]else:foriinrange(number_measurements):r_i=r0[i]c=qml.math.convert_like(c0,r_i)pre_grads[i]=pre_grads[i]+r_i*ccoeff_div=qml.math.cast_like(qml.math.convert_like(1/h**n,pre_grads[0]),pre_grads[0])iflen(tape.measurements)>1:pre_grads=tuple(qml.math.convert_like(i*coeff_div,coeff_div)foriinpre_grads)else:pre_grads=qml.math.convert_like(pre_grads[0]*coeff_div,coeff_div)grads.append(pre_grads)# Single measurementifnumber_measurements==1:ifnumber_parameters==1:returngrads[0]returntuple(grads)# Reordering to match the right shape for multiple measurementsgrads_reorder=[[0]*number_parametersfor_inrange(len(tape.measurements))]foriinrange(number_measurements):forjinrange(number_parameters):grads_reorder[i][j]=grads[j][i]# To tupleifnumber_parameters==1:returntuple(elem[0]forelemingrads_reorder)returntuple(tuple(elem)forelemingrads_reorder)processing_fn=functools.partial(_processing_fn,shots=tape.shots,single_shot_batch_fn=_single_shot_batch_result)returngradient_tapes,processing_fn