diff --git a/doc/releases/changelog-dev.md b/doc/releases/changelog-dev.md index ac75e056a36..e5d0d5c23df 100644 --- a/doc/releases/changelog-dev.md +++ b/doc/releases/changelog-dev.md @@ -15,10 +15,11 @@ * `qml.draw` now supports drawing mid-circuit measurements. [(#4775)](https://github.com/PennyLaneAI/pennylane/pull/4775) -* Autograd can now use vjps provided by the device from the new device API. If a device provides +* Autograd and torch can now use vjps provided by the device from the new device API. If a device provides a vector Jacobian product, this can be selected by providing `device_vjp=True` to `qml.execute`. [(#4557)](https://github.com/PennyLaneAI/pennylane/pull/4557) + [(#4654)](https://github.com/PennyLaneAI/pennylane/pull/4654) * Updates to some relevant Pytests to enable its use as a suite of benchmarks. [(#4703)](https://github.com/PennyLaneAI/pennylane/pull/4703) diff --git a/pennylane/interfaces/execution.py b/pennylane/interfaces/execution.py index 6c4e953e4df..4e1c5aa771b 100644 --- a/pennylane/interfaces/execution.py +++ b/pennylane/interfaces/execution.py @@ -43,7 +43,7 @@ device_type = Union[qml.Device, "qml.devices.Device"] -jpc_interfaces = {"autograd", "numpy"} +jpc_interfaces = {"autograd", "numpy", "torch", "pytorch"} INTERFACE_MAP = { None: "Numpy", diff --git a/pennylane/interfaces/jacobian_products.py b/pennylane/interfaces/jacobian_products.py index 7307aee62d5..6cf385c221c 100644 --- a/pennylane/interfaces/jacobian_products.py +++ b/pennylane/interfaces/jacobian_products.py @@ -44,7 +44,6 @@ def _compute_vjps(jacs, dys, tapes): vjps.append(qml.math.sum(qml.math.stack(shot_vjps), axis=0)) else: vjps.append(f[multi](dy, jac)) - return tuple(vjps) diff --git a/pennylane/interfaces/torch.py b/pennylane/interfaces/torch.py index 33e72e2c903..982c3150d5e 100644 --- a/pennylane/interfaces/torch.py +++ b/pennylane/interfaces/torch.py @@ -14,6 +14,51 @@ """ This module contains functions for adding the PyTorch interface to a PennyLane Device class. + +**How to bind a custom derivative with Torch.** + +See `the Torch documentation `_ for more complete +information. + +Suppose I have a function ``f`` that I want to define a custom vjp for. + +We need to inherit from ``torch.autograd.Function`` and define ``forward`` and ``backward`` static +methods. + +.. code-block:: python + + class CustomFunction(torch.autograd.Function): + + @staticmethod + def forward(ctx, x, exponent=2): + ctx.saved_info = {'x': x, 'exponent': exponent} + return x ** exponent + + @staticmethod + def backward(ctx, dy): + x = ctx.saved_info['x'] + exponent = ctx.saved_info['exponent'] + print(f"Calculating the gradient with x={x}, dy={dy}, exponent={exponent}") + return dy * exponent * x ** (exponent-1), None + +To use the ``CustomFunction`` class, we call it with the static ``apply`` method. + +>>> val = torch.tensor(2.0, requires_grad=True) +>>> res = CustomFunction.apply(val) +>>> res +tensor(4., grad_fn=) +>>> res.backward() +>>> val.grad +Calculating the gradient with x=2.0, dy=1.0, exponent=2 +tensor(4.) + +Note that for custom functions, the output of ``forward`` and the output of ``backward`` are flattened iterables of +Torch arrays. While autograd and jax can handle nested result objects like ``((np.array(1), np.array(2)), np.array(3))``, +torch requires that it be flattened like ``(np.array(1), np.array(2), np.array(3))``. 
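As a standalone illustration of this flattening requirement (``flatten`` and ``unflatten`` below are hypothetical helper names used only for this sketch, not functions defined in this module):

.. code-block:: python

    def flatten(results):
        # recursively unpack a nested tuple/list of arrays into a flat list
        flat = []
        for r in results:
            if isinstance(r, (tuple, list)):
                flat.extend(flatten(r))
            else:
                flat.append(r)
        return flat

    def unflatten(flat, structure):
        # repack a flat sequence according to a template structure
        flat = list(flat)

        def build(template):
            if isinstance(template, (tuple, list)):
                return tuple(build(t) for t in template)
            return flat.pop(0)

        return build(structure)

>>> nested = ((np.array(1), np.array(2)), np.array(3))
>>> flatten(nested)
[array(1), array(2), array(3)]
>>> unflatten(flatten(nested), nested)
((array(1), array(2)), array(3))
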
The ``pytreeify`` class decorator +modifies the output of ``forward`` and the input to ``backward`` to unpack and repack the nested structure of the PennyLane +result object. + + """ # pylint: disable=too-many-arguments,protected-access,abstract-method import inspect @@ -64,27 +109,6 @@ def new_backward(ctx, *flat_grad_outputs): return cls -def _compute_vjps(dys, jacs, multi_measurements): - """Compute the vjps of multiple tapes, directly for a Jacobian and tangents.""" - if logger.isEnabledFor(logging.DEBUG): - logger.debug( - "Entry with args=(dys=%s, jacs=%s, multi_measurements=%s) called by=%s", - dys, - jacs, - multi_measurements, - "::L".join(str(i) for i in inspect.getouterframes(inspect.currentframe(), 2)[1][1:3]), - ) - - vjps = [] - - for i, multi in enumerate(multi_measurements): - compute_func = ( - qml.gradients.compute_vjp_multi if multi else qml.gradients.compute_vjp_single - ) - vjps.extend(compute_func(dys[i], jacs[i])) - return vjps - - @pytreeify class ExecuteTapes(torch.autograd.Function): """The signature of this ``torch.autograd.Function`` is designed to @@ -96,26 +120,17 @@ class ExecuteTapes(torch.autograd.Function): as the first argument ``kwargs``. This dictionary **must** contain: * ``"tapes"``: the quantum tapes to batch evaluate - * ``"device"``: the quantum device to use to evaluate the tapes - * ``"execute_fn"``: the execution function to use on forward passes - * ``"gradient_fn"``: the gradient transform function to use - for backward passes - * ``"gradient_kwargs"``: gradient keyword arguments to pass to the - gradient function - * ``"max_diff``: the maximum order of derivatives to support + * ``"execute_fn"``: a function that calculates the results of the tapes + * ``"jpc"``: a :class:`~.JacobianProductCalculator` that can compute the vjp. Further, note that the ``parameters`` argument is dependent on the ``tapes``; this function should always be called with the parameters extracted directly from the tapes as follows: - >>> parameters = [] - >>> [parameters.extend(t.get_parameters()) for t in tapes] - >>> kwargs = {"tapes": tapes, "device": device, "gradient_fn": gradient_fn, ...} + >>> parameters = [p for t in tapes for p in t.get_parameters()] + >>> kwargs = {"tapes": tapes, "execute_fn": execute_fn, "jpc": jpc} >>> ExecuteTapes.apply(kwargs, *parameters) - The private argument ``_n`` is used to track nesting of derivatives, for example - if the nth-order derivative is requested. Do not set this argument unless you - understand the consequences! 
""" @staticmethod @@ -133,16 +148,9 @@ def forward(ctx, kwargs, *parameters): # pylint: disable=arguments-differ ) ctx.tapes = kwargs["tapes"] - ctx.device = kwargs["device"] + ctx.jpc = kwargs["jpc"] - ctx.execute_fn = kwargs["execute_fn"] - ctx.gradient_fn = kwargs["gradient_fn"] - - ctx.gradient_kwargs = kwargs["gradient_kwargs"] - ctx.max_diff = kwargs["max_diff"] - ctx._n = kwargs.get("_n", 1) - - res, ctx.jacs = ctx.execute_fn(ctx.tapes, **ctx.gradient_kwargs) + res = tuple(kwargs["execute_fn"](ctx.tapes)) # if any input tensor uses the GPU, the output should as well ctx.torch_device = None @@ -151,12 +159,7 @@ def forward(ctx, kwargs, *parameters): # pylint: disable=arguments-differ if isinstance(p, torch.Tensor) and p.is_cuda: # pragma: no cover ctx.torch_device = p.get_device() break - res = tuple(_res_to_torch(r, ctx) for r in res) - for i, _ in enumerate(res): - # In place change of the numpy array Jacobians to Torch objects - _jac_to_torch(i, ctx) - return res @staticmethod @@ -173,124 +176,39 @@ def backward(ctx, *dy): ), ) - multi_measurements = [len(tape.measurements) > 1 for tape in ctx.tapes] - - if ctx.jacs: - # Jacobians were computed on the forward pass (mode="forward") - # No additional quantum evaluations needed; simply compute the VJPs directly. - vjps = _compute_vjps(dy, ctx.jacs, multi_measurements) - - else: - # Need to compute the Jacobians on the backward pass (accumulation="backward") - - if isinstance(ctx.gradient_fn, qml.transforms.core.TransformDispatcher): - # Gradient function is a gradient transform. - - # Generate and execute the required gradient tapes - if ctx._n < ctx.max_diff: - # The derivative order is less than the max derivative order. - # Compute the VJP recursively by using the gradient transform - # and calling ``execute`` to compute the results. - # This will allow higher-order derivatives to be computed - # if requested. - - vjp_tapes, processing_fn = qml.gradients.batch_vjp( - ctx.tapes, - dy, - ctx.gradient_fn, - reduction="extend", - gradient_kwargs=ctx.gradient_kwargs, - ) - # This is where the magic happens. Note that we call ``execute``. - # This recursion, coupled with the fact that the gradient transforms - # are differentiable, allows for arbitrary order differentiation. - res = execute( - vjp_tapes, - ctx.device, - ctx.execute_fn, - ctx.gradient_fn, - ctx.gradient_kwargs, - _n=ctx._n + 1, - max_diff=ctx.max_diff, - ) - vjps = processing_fn(res) - - else: - # The derivative order is at the maximum. Compute the VJP - # in a non-differentiable manner to reduce overhead. - vjp_tapes, processing_fn = qml.gradients.batch_vjp( - ctx.tapes, - dy, - ctx.gradient_fn, - reduction="extend", - gradient_kwargs=ctx.gradient_kwargs, - ) - - vjps = processing_fn(ctx.execute_fn(vjp_tapes)[0]) - - else: - # Gradient function is not a gradient transform - # (e.g., it might be a device method). - # Note that unlike the previous branch: - # - # - there is no recursion here - # - gradient_fn is not differentiable - # - # so we cannot support higher-order derivatives. 
- - jacs = ctx.gradient_fn(ctx.tapes, **ctx.gradient_kwargs) - - vjps = _compute_vjps(dy, jacs, multi_measurements) - - # Remove empty vjps (from tape with non trainable params) - vjps = [vjp for vjp in vjps if list(vjp.shape) != [0]] + vjps = ctx.jpc.compute_vjp(ctx.tapes, dy) + + # split tensor into separate entries + unpacked_vjps = [] + for vjp_slice in vjps: + if vjp_slice is not None and np.squeeze(vjp_slice).shape != (0,): + unpacked_vjps.extend(_res_to_torch(vjp_slice, ctx)) + vjps = tuple(unpacked_vjps) # The output of backward must match the input of forward. # Therefore, we return `None` for the gradient of `kwargs`. - return (None,) + tuple(vjps) + return (None,) + vjps -def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=1): +def execute(tapes, execute_fn, jpc): """Execute a batch of tapes with Torch parameters on a device. - This function may be called recursively, if ``gradient_fn`` is a differentiable - transform, and ``_n < max_diff``. Args: tapes (Sequence[.QuantumTape]): batch of tapes to execute - device (pennylane.Device): Device to use to execute the batch of tapes. - If the device does not provide a ``batch_execute`` method, - by default the tapes will be executed in serial. - execute_fn (callable): The execution function used to execute the tapes - during the forward pass. This function must return a tuple ``(results, jacobians)``. - If ``jacobians`` is an empty list, then ``gradient_fn`` is used to - compute the gradients during the backwards pass. - gradient_kwargs (dict): dictionary of keyword arguments to pass when - determining the gradients of tapes - gradient_fn (callable): the gradient function to use to compute quantum gradients - _n (int): a positive integer used to track nesting of derivatives, for example - if the nth-order derivative is requested. - max_diff (int): If ``gradient_fn`` is a gradient transform, this option specifies - the maximum order of derivatives to support. Increasing this value allows - for higher order derivatives to be extracted, at the cost of additional - (classical) computational overhead during the backwards pass. + execute_fn (Callable[[Sequence[.QuantumTape]], ResultBatch]): a function that turns a batch of circuits into results + jpc (JacobianProductCalculator): a class that can compute the vector jacobian product for the input tapes. + Returns: - list[list[torch.Tensor]]: A nested list of tape results. Each element in - the returned list corresponds in order to the provided tapes. + TensorLike: A nested tuple of tape results. Each element in + the returned tuple corresponds in order to the provided tapes. 
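    For example, a rough sketch of wiring this entry point up by hand (normally ``qml.execute``
    does this wiring; the ``TransformJacobianProducts`` class and its constructor arguments are
    assumptions based on the accompanying ``jacobian_products`` module and may differ):

    .. code-block:: python

        import torch
        import pennylane as qml
        from pennylane.interfaces.jacobian_products import TransformJacobianProducts

        dev = qml.device("default.qubit")

        def execute_fn(tapes):
            # forward pass: turn the batch of tapes into results
            return dev.execute(tapes)

        # anything exposing ``compute_vjp(tapes, dy)`` can be passed as ``jpc``
        jpc = TransformJacobianProducts(execute_fn, qml.gradients.param_shift)

        x = torch.tensor(0.543, requires_grad=True)
        tape = qml.tape.QuantumScript([qml.RX(x, 0)], [qml.expval(qml.PauliZ(0))])

        (res,) = execute((tape,), execute_fn, jpc)
        res.backward()  # the vjp is requested from ``jpc`` on the backward pass
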
""" if logger.isEnabledFor(logging.DEBUG): logger.debug( - "Entry with args=(tapes=%s, device=%s, execute_fn=%s, gradient_fn=%s, gradient_kwargs=%s, _n=%s, max_diff=%s) called by=%s", + "Entry with args=(tapes=%s, execute_fn=%s, jpc=%s", tapes, - repr(device), - execute_fn - if not (logger.isEnabledFor(qml.logging.TRACE) and inspect.isfunction(execute_fn)) - else "\n" + inspect.getsource(execute_fn) + "\n", - gradient_fn - if not (logger.isEnabledFor(qml.logging.TRACE) and inspect.isfunction(gradient_fn)) - else "\n" + inspect.getsource(gradient_fn) + "\n", - gradient_kwargs, - _n, - max_diff, - "::L".join(str(i) for i in inspect.getouterframes(inspect.currentframe(), 2)[1][1:3]), + f"\n{inspect.getsource(execute_fn)}\n" + if logger.isEnabledFor(qml.logging.TRACE) + else execute_fn, + jpc, ) # pylint: disable=unused-argument @@ -302,13 +220,9 @@ def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_d parameters.extend(tape.get_parameters()) kwargs = { - "tapes": tapes, - "device": device, + "tapes": tuple(tapes), "execute_fn": execute_fn, - "gradient_fn": gradient_fn, - "gradient_kwargs": gradient_kwargs, - "_n": _n, - "max_diff": max_diff, + "jpc": jpc, } return ExecuteTapes.apply(kwargs, *parameters) @@ -316,49 +230,8 @@ def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_d def _res_to_torch(r, ctx): """Convert results from unwrapped execution to torch.""" + if isinstance(r, dict): + return r if isinstance(r, (list, tuple)): - res = [] - for t in r: - if isinstance(t, dict) or isinstance(t, list) and all(isinstance(i, dict) for i in t): - # count result, single or broadcasted - res.append(t) - else: - if isinstance(t, tuple): - res.append(tuple(torch.as_tensor(el, device=ctx.torch_device) for el in t)) - else: - res.append(torch.as_tensor(t, device=ctx.torch_device)) - if isinstance(r, tuple): - res = tuple(res) - elif isinstance(r, dict): - res = r - else: - res = torch.as_tensor(r, device=ctx.torch_device) - - return res - - -def _jac_to_torch(i, ctx): - """Convert Jacobian from unwrapped execution to torch in the given ctx.""" - if ctx.jacs: - ctx_jacs = list(ctx.jacs) - multi_m = len(ctx.tapes[i].measurements) > 1 - multi_p = len(ctx.tapes[i].trainable_params) > 1 - - # Multiple measurements and parameters: Jacobian is a tuple of tuple - if multi_p and multi_m: - jacobians = [] - for jacobian in ctx_jacs[i]: - inside_nested_jacobian = [ - torch.as_tensor(j, device=ctx.torch_device) for j in jacobian - ] - inside_nested_jacobian_tuple = tuple(inside_nested_jacobian) - jacobians.append(inside_nested_jacobian_tuple) - ctx_jacs[i] = tuple(jacobians) - # Single measurement and single parameter: Jacobian is a tensor - elif not multi_p and not multi_m: - ctx_jacs[i] = torch.as_tensor(np.array(ctx_jacs[i]), device=ctx.torch_device) - # Multiple measurements or multiple parameters: Jacobian is a tuple - else: - jacobian = [torch.as_tensor(jac, device=ctx.torch_device) for jac in ctx_jacs[i]] - ctx_jacs[i] = tuple(jacobian) - ctx.jacs = tuple(ctx_jacs) + return type(r)(_res_to_torch(t, ctx) for t in r) + return torch.as_tensor(r, device=ctx.torch_device) diff --git a/tests/interfaces/default_qubit_2_integration/test_torch_default_qubit_2.py b/tests/interfaces/default_qubit_2_integration/test_torch_default_qubit_2.py index d4ae61f39b2..c193dd3feeb 100644 --- a/tests/interfaces/default_qubit_2_integration/test_torch_default_qubit_2.py +++ b/tests/interfaces/default_qubit_2_integration/test_torch_default_qubit_2.py @@ -15,10 +15,13 @@ import 
numpy as np import pytest +from param_shift_dev import ParamShiftDerivativesDevice + import pennylane as qml from pennylane.devices import DefaultQubit from pennylane.gradients import param_shift from pennylane.interfaces import execute +from pennylane.measurements import Shots torch = pytest.importorskip("torch") @@ -128,11 +131,35 @@ def cost_cache(x): # add tests for lightning 2 when possible # set rng for device when possible test_matrix = [ - ({"gradient_fn": param_shift}, 100000, DefaultQubit(seed=42)), - ({"gradient_fn": param_shift}, None, DefaultQubit()), - ({"gradient_fn": "backprop"}, None, DefaultQubit()), - ({"gradient_fn": "adjoint", "grad_on_execution": True}, None, DefaultQubit()), - ({"gradient_fn": "adjoint", "grad_on_execution": False}, None, DefaultQubit()), + ({"gradient_fn": param_shift}, Shots(100000), DefaultQubit(seed=42)), + ({"gradient_fn": param_shift}, Shots((100000, 100000)), DefaultQubit(seed=42)), + ({"gradient_fn": param_shift}, Shots(None), DefaultQubit()), + ({"gradient_fn": "backprop"}, Shots(None), DefaultQubit()), + ( + {"gradient_fn": "adjoint", "grad_on_execution": True, "device_vjp": False}, + Shots(None), + DefaultQubit(), + ), + ( + { + "gradient_fn": "adjoint", + "grad_on_execution": False, + "device_vjp": False, + }, + Shots(None), + DefaultQubit(), + ), + ({"gradient_fn": "adjoint", "device_vjp": True}, Shots(None), DefaultQubit()), + ( + {"gradient_fn": "device", "device_vjp": False}, + Shots((100000, 100000)), + ParamShiftDerivativesDevice(), + ), + ( + {"gradient_fn": "device", "device_vjp": True}, + Shots((100000, 100000)), + ParamShiftDerivativesDevice(), + ), ] @@ -171,11 +198,17 @@ def cost(a, b): assert device.tracker.totals["executions"] == 2 # different wires so different hashes assert len(res) == 2 - assert res[0].shape == () - assert res[1].shape == () - - assert qml.math.allclose(res[0], torch.cos(a) * torch.cos(b), atol=atol_for_shots(shots)) - assert qml.math.allclose(res[1], torch.cos(a) * torch.cos(b), atol=atol_for_shots(shots)) + if not shots.has_partitioned_shots: + assert res[0].shape == () + assert res[1].shape == () + exp = torch.cos(a) * torch.cos(b) + if shots.has_partitioned_shots: + for shot in range(2): + for wire in range(2): + assert qml.math.allclose(res[shot][wire], exp, atol=atol_for_shots(shots)) + else: + for wire in range(2): + assert qml.math.allclose(res[wire], exp, atol=atol_for_shots(shots)) def test_scalar_jacobian(self, execute_kwargs, shots, device): """Test scalar jacobian calculation""" @@ -186,7 +219,8 @@ def cost(a): return execute([tape], device, **execute_kwargs)[0] res = torch.autograd.functional.jacobian(cost, a) - assert res.shape == () # pylint: disable=no-member + if not shots.has_partitioned_shots: + assert res.shape == () # pylint: disable=no-member # compare to standard tape jacobian tape = qml.tape.QuantumScript([qml.RY(a, wires=0)], [qml.expval(qml.PauliZ(0))]) @@ -195,8 +229,13 @@ def cost(a): expected = fn(device.execute(tapes)) assert expected.shape == () - assert torch.allclose(res, expected, atol=atol_for_shots(shots), rtol=0) - assert torch.allclose(res, -torch.sin(a), atol=atol_for_shots(shots)) + if shots.has_partitioned_shots: + for i in range(shots.num_copies): + assert torch.allclose(res[i], expected, atol=atol_for_shots(shots), rtol=0) + assert torch.allclose(res[i], -torch.sin(a), atol=atol_for_shots(shots)) + else: + assert torch.allclose(res, expected, atol=atol_for_shots(shots), rtol=0) + assert torch.allclose(res, -torch.sin(a), atol=atol_for_shots(shots)) def 
test_jacobian(self, execute_kwargs, shots, device): """Test jacobian calculation""" @@ -207,23 +246,40 @@ def cost(a, b): ops = [qml.RY(a, wires=0), qml.RX(b, wires=1), qml.CNOT(wires=[0, 1])] m = [qml.expval(qml.PauliZ(0)), qml.expval(qml.PauliY(1))] tape = qml.tape.QuantumScript(ops, m, shots=shots) - return torch.hstack(execute([tape], device, **execute_kwargs)[0]) + [res] = execute([tape], device, **execute_kwargs) + if shots.has_partitioned_shots: + return torch.hstack(res[0] + res[1]) + return torch.hstack(res) res = cost(a, b) expected = torch.tensor([torch.cos(a), -torch.cos(a) * torch.sin(b)]) - assert torch.allclose(res, expected, atol=atol_for_shots(shots), rtol=0) + if shots.has_partitioned_shots: + assert torch.allclose(res[:2], expected, atol=atol_for_shots(shots), rtol=0) + assert torch.allclose(res[2:], expected, atol=atol_for_shots(shots), rtol=0) + else: + assert torch.allclose(res, expected, atol=atol_for_shots(shots), rtol=0) res = torch.autograd.functional.jacobian(cost, (a, b)) assert isinstance(res, tuple) and len(res) == 2 - assert res[0].shape == (2,) - assert res[1].shape == (2,) expected = ( torch.tensor([-torch.sin(a), torch.sin(a) * torch.sin(b)]), torch.tensor([0, -torch.cos(a) * torch.cos(b)]), ) - for _r, _e in zip(res, expected): - assert torch.allclose(_r, _e, atol=atol_for_shots(shots)) + if shots.has_partitioned_shots: + assert res[0].shape == (4,) + assert res[1].shape == (4,) + + for _r, _e in zip(res, expected): + assert torch.allclose(_r[:2], _e, atol=atol_for_shots(shots)) + assert torch.allclose(_r[2:], _e, atol=atol_for_shots(shots)) + + else: + assert res[0].shape == (2,) + assert res[1].shape == (2,) + + for _r, _e in zip(res, expected): + assert torch.allclose(_r, _e, atol=atol_for_shots(shots)) def test_tape_no_parameters(self, execute_kwargs, shots, device): """Test that a tape with no parameters is correctly @@ -255,7 +311,10 @@ def cost(params): shots=shots, ) res = execute([tape1, tape2, tape3, tape4], device, **execute_kwargs) - res = [qml.math.asarray(r, like="torch") for r in res] + if shots.has_partitioned_shots: + res = [qml.math.asarray(ri, like="torch") for r in res for ri in r] + else: + res = [qml.math.asarray(r, like="torch") for r in res] return sum(torch.hstack(res)) params = torch.tensor([0.1, 0.2], requires_grad=True) @@ -263,11 +322,18 @@ def cost(params): res = cost(params) expected = 2 + np.cos(0.5) + np.cos(x) * np.cos(y) - assert torch.allclose(res, expected, atol=atol_for_shots(shots), rtol=0) + + if shots.has_partitioned_shots: + assert torch.allclose(res, 2 * expected, atol=atol_for_shots(shots), rtol=0) + else: + assert torch.allclose(res, expected, atol=atol_for_shots(shots), rtol=0) res.backward() expected = torch.tensor([-torch.cos(y) * torch.sin(x), -torch.cos(x) * torch.sin(y)]) - assert torch.allclose(params.grad, expected, atol=atol_for_shots(shots), rtol=0) + if shots.has_partitioned_shots: + assert torch.allclose(params.grad, 2 * expected, atol=atol_for_shots(shots), rtol=0) + else: + assert torch.allclose(params.grad, expected, atol=atol_for_shots(shots), rtol=0) @pytest.mark.skip("torch cannot reuse tensors in various computations") def test_tapes_with_different_return_size(self, execute_kwargs, shots, device): @@ -380,8 +446,9 @@ def cost(a, b, c): # Only two arguments are trainable assert isinstance(res, tuple) and len(res) == 2 - assert res[0].shape == () - assert res[1].shape == () + if not shots.has_partitioned_shots: + assert res[0].shape == () + assert res[1].shape == () # I tried getting analytic 
results for this circuit but I kept being wrong and am giving up @@ -696,7 +763,10 @@ def _cost_fn(weights, coeffs1, coeffs2): qml.expval(H2) tape = qml.tape.QuantumScript.from_queue(q, shots=shots) - return torch.hstack(execute([tape], device, **execute_kwargs)[0]) + res = execute([tape], device, **execute_kwargs)[0] + if shots.has_partitioned_shots: + return torch.hstack(res[0] + res[1]) + return torch.hstack(res) return _cost_fn @@ -747,11 +817,19 @@ def test_multiple_hamiltonians_not_trainable( res = cost_fn(weights, coeffs1, coeffs2) expected = self.cost_fn_expected(weights, coeffs1, coeffs2) - assert torch.allclose(res, expected, atol=atol_for_shots(shots), rtol=0) + if shots.has_partitioned_shots: + assert torch.allclose(res[:2], expected, atol=atol_for_shots(shots), rtol=0) + assert torch.allclose(res[2:], expected, atol=atol_for_shots(shots), rtol=0) + else: + assert torch.allclose(res, expected, atol=atol_for_shots(shots), rtol=0) res = torch.autograd.functional.jacobian(lambda w: cost_fn(w, coeffs1, coeffs2), weights) expected = self.cost_fn_jacobian(weights, coeffs1, coeffs2)[:, :2] - assert torch.allclose(res, expected, atol=atol_for_shots(shots), rtol=0) + if shots.has_partitioned_shots: + assert torch.allclose(res[:2, :], expected, atol=atol_for_shots(shots), rtol=0) + assert torch.allclose(res[2:, :], expected, atol=atol_for_shots(shots), rtol=0) + else: + assert torch.allclose(res, expected, atol=atol_for_shots(shots), rtol=0) def test_multiple_hamiltonians_trainable(self, execute_kwargs, cost_fn, shots, use_new_op_math): """Test hamiltonian with trainable parameters.""" @@ -766,8 +844,17 @@ def test_multiple_hamiltonians_trainable(self, execute_kwargs, cost_fn, shots, u res = cost_fn(weights, coeffs1, coeffs2) expected = self.cost_fn_expected(weights, coeffs1, coeffs2) - assert torch.allclose(res, expected, atol=atol_for_shots(shots), rtol=0) + if shots.has_partitioned_shots: + assert torch.allclose(res[:2], expected, atol=atol_for_shots(shots), rtol=0) + assert torch.allclose(res[2:], expected, atol=atol_for_shots(shots), rtol=0) + else: + assert torch.allclose(res, expected, atol=atol_for_shots(shots), rtol=0) res = torch.hstack(torch.autograd.functional.jacobian(cost_fn, (weights, coeffs1, coeffs2))) expected = self.cost_fn_jacobian(weights, coeffs1, coeffs2) - assert torch.allclose(res, expected, atol=atol_for_shots(shots), rtol=0) + if shots.has_partitioned_shots: + pytest.xfail( + "multiple hamiltonians with shot vectors does not seem to be differentiable." 
+ ) + else: + assert torch.allclose(res, expected, atol=atol_for_shots(shots), rtol=0) diff --git a/tests/interfaces/default_qubit_2_integration/test_torch_qnode_default_qubit_2.py b/tests/interfaces/default_qubit_2_integration/test_torch_qnode_default_qubit_2.py index 9250186f58b..7a18a6e7bc7 100644 --- a/tests/interfaces/default_qubit_2_integration/test_torch_qnode_default_qubit_2.py +++ b/tests/interfaces/default_qubit_2_integration/test_torch_qnode_default_qubit_2.py @@ -1330,7 +1330,7 @@ def circuit(): def test_counts_expval(self): """Test counts works as expected if combined with expectation values""" - @qnode(DefaultQubit(), diff_method="parameter-shift", interface="torch") + @qnode(qml.device("default.qubit"), diff_method="parameter-shift", interface="torch") def circuit(): qml.Hadamard(wires=[0]) qml.CNOT(wires=[0, 1]) diff --git a/tests/interfaces/test_torch.py b/tests/interfaces/test_torch.py index 2270182307d..6e5053246af 100644 --- a/tests/interfaces/test_torch.py +++ b/tests/interfaces/test_torch.py @@ -332,11 +332,10 @@ def cost(a, cache): interface="torch", )[0] - # Without caching, 3 evaluations are required. - # 1 for the forward pass, and one per output dimension - # on the backward pass. + # Without caching, 2 evaluations are required. + # 1 for the forward pass, and one for the backward pass torch_functional.jacobian(lambda x: cost(x, cache=None), params) - assert dev.num_executions == 3 + assert dev.num_executions == 2 # With caching, only 2 evaluations are required. One # for the forward pass, and one for the backward pass. diff --git a/tests/interfaces/test_torch_qnode.py b/tests/interfaces/test_torch_qnode.py index 4218a6fc26b..3e6977d5cb0 100644 --- a/tests/interfaces/test_torch_qnode.py +++ b/tests/interfaces/test_torch_qnode.py @@ -1487,6 +1487,8 @@ def test_hamiltonian_expansion_finite_shots( elif diff_method == "hadamard": pytest.skip("The hadamard method does not yet support Hamiltonians") + np.random.seed(1235) + dev = qml.device(dev_name, wires=3, shots=50000) spy = mocker.spy(qml.transforms, "hamiltonian_expand") obs = [qml.PauliX(0), qml.PauliX(0) @ qml.PauliZ(1), qml.PauliZ(0) @ qml.PauliZ(1)]
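
Reviewer note on the changelog entry above (``device_vjp`` with the Torch interface): a minimal usage sketch, mirroring the combinations exercised by the new test matrix in ``test_torch_default_qubit_2.py``; the exact keyword arguments shown follow that file and are assumed to match ``qml.execute``'s current signature.

.. code-block:: python

    import torch
    import pennylane as qml

    dev = qml.device("default.qubit")

    x = torch.tensor(0.1, requires_grad=True)
    tape = qml.tape.QuantumScript([qml.RX(x, 0)], [qml.expval(qml.PauliZ(0))])

    # ask the device for the vector-Jacobian product directly on the backward pass
    (res,) = qml.execute([tape], dev, gradient_fn="adjoint", device_vjp=True)
    res.backward()
    print(x.grad)  # approximately -sin(0.1)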
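Reviewer note on the new ``ExecuteTapes`` calling convention: the first positional argument to ``forward`` is a plain (non-differentiable) dict, so ``backward`` returns ``None`` for it followed by one entry per flattened trainable parameter. Below is a self-contained, PennyLane-free sketch of that pattern; ``DictFirstFunction`` and its contents are toy names for illustration only.

.. code-block:: python

    import torch

    class DictFirstFunction(torch.autograd.Function):
        # Toy version of the ExecuteTapes calling convention: the first argument is a
        # plain dict of configuration, the remaining arguments are the trainable tensors.

        @staticmethod
        def forward(ctx, kwargs, *parameters):
            ctx.scale = kwargs["scale"]
            ctx.save_for_backward(*parameters)
            return tuple(ctx.scale * p**2 for p in parameters)

        @staticmethod
        def backward(ctx, *dy):
            params = ctx.saved_tensors
            grads = tuple(d * 2 * ctx.scale * p for d, p in zip(dy, params))
            # one None for the non-differentiable kwargs dict, then one gradient per parameter
            return (None,) + grads

    x = torch.tensor(1.5, requires_grad=True)
    y = torch.tensor(2.0, requires_grad=True)
    out1, out2 = DictFirstFunction.apply({"scale": 3.0}, x, y)
    (out1 + out2).backward()
    print(x.grad, y.grad)  # tensor(9.) tensor(12.)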
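Reviewer note on the updated test matrix: raw shot integers are replaced by ``qml.measurements.Shots``, and the assertions branch on shot vectors through the attributes used in the tests above. For reference:

>>> from pennylane.measurements import Shots
>>> Shots(100000).has_partitioned_shots
False
>>> shot_vector = Shots((100000, 100000))
>>> shot_vector.has_partitioned_shots
True
>>> shot_vector.num_copies
2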