diff --git a/pennylane/gradients/gradient_transform.py b/pennylane/gradients/gradient_transform.py index bf01f4fc83b..ca47e76aa25 100644 --- a/pennylane/gradients/gradient_transform.py +++ b/pennylane/gradients/gradient_transform.py @@ -295,20 +295,22 @@ def _no_trainable_grad(tape): return [], lambda _: tuple(qml.math.zeros([0]) for _ in range(len(tape.measurements))) -def _swap_first_two_axes(grads, first_axis_size, second_axis_size): +def _swap_first_two_axes(grads, first_axis_size, second_axis_size, squeeze=True): """Transpose the first two axes of an iterable of iterables, returning - a tuple of tuples.""" - if first_axis_size == 1: + a tuple of tuples. Tuple version of ``np.moveaxis(grads, 0, 1)``""" + if first_axis_size == 1 and squeeze: return tuple(grads[0][i] for i in range(second_axis_size)) return tuple( tuple(grads[j][i] for j in range(first_axis_size)) for i in range(second_axis_size) ) -def _move_first_axis_to_third_pos(grads, first_axis_size, second_axis_size, third_axis_size): +def _move_first_axis_to_third_pos( + grads, first_axis_size, second_axis_size, third_axis_size, squeeze=True +): """Transpose the first two axes of an iterable of iterables, returning - a tuple of tuples.""" - if first_axis_size == 1: + a tuple of tuples. 
Tuple version of ``np.moveaxis(grads, 0, 2)``""" + if first_axis_size == 1 and squeeze: return tuple( tuple(grads[0][i][j] for j in range(third_axis_size)) for i in range(second_axis_size) ) diff --git a/pennylane/gradients/parameter_shift.py b/pennylane/gradients/parameter_shift.py index 9036378f712..b9f558c2e81 100644 --- a/pennylane/gradients/parameter_shift.py +++ b/pennylane/gradients/parameter_shift.py @@ -24,7 +24,6 @@ import pennylane as qml from pennylane import transform -from pennylane.gradients.gradient_transform import _contract_qjac_with_cjac from pennylane.measurements import VarianceMP from .finite_difference import finite_diff @@ -36,7 +35,10 @@ ) from .gradient_transform import ( _all_zero_grad, + _contract_qjac_with_cjac, + _move_first_axis_to_third_pos, _no_trainable_grad, + _swap_first_two_axes, assert_multimeasure_not_broadcasted, assert_no_state_returns, assert_no_trainable_tape_batching, @@ -152,7 +154,7 @@ def _single_meas_grad(result, coeffs, unshifted_coeff, r0): If an unshifted term exists, its contribution is added to the gradient. """ - if isinstance(result, list) and result == []: + if isinstance(result, tuple) and result == (): if unshifted_coeff is None: raise ValueError( "This gradient component neither has a shifted nor an unshifted component. 
" @@ -173,19 +175,14 @@ def _single_meas_grad(result, coeffs, unshifted_coeff, r0): def _multi_meas_grad(res, coeffs, r0, unshifted_coeff, num_measurements): """Compute the gradient for multiple measurements by taking the linear combination of the coefficients and each measurement result.""" - g = [] if r0 is None: r0 = [None] * num_measurements - for meas_idx in range(num_measurements): - # Gather the measurement results - meas_result = [param_result[meas_idx] for param_result in res] - g_component = _single_meas_grad(meas_result, coeffs, unshifted_coeff, r0[meas_idx]) - g.append(g_component) + if res == (): + res = tuple(() for _ in range(num_measurements)) + return tuple(_single_meas_grad(r, coeffs, unshifted_coeff, r0_) for r, r0_ in zip(res, r0)) - return tuple(g) - -def _evaluate_gradient(tape, res, data, r0): +def _evaluate_gradient(tape_specs, res, data, r0, batch_size): """Use shifted tape evaluations and parameter-shift rule coefficients to evaluate a gradient result. If res is an empty list, ``r0`` and ``data[3]``, which is the coefficient for the unshifted term, must be given and not None. @@ -197,42 +194,46 @@ def _evaluate_gradient(tape, res, data, r0): if fn is not None: res = fn(res) - num_measurements = len(tape.measurements) + *_, num_measurements, shots = tape_specs + scalar_shots, len_shot_vec = not shots.has_partitioned_shots, shots.num_copies + + if r0 is None and not scalar_shots: + r0 = [None] * int(len_shot_vec) if num_measurements == 1: - if not tape.shots.has_partitioned_shots: + if scalar_shots: + # Res has axes (parameters,) return _single_meas_grad(res, coeffs, unshifted_coeff, r0) - g = [] - len_shot_vec = tape.shots.num_copies - # Res has order of axes: - # 1. Number of parameters - # 2. 
Shot vector - if r0 is None: - r0 = [None] * int(len_shot_vec) - for i in range(len_shot_vec): - shot_comp_res = [r[i] for r in res] - shot_comp_res = _single_meas_grad(shot_comp_res, coeffs, unshifted_coeff, r0[i]) - g.append(shot_comp_res) - return tuple(g) - - g = [] - if not tape.shots.has_partitioned_shots: + # Res has axes (parameters, shots) or with broadcasting (shots, parameters) + if batch_size is None: + # Move shots to first position + res = _swap_first_two_axes(res, len(res), len_shot_vec, squeeze=False) + # _single_meas_grad expects axis (parameters,), iterate over shot vector + return tuple(_single_meas_grad(r, coeffs, unshifted_coeff, r0_) for r, r0_ in zip(res, r0)) + + if scalar_shots: + # Res has axes (parameters, measurements) or with broadcasting (measurements, parameters) + if batch_size is None and len(res) > 0: + # Move measurements to first position + res = _swap_first_two_axes(res, len(res), num_measurements, squeeze=False) + # _multi_meas_grad expects axes (measurements, parameters) return _multi_meas_grad(res, coeffs, r0, unshifted_coeff, num_measurements) - # Res has order of axes: - # 1. Number of parameters - # 2. Shot vector - # 3. 
Number of measurements - for idx_shot_comp in range(tape.shots.num_copies): - single_shot_component_result = [ - result_for_each_param[idx_shot_comp] for result_for_each_param in res - ] - multi_meas_grad = _multi_meas_grad( - single_shot_component_result, coeffs, r0, unshifted_coeff, num_measurements - ) - g.append(multi_meas_grad) - - return tuple(g) + # Res has axes (parameters, shots, measurements) + # or with broadcasting (shots, measurements, parameters) + if batch_size is None: + if len(res) > 0: + # Move first axis (parameters) to last position + res = _move_first_axis_to_third_pos( + res, len(res), len_shot_vec, num_measurements, squeeze=False + ) + else: + res = tuple(() for _ in range(len_shot_vec)) # one empty entry per shot copy + # _multi_meas_grad expects (measurements, parameters), so we iterate over shot vector + return tuple( + _multi_meas_grad(r, coeffs, r0_, unshifted_coeff, num_measurements) + for r, r0_ in zip(res, r0) + ) def _get_operation_recipe(tape, t_idx, shifts, order=1): @@ -424,14 +425,14 @@ def processing_fn(results): grads.append(None) continue # The gradient for this parameter is computed from r0 alone. 
- g = _evaluate_gradient(tape, [], data, r0) + g = _evaluate_gradient(tape_specs, (), data, r0, batch_size) grads.append(g) continue res = results[start : start + num_tapes] if batch_size is None else results[start] start = start + num_tapes - g = _evaluate_gradient(tape, res, data, r0) + g = _evaluate_gradient(tape_specs, res, data, r0, batch_size) grads.append(g) # g will have been defined at least once (because otherwise all gradients would have diff --git a/tests/gradients/parameter_shift/test_parameter_shift.py b/tests/gradients/parameter_shift/test_parameter_shift.py index 4a33f631ba0..6823a0a3af7 100644 --- a/tests/gradients/parameter_shift/test_parameter_shift.py +++ b/tests/gradients/parameter_shift/test_parameter_shift.py @@ -20,12 +20,221 @@ from pennylane.devices import DefaultQubitLegacy from pennylane.gradients import param_shift from pennylane.gradients.parameter_shift import ( + _evaluate_gradient, _get_operation_recipe, _make_zero_rep, _put_zeros_in_pdA2_involutory, ) +from pennylane.measurements.shots import Shots from pennylane.operation import AnyWires, Observable +# Constants for TestEvaluateGradient +# Coefficients and expectation values +X = np.arange(1, 5) +# Expected "shift rule" result +Z = np.sum(-np.arange(1, 5) ** 2) +# Single coefficient/expectation value that leads to the same result as X +w = np.sqrt(30) +# Prefactors to emulate a shot vector +shv = np.array([0.1, 0.4, 0.7]) +# Fake probability vector (just a 1d array) +p = np.array([0.01, 0.06, -0.2, 0.5, -0.1, 0.7, -0.09]) +# Second fake probability vector (just a 1d array) +p2 = p[1:5] +# Shifted probability evaluations +P = np.outer(X, p) +# Shifted probability evaluations for p2 +P2 = np.outer(X, p2) +# Single unshifted result that leads to the same result as P +v = w * p +# Single unshifted result that leads to the same result as P2 +v2 = w * p2 +# Prefactors to emulate different shot values and multi measurement +shv_m = np.outer([0.1, 0.4, 0.7], [1, 2]) + + +class 
TestEvaluateGradient: + """Test _evaluate_gradient.""" + + # pylint: disable=too-many-arguments + + # We could theoretically compute the required res, r0 and expected from the parametrization of coeffs, + # unshifted_coeff and batch_size, but that turned out to take lots of effort and edge case logic + + test_cases_single_shots_single_meas = [ + # Expectation value + (X, None, None, tuple(-X), None, Z), + (X, None, 4, -X, None, Z), + (X[:-1], X[-1], None, tuple(-X[:-1]), -X[-1], Z), + (X[:-1], X[-1], 4, -X[:-1], -X[-1], Z), + (np.ones(0), w, None, (), -w, Z), + (np.ones(0), w, 4, (), -w, Z), + # Probability + (X, None, None, tuple(-P), None, p * Z), + (X, None, 4, -P, None, p * Z), + (X[:-1], X[-1], None, tuple(-P[:-1]), -P[-1], p * Z), + (X[:-1], X[-1], 4, -P[:-1], -P[-1], p * Z), + (np.ones(0), w, None, (), -v, p * Z), + (np.ones(0), w, 4, (), -v, p * Z), + ] + + @pytest.mark.parametrize( + "coeffs, unshifted_coeff, batch_size, res, r0, expected", + test_cases_single_shots_single_meas, + ) + def test_single_shots_single_meas(self, coeffs, unshifted_coeff, batch_size, res, r0, expected): + """Test that a single shots, single measurement gradient is evaluated correctly.""" + + shots = Shots(100) + tape_specs = (None, None, 1, shots) + data = [None, coeffs, None, unshifted_coeff, None] + grad = _evaluate_gradient(tape_specs, res, data, r0, batch_size) + + assert isinstance(grad, np.ndarray) + assert grad.shape == expected.shape + assert np.allclose(grad, expected) + + exp_probs = (p2 * Z, 2 * p * Z) + test_cases_single_shots_multi_meas = [ + # Expectation values + (X, None, None, tuple(zip(-X, -2 * X)), None, (Z, 2 * Z)), + (X, None, 4, (-X, -2 * X), None, (Z, 2 * Z)), + (X[:-1], X[-1], None, tuple(zip(-X[:-1], -2 * X[:-1])), (-X[-1], -2 * X[-1]), (Z, 2 * Z)), + (X[:-1], X[-1], 4, (-X[:-1], -2 * X[:-1]), (-X[-1], -2 * X[-1]), (Z, 2 * Z)), + (np.ones(0), w, None, (), (-w, -2 * w), (Z, 2 * Z)), + (np.ones(0), w, 4, (), (-w, -2 * w), (Z, 2 * Z)), + # Expval and 
Probability + (X, None, None, tuple(zip(-X, -2 * P)), None, (Z, 2 * p * Z)), + (X, None, 4, (-X, -2 * P), None, (Z, 2 * p * Z)), + (X[:-1], X[-1], None, tuple(zip(-X, -2 * P))[:-1], (-X[-1], -2 * P[-1]), (Z, 2 * p * Z)), + (X[:-1], X[-1], 4, (-X[:-1], -2 * P[:-1]), (-X[-1], -2 * P[-1]), (Z, 2 * p * Z)), + (np.ones(0), w, None, (), (-w, -2 * v), (Z, 2 * p * Z)), + (np.ones(0), w, 4, (), (-w, -2 * v), (Z, 2 * p * Z)), + # Probabilities + (X, None, None, tuple(zip(-P2, -2 * P)), None, exp_probs), + (X, None, 4, (-P2, -2 * P), None, exp_probs), + (X[:-1], X[-1], None, tuple(zip(-P2, -2 * P))[:-1], (-P2[-1], -2 * P[-1]), exp_probs), + (X[:-1], X[-1], 4, (-P2[:-1], -2 * P[:-1]), (-P2[-1], -2 * P[-1]), exp_probs), + (np.ones(0), w, None, (), (-v2, -2 * v), exp_probs), + (np.ones(0), w, 4, (), (-v2, -2 * v), exp_probs), + ] + + @pytest.mark.parametrize( + "coeffs, unshifted_coeff, batch_size, res, r0, expected", + test_cases_single_shots_multi_meas, + ) + def test_single_shots_multi_meas(self, coeffs, unshifted_coeff, batch_size, res, r0, expected): + """Test that a single shots, multiple measurements gradient is evaluated correctly.""" + + shots = Shots(100) + tape_specs = (None, None, 2, shots) + data = [None, coeffs, None, unshifted_coeff, None] + grad = _evaluate_gradient(tape_specs, res, data, r0, batch_size) + + assert isinstance(grad, tuple) and len(grad) == 2 + for g, e in zip(grad, expected): + assert isinstance(g, np.ndarray) and g.shape == e.shape + assert np.allclose(g, e) + + shot_vec_X = tuple(zip(*(-c * X for c in shv))) + shot_vec_P = tuple(zip(*(-c * P for c in shv))) + shot_vec_P_partial = tuple(-c * P[:-1] for c in shv) + + exp_shot_vec_prob = np.outer(shv, p) * Z + test_cases_multi_shots_single_meas = [ + # Expectation value + (X, None, None, shot_vec_X, None, shv * Z), + (X, None, 4, tuple(-c * X for c in shv), None, shv * Z), + (X[:-1], X[-1], None, shot_vec_X[:-1], shot_vec_X[-1], shv * Z), + (X[:-1], X[-1], 4, tuple(-c * X[:-1] for c in shv), 
tuple(-shv * X[-1]), shv * Z), + (np.ones(0), w, None, (), tuple(-c * w for c in shv), shv * Z), + (np.ones(0), w, 4, ((), (), ()), tuple(-c * w for c in shv), shv * Z), + # Probability + (X, None, None, shot_vec_P, None, exp_shot_vec_prob), + (X, None, 4, tuple(-c * P for c in shv), None, exp_shot_vec_prob), + (X[:-1], X[-1], None, shot_vec_P[:-1], shot_vec_P[-1], exp_shot_vec_prob), + (X[:-1], X[-1], 4, shot_vec_P_partial, tuple(np.outer(-shv, P[-1])), exp_shot_vec_prob), + (np.ones(0), w, None, (), tuple(-c * v for c in shv), exp_shot_vec_prob), + (np.ones(0), w, 4, ((), (), ()), tuple(-c * v for c in shv), exp_shot_vec_prob), + ] + + @pytest.mark.parametrize( + "coeffs, unshifted_coeff, batch_size, res, r0, expected", + test_cases_multi_shots_single_meas, + ) + def test_multi_shots_single_meas(self, coeffs, unshifted_coeff, batch_size, res, r0, expected): + """Test that a shot vector, single measurement gradient is evaluated correctly.""" + + shots = Shots((100, 101, 102)) + tape_specs = (None, None, 1, shots) + data = [None, coeffs, None, unshifted_coeff, None] + grad = _evaluate_gradient(tape_specs, res, data, r0, batch_size) + + assert isinstance(grad, tuple) and len(grad) == 3 + for g, e in zip(grad, expected): + assert isinstance(g, np.ndarray) and g.shape == e.shape + assert np.allclose(g, e) + + multi_X = tuple(tuple((-c * x, -2 * c * x) for c in shv) for x in X) + batched_multi_X = tuple((-c * X, -2 * c * X) for c in shv) + partial_multi_X = tuple((-c * X[:-1], -2 * c * X[:-1]) for c in shv) + expvals_r0 = tuple((-c * w, -2 * c * w) for c in shv) + + multi_X_P = tuple(tuple((-c * _p, -2 * c * x) for c in shv) for x, _p in zip(X, P)) + batched_multi_X_P = tuple((-c * P, -2 * c * X) for c in shv) + partial_multi_X_P = tuple((-c * P[:-1], -2 * c * X[:-1]) for c in shv) + prob_expval_r0 = tuple((-c * v, -2 * c * w) for c in shv) + + multi_P_P = tuple(tuple((-c * _p, -2 * c * _q) for c in shv) for _q, _p in zip(P2, P)) + batched_multi_P_P = tuple((-c * P, 
-2 * c * P2) for c in shv) + partial_multi_P_P = tuple((-c * P[:-1], -2 * c * P2[:-1]) for c in shv) + probs_r0 = tuple((-c * v, -2 * c * v2) for c in shv) + + exp_shot_vec_prob_expval = tuple((c * p * Z, 2 * c * Z) for c in shv) + exp_shot_vec_probs = tuple((c * p * Z, 2 * c * p2 * Z) for c in shv) + test_cases_multi_shots_multi_meas = [ + # Expectation values + (X, None, None, multi_X, None, shv_m * Z), + (X, None, 4, batched_multi_X, None, shv_m * Z), + (X[:-1], X[-1], None, multi_X[:-1], multi_X[-1], shv_m * Z), + (X[:-1], X[-1], 4, partial_multi_X, multi_X[-1], shv_m * Z), + (np.ones(0), w, None, (), expvals_r0, shv_m * Z), + (np.ones(0), w, 4, ((), (), ()), expvals_r0, shv_m * Z), + # Probability and expectation + (X, None, None, multi_X_P, None, exp_shot_vec_prob_expval), + (X, None, 4, batched_multi_X_P, None, exp_shot_vec_prob_expval), + (X[:-1], X[-1], None, multi_X_P[:-1], multi_X_P[-1], exp_shot_vec_prob_expval), + (X[:-1], X[-1], 4, partial_multi_X_P, multi_X_P[-1], exp_shot_vec_prob_expval), + (np.ones(0), w, None, (), prob_expval_r0, exp_shot_vec_prob_expval), + (np.ones(0), w, 4, ((), (), ()), prob_expval_r0, exp_shot_vec_prob_expval), + # Probabilities + (X, None, None, multi_P_P, None, exp_shot_vec_probs), + (X, None, 4, batched_multi_P_P, None, exp_shot_vec_probs), + (X[:-1], X[-1], None, multi_P_P[:-1], multi_P_P[-1], exp_shot_vec_probs), + (X[:-1], X[-1], 4, partial_multi_P_P, multi_P_P[-1], exp_shot_vec_probs), + (np.ones(0), w, None, (), probs_r0, exp_shot_vec_probs), + (np.ones(0), w, 4, ((), (), ()), probs_r0, exp_shot_vec_probs), + ] + + @pytest.mark.parametrize( + "coeffs, unshifted_coeff, batch_size, res, r0, expected", + test_cases_multi_shots_multi_meas, + ) + def test_multi_shots_multi_meas(self, coeffs, unshifted_coeff, batch_size, res, r0, expected): + """Test that a shot vector, multiple measurements gradient is evaluated correctly.""" + + shots = Shots((100, 101, 102)) + tape_specs = (None, None, 2, shots) + data = [None, coeffs, 
None, unshifted_coeff, None] + grad = _evaluate_gradient(tape_specs, res, data, r0, batch_size) + + assert isinstance(grad, tuple) and len(grad) == 3 + for g, e in zip(grad, expected): + assert isinstance(g, tuple) and len(g) == 2 + for _g, _e in zip(g, e): + assert isinstance(_g, np.ndarray) and _g.shape == _e.shape + assert np.allclose(_g, _e) + # pylint: disable=too-few-public-methods class RY_with_F(qml.RY): diff --git a/tests/gradients/parameter_shift/test_parameter_shift_cv.py b/tests/gradients/parameter_shift/test_parameter_shift_cv.py index 0642511267c..8064bb51891 100644 --- a/tests/gradients/parameter_shift/test_parameter_shift_cv.py +++ b/tests/gradients/parameter_shift/test_parameter_shift_cv.py @@ -14,7 +14,7 @@ """Tests for the gradients.parameter_shift_cv module.""" # pylint: disable=protected-access, no-self-use, not-callable, no-value-for-parameter -import unittest.mock as mock +from unittest import mock import pytest