Skip to content

Commit

Permalink
Refactor _evaluate_gradient for param_shift (#5666)
Browse files Browse the repository at this point in the history
**Context:**
`param_shift` uses the internal method `_evaluate_gradient`, which
mostly consists of logic to take the contraction of tape execution
results with the parameter-shift rule coefficients and map it over tuple
axes. It also needs to respect batching of execution results if
`broadcast=True` is used in `param_shift`.

**Description of the Change:**
This PR cleans up `_evaluate_gradient`, extends it to multi-measurement
and shot vector scenarios when broadcasting is used, and reuses helper
functions from `gradient_transform.py` to reduce code duplication.
We also add unit tests for this method, which will allow us to reduce the
number of integration tests in the future.

**Benefits:**
Prepare `_evaluate_gradient` for multi-measurement and shot vector
support with `broadcast=True`.
Improve testing and code quality.

**Possible Drawbacks:**

**Related GitHub Issues:**
Prepares a bug fix for #5598.

[sc-62283]

---------

Co-authored-by: Mudit Pandey <[email protected]>
Co-authored-by: Astral Cai <[email protected]>
Co-authored-by: Vincent Michaud-Rioux <[email protected]>
Co-authored-by: lillian542 <[email protected]>
  • Loading branch information
5 people authored May 24, 2024
1 parent df2abfa commit 58668d7
Show file tree
Hide file tree
Showing 4 changed files with 263 additions and 51 deletions.
14 changes: 8 additions & 6 deletions pennylane/gradients/gradient_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,20 +295,22 @@ def _no_trainable_grad(tape):
return [], lambda _: tuple(qml.math.zeros([0]) for _ in range(len(tape.measurements)))


def _swap_first_two_axes(grads, first_axis_size, second_axis_size):
def _swap_first_two_axes(grads, first_axis_size, second_axis_size, squeeze=True):
"""Transpose the first two axes of an iterable of iterables, returning
a tuple of tuples."""
if first_axis_size == 1:
a tuple of tuples. Tuple version of ``np.moveaxis(grads, 0, 1)``"""
if first_axis_size == 1 and squeeze:
return tuple(grads[0][i] for i in range(second_axis_size))
return tuple(
tuple(grads[j][i] for j in range(first_axis_size)) for i in range(second_axis_size)
)


def _move_first_axis_to_third_pos(grads, first_axis_size, second_axis_size, third_axis_size):
def _move_first_axis_to_third_pos(
grads, first_axis_size, second_axis_size, third_axis_size, squeeze=True
):
"""Transpose the first two axes of an iterable of iterables, returning
a tuple of tuples."""
if first_axis_size == 1:
a tuple of tuples. Tuple version of ``np.moveaxis(grads, 0, 2)``"""
if first_axis_size == 1 and squeeze:
return tuple(
tuple(grads[0][i][j] for j in range(third_axis_size)) for i in range(second_axis_size)
)
Expand Down
89 changes: 45 additions & 44 deletions pennylane/gradients/parameter_shift.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@

import pennylane as qml
from pennylane import transform
from pennylane.gradients.gradient_transform import _contract_qjac_with_cjac
from pennylane.measurements import VarianceMP

from .finite_difference import finite_diff
Expand All @@ -36,7 +35,10 @@
)
from .gradient_transform import (
_all_zero_grad,
_contract_qjac_with_cjac,
_move_first_axis_to_third_pos,
_no_trainable_grad,
_swap_first_two_axes,
assert_multimeasure_not_broadcasted,
assert_no_state_returns,
assert_no_trainable_tape_batching,
Expand Down Expand Up @@ -152,7 +154,7 @@ def _single_meas_grad(result, coeffs, unshifted_coeff, r0):
If an unshifted term exists, its contribution is added to the gradient.
"""
if isinstance(result, list) and result == []:
if isinstance(result, tuple) and result == ():
if unshifted_coeff is None:
raise ValueError(
"This gradient component neither has a shifted nor an unshifted component. "
Expand All @@ -173,19 +175,14 @@ def _single_meas_grad(result, coeffs, unshifted_coeff, r0):
def _multi_meas_grad(res, coeffs, r0, unshifted_coeff, num_measurements):
    """Compute the gradient for multiple measurements by taking the linear combination of
    the coefficients and each measurement result.

    Args:
        res: Shifted evaluation results, iterated measurement by measurement; each
            entry is handed to ``_single_meas_grad`` as the results for one
            measurement. An empty tuple means that there are no shifted evaluations.
        coeffs: Coefficients of the shifted terms, forwarded to ``_single_meas_grad``.
        r0: Unshifted results, one entry per measurement, or ``None`` if there are none.
        unshifted_coeff: Coefficient of the unshifted term, forwarded to
            ``_single_meas_grad``.
        num_measurements (int): Number of measurements.

    Returns:
        tuple: One gradient entry per measurement.
    """
    if r0 is None:
        r0 = [None] * num_measurements
    if res == ():
        # Without shifted evaluations, still hand one (empty) result per measurement
        # to ``_single_meas_grad`` so that ``zip`` below yields every measurement.
        res = tuple(() for _ in range(num_measurements))
    return tuple(_single_meas_grad(r, coeffs, unshifted_coeff, r0_) for r, r0_ in zip(res, r0))


def _evaluate_gradient(tape, res, data, r0):
def _evaluate_gradient(tape_specs, res, data, r0, batch_size):
"""Use shifted tape evaluations and parameter-shift rule coefficients to evaluate
a gradient result. If res is an empty list, ``r0`` and ``data[3]``, which is the
coefficient for the unshifted term, must be given and not None.
Expand All @@ -197,42 +194,46 @@ def _evaluate_gradient(tape, res, data, r0):
if fn is not None:
res = fn(res)

num_measurements = len(tape.measurements)
*_, num_measurements, shots = tape_specs
scalar_shots, len_shot_vec = not shots.has_partitioned_shots, shots.num_copies

if r0 is None and not scalar_shots:
r0 = [None] * int(len_shot_vec)

if num_measurements == 1:
if not tape.shots.has_partitioned_shots:
if scalar_shots:
# Res has axes (parameters,)
return _single_meas_grad(res, coeffs, unshifted_coeff, r0)
g = []
len_shot_vec = tape.shots.num_copies
# Res has order of axes:
# 1. Number of parameters
# 2. Shot vector
if r0 is None:
r0 = [None] * int(len_shot_vec)
for i in range(len_shot_vec):
shot_comp_res = [r[i] for r in res]
shot_comp_res = _single_meas_grad(shot_comp_res, coeffs, unshifted_coeff, r0[i])
g.append(shot_comp_res)
return tuple(g)

g = []
if not tape.shots.has_partitioned_shots:
# Res has axes (parameters, shots) or with broadcasting (shots, parameters)
if batch_size is None:
# Move shots to first position
res = _swap_first_two_axes(res, len(res), len_shot_vec, squeeze=False)
# _single_meas_grad expects axis (parameters,), iterate over shot vector
return tuple(_single_meas_grad(r, coeffs, unshifted_coeff, r0_) for r, r0_ in zip(res, r0))

if scalar_shots:
# Res has axes (parameters, measurements) or with broadcasting (measurements, parameters)
if batch_size is None and len(res) > 0:
# Move measurements to first position
res = _swap_first_two_axes(res, len(res), num_measurements, squeeze=False)
# _multi_meas_grad expects axes (measurements, parameters)
return _multi_meas_grad(res, coeffs, r0, unshifted_coeff, num_measurements)

# Res has order of axes:
# 1. Number of parameters
# 2. Shot vector
# 3. Number of measurements
for idx_shot_comp in range(tape.shots.num_copies):
single_shot_component_result = [
result_for_each_param[idx_shot_comp] for result_for_each_param in res
]
multi_meas_grad = _multi_meas_grad(
single_shot_component_result, coeffs, r0, unshifted_coeff, num_measurements
)
g.append(multi_meas_grad)

return tuple(g)
# Res has axes (parameters, shots, measurements)
# or with broadcasting (shots, measurements, parameters)
if batch_size is None:
if len(res) > 0:
# Move first axis (parameters) to last position
res = _move_first_axis_to_third_pos(
res, len(res), len_shot_vec, num_measurements, squeeze=False
)
else:
res = (() for _ in range(len_shot_vec))
# _multi_meas_grad expects (measurements, parameters), so we iterate over shot vector
return tuple(
_multi_meas_grad(r, coeffs, r0_, unshifted_coeff, num_measurements)
for r, r0_ in zip(res, r0)
)


def _get_operation_recipe(tape, t_idx, shifts, order=1):
Expand Down Expand Up @@ -424,14 +425,14 @@ def processing_fn(results):
grads.append(None)
continue
# The gradient for this parameter is computed from r0 alone.
g = _evaluate_gradient(tape, [], data, r0)
g = _evaluate_gradient(tape_specs, (), data, r0, batch_size)
grads.append(g)
continue

res = results[start : start + num_tapes] if batch_size is None else results[start]
start = start + num_tapes

g = _evaluate_gradient(tape, res, data, r0)
g = _evaluate_gradient(tape_specs, res, data, r0, batch_size)
grads.append(g)

# g will have been defined at least once (because otherwise all gradients would have
Expand Down
209 changes: 209 additions & 0 deletions tests/gradients/parameter_shift/test_parameter_shift.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,221 @@
from pennylane.devices import DefaultQubitLegacy
from pennylane.gradients import param_shift
from pennylane.gradients.parameter_shift import (
_evaluate_gradient,
_get_operation_recipe,
_make_zero_rep,
_put_zeros_in_pdA2_involutory,
)
from pennylane.measurements.shots import Shots
from pennylane.operation import AnyWires, Observable

# Shared fixtures for TestEvaluateGradient
# Shift rule coefficients, which double as (negated) fake expectation values
X = np.arange(1, 5)
# Result the "shift rule" contraction is expected to produce
Z = np.sum(-(X**2))
# Single coefficient/expectation value that leads to the same result as X
w = np.sqrt(-Z)
# Prefactors that emulate the entries of a shot vector
shv = np.array([0.1, 0.4, 0.7])
# Prefactors that emulate a shot vector combined with two measurements
shv_m = np.outer(shv, [1, 2])
# Fake probability vector (just a 1d array)
p = np.array([0.01, 0.06, -0.2, 0.5, -0.1, 0.7, -0.09])
# Second, shorter fake probability vector (just a 1d array)
p2 = p[1:5]
# Shifted probability evaluations for p and for p2
P, P2 = (np.outer(X, probs) for probs in (p, p2))
# Single unshifted results that lead to the same results as P and P2, respectively
v, v2 = w * p, w * p2


class TestEvaluateGradient:
    """Test _evaluate_gradient.

    Every parametrized case is a tuple
    ``(coeffs, unshifted_coeff, batch_size, res, r0, expected)``.
    Cases with ``batch_size=None`` pass ``res`` with the parameter axis first,
    cases with ``batch_size=4`` emulate broadcasting and pass ``res`` with the
    parameter axis last. Cases with empty ``coeffs`` (``np.ones(0)``) have no
    shifted evaluations and rely on ``unshifted_coeff`` and ``r0`` alone.
    """

    # pylint: disable=too-many-arguments

    # We could theoretically compute the required res, r0 and expected from the parametrization of coeffs,
    # unshifted_coeff and batch_size, but that turned out to take lots of effort and edge case logic

    test_cases_single_shots_single_meas = [
        # Expectation value
        (X, None, None, tuple(-X), None, Z),
        (X, None, 4, -X, None, Z),
        (X[:-1], X[-1], None, tuple(-X[:-1]), -X[-1], Z),
        (X[:-1], X[-1], 4, -X[:-1], -X[-1], Z),
        (np.ones(0), w, None, (), -w, Z),
        (np.ones(0), w, 4, (), -w, Z),
        # Probability
        (X, None, None, tuple(-P), None, p * Z),
        (X, None, 4, -P, None, p * Z),
        (X[:-1], X[-1], None, tuple(-P[:-1]), -P[-1], p * Z),
        (X[:-1], X[-1], 4, -P[:-1], -P[-1], p * Z),
        (np.ones(0), w, None, (), -v, p * Z),
        (np.ones(0), w, 4, (), -v, p * Z),
    ]

    @pytest.mark.parametrize(
        "coeffs, unshifted_coeff, batch_size, res, r0, expected",
        test_cases_single_shots_single_meas,
    )
    def test_single_shots_single_meas(self, coeffs, unshifted_coeff, batch_size, res, r0, expected):
        """Test that a single shots, single measurement gradient is evaluated correctly."""

        shots = Shots(100)
        # Only the last two entries (number of measurements, shots) are filled in
        tape_specs = (None, None, 1, shots)
        # Positions 1 and 3 carry the shifted and the unshifted coefficients
        data = [None, coeffs, None, unshifted_coeff, None]
        grad = _evaluate_gradient(tape_specs, res, data, r0, batch_size)

        # Without shot vector and with a single measurement, the gradient is a bare array
        assert isinstance(grad, np.ndarray)
        assert grad.shape == expected.shape
        assert np.allclose(grad, expected)

    # Expected gradient for the two-probabilities cases below
    exp_probs = (p2 * Z, 2 * p * Z)
    test_cases_single_shots_multi_meas = [
        # Expectation values
        (X, None, None, tuple(zip(-X, -2 * X)), None, (Z, 2 * Z)),
        (X, None, 4, (-X, -2 * X), None, (Z, 2 * Z)),
        (X[:-1], X[-1], None, tuple(zip(-X[:-1], -2 * X[:-1])), (-X[-1], -2 * X[-1]), (Z, 2 * Z)),
        (X[:-1], X[-1], 4, (-X[:-1], -2 * X[:-1]), (-X[-1], -2 * X[-1]), (Z, 2 * Z)),
        (np.ones(0), w, None, (), (-w, -2 * w), (Z, 2 * Z)),
        (np.ones(0), w, 4, (), (-w, -2 * w), (Z, 2 * Z)),
        # Expval and Probability
        (X, None, None, tuple(zip(-X, -2 * P)), None, (Z, 2 * p * Z)),
        (X, None, 4, (-X, -2 * P), None, (Z, 2 * p * Z)),
        (X[:-1], X[-1], None, tuple(zip(-X, -2 * P))[:-1], (-X[-1], -2 * P[-1]), (Z, 2 * p * Z)),
        (X[:-1], X[-1], 4, (-X[:-1], -2 * P[:-1]), (-X[-1], -2 * P[-1]), (Z, 2 * p * Z)),
        (np.ones(0), w, None, (), (-w, -2 * v), (Z, 2 * p * Z)),
        (np.ones(0), w, 4, (), (-w, -2 * v), (Z, 2 * p * Z)),
        # Probabilities
        (X, None, None, tuple(zip(-P2, -2 * P)), None, exp_probs),
        (X, None, 4, (-P2, -2 * P), None, exp_probs),
        (X[:-1], X[-1], None, tuple(zip(-P2, -2 * P))[:-1], (-P2[-1], -2 * P[-1]), exp_probs),
        (X[:-1], X[-1], 4, (-P2[:-1], -2 * P[:-1]), (-P2[-1], -2 * P[-1]), exp_probs),
        (np.ones(0), w, None, (), (-v2, -2 * v), exp_probs),
        (np.ones(0), w, 4, (), (-v2, -2 * v), exp_probs),
    ]

    @pytest.mark.parametrize(
        "coeffs, unshifted_coeff, batch_size, res, r0, expected",
        test_cases_single_shots_multi_meas,
    )
    def test_single_shots_multi_meas(self, coeffs, unshifted_coeff, batch_size, res, r0, expected):
        """Test that a single shots, multiple measurements gradient is evaluated correctly."""

        shots = Shots(100)
        # Only the last two entries (number of measurements, shots) are filled in
        tape_specs = (None, None, 2, shots)
        # Positions 1 and 3 carry the shifted and the unshifted coefficients
        data = [None, coeffs, None, unshifted_coeff, None]
        grad = _evaluate_gradient(tape_specs, res, data, r0, batch_size)

        # One gradient entry per measurement
        assert isinstance(grad, tuple) and len(grad) == 2
        for g, e in zip(grad, expected):
            assert isinstance(g, np.ndarray) and g.shape == e.shape
            assert np.allclose(g, e)

    # Unbroadcasted shot vector results: outer axis is parameters, inner axis is shots
    shot_vec_X = tuple(zip(*(-c * X for c in shv)))
    shot_vec_P = tuple(zip(*(-c * P for c in shv)))
    # Broadcasted shot vector results without the last shifted evaluation
    shot_vec_P_partial = tuple(-c * P[:-1] for c in shv)

    exp_shot_vec_prob = np.outer(shv, p) * Z
    test_cases_multi_shots_single_meas = [
        # Expectation value
        (X, None, None, shot_vec_X, None, shv * Z),
        (X, None, 4, tuple(-c * X for c in shv), None, shv * Z),
        (X[:-1], X[-1], None, shot_vec_X[:-1], shot_vec_X[-1], shv * Z),
        (X[:-1], X[-1], 4, tuple(-c * X[:-1] for c in shv), tuple(-shv * X[-1]), shv * Z),
        (np.ones(0), w, None, (), tuple(-c * w for c in shv), shv * Z),
        (np.ones(0), w, 4, ((), (), ()), tuple(-c * w for c in shv), shv * Z),
        # Probability
        (X, None, None, shot_vec_P, None, exp_shot_vec_prob),
        (X, None, 4, tuple(-c * P for c in shv), None, exp_shot_vec_prob),
        (X[:-1], X[-1], None, shot_vec_P[:-1], shot_vec_P[-1], exp_shot_vec_prob),
        (X[:-1], X[-1], 4, shot_vec_P_partial, tuple(np.outer(-shv, P[-1])), exp_shot_vec_prob),
        (np.ones(0), w, None, (), tuple(-c * v for c in shv), exp_shot_vec_prob),
        (np.ones(0), w, 4, ((), (), ()), tuple(-c * v for c in shv), exp_shot_vec_prob),
    ]

    @pytest.mark.parametrize(
        "coeffs, unshifted_coeff, batch_size, res, r0, expected",
        test_cases_multi_shots_single_meas,
    )
    def test_multi_shots_single_meas(self, coeffs, unshifted_coeff, batch_size, res, r0, expected):
        """Test that a shot vector, single measurements gradient is evaluated correctly."""

        # Shot vector with three entries
        shots = Shots((100, 101, 102))
        # Only the last two entries (number of measurements, shots) are filled in
        tape_specs = (None, None, 1, shots)
        # Positions 1 and 3 carry the shifted and the unshifted coefficients
        data = [None, coeffs, None, unshifted_coeff, None]
        grad = _evaluate_gradient(tape_specs, res, data, r0, batch_size)

        # One gradient entry per shot vector entry
        assert isinstance(grad, tuple) and len(grad) == 3
        for g, e in zip(grad, expected):
            assert isinstance(g, np.ndarray) and g.shape == e.shape
            assert np.allclose(g, e)

    # Naming scheme for the fixtures below:
    #   multi_*         - unbroadcasted results with axes (parameters, shots, measurements)
    #   batched_multi_* - broadcasted results with axes (shots, measurements, parameters)
    #   partial_multi_* - broadcasted results without the last shifted evaluation
    #   *_r0            - unshifted results with axes (shots, measurements)
    multi_X = tuple(tuple((-c * x, -2 * c * x) for c in shv) for x in X)
    batched_multi_X = tuple((-c * X, -2 * c * X) for c in shv)
    partial_multi_X = tuple((-c * X[:-1], -2 * c * X[:-1]) for c in shv)
    expvals_r0 = tuple((-c * w, -2 * c * w) for c in shv)

    multi_X_P = tuple(tuple((-c * _p, -2 * c * x) for c in shv) for x, _p in zip(X, P))
    batched_multi_X_P = tuple((-c * P, -2 * c * X) for c in shv)
    partial_multi_X_P = tuple((-c * P[:-1], -2 * c * X[:-1]) for c in shv)
    prob_expval_r0 = tuple((-c * v, -2 * c * w) for c in shv)

    multi_P_P = tuple(tuple((-c * _p, -2 * c * _q) for c in shv) for _q, _p in zip(P2, P))
    batched_multi_P_P = tuple((-c * P, -2 * c * P2) for c in shv)
    partial_multi_P_P = tuple((-c * P[:-1], -2 * c * P2[:-1]) for c in shv)
    probs_r0 = tuple((-c * v, -2 * c * v2) for c in shv)

    exp_shot_vec_prob_expval = tuple((c * p * Z, 2 * c * Z) for c in shv)
    exp_shot_vec_probs = tuple((c * p * Z, 2 * c * p2 * Z) for c in shv)
    test_cases_multi_shots_multi_meas = [
        # Expectation values
        (X, None, None, multi_X, None, shv_m * Z),
        (X, None, 4, batched_multi_X, None, shv_m * Z),
        (X[:-1], X[-1], None, multi_X[:-1], multi_X[-1], shv_m * Z),
        (X[:-1], X[-1], 4, partial_multi_X, multi_X[-1], shv_m * Z),
        (np.ones(0), w, None, (), expvals_r0, shv_m * Z),
        (np.ones(0), w, 4, ((), (), ()), expvals_r0, shv_m * Z),
        # Probability and expectation
        (X, None, None, multi_X_P, None, exp_shot_vec_prob_expval),
        (X, None, 4, batched_multi_X_P, None, exp_shot_vec_prob_expval),
        (X[:-1], X[-1], None, multi_X_P[:-1], multi_X_P[-1], exp_shot_vec_prob_expval),
        (X[:-1], X[-1], 4, partial_multi_X_P, multi_X_P[-1], exp_shot_vec_prob_expval),
        (np.ones(0), w, None, (), prob_expval_r0, exp_shot_vec_prob_expval),
        (np.ones(0), w, 4, ((), (), ()), prob_expval_r0, exp_shot_vec_prob_expval),
        # Probabilities
        (X, None, None, multi_P_P, None, exp_shot_vec_probs),
        (X, None, 4, batched_multi_P_P, None, exp_shot_vec_probs),
        (X[:-1], X[-1], None, multi_P_P[:-1], multi_P_P[-1], exp_shot_vec_probs),
        (X[:-1], X[-1], 4, partial_multi_P_P, multi_P_P[-1], exp_shot_vec_probs),
        (np.ones(0), w, None, (), probs_r0, exp_shot_vec_probs),
        (np.ones(0), w, 4, ((), (), ()), probs_r0, exp_shot_vec_probs),
    ]

    @pytest.mark.parametrize(
        "coeffs, unshifted_coeff, batch_size, res, r0, expected",
        test_cases_multi_shots_multi_meas,
    )
    def test_multi_shots_multi_meas(self, coeffs, unshifted_coeff, batch_size, res, r0, expected):
        """Test that a shot vector, multiple measurements gradient is evaluated correctly."""

        # Shot vector with three entries
        shots = Shots((100, 101, 102))
        # Only the last two entries (number of measurements, shots) are filled in
        tape_specs = (None, None, 2, shots)
        # Positions 1 and 3 carry the shifted and the unshifted coefficients
        data = [None, coeffs, None, unshifted_coeff, None]
        grad = _evaluate_gradient(tape_specs, res, data, r0, batch_size)

        # Outer tuple: one entry per shot vector entry; inner tuple: one entry per measurement
        assert isinstance(grad, tuple) and len(grad) == 3
        for g, e in zip(grad, expected):
            assert isinstance(g, tuple) and len(g) == 2
            for _g, _e in zip(g, e):
                assert isinstance(_g, np.ndarray) and _g.shape == _e.shape
                assert np.allclose(_g, _e)


# pylint: disable=too-few-public-methods
class RY_with_F(qml.RY):
Expand Down
Loading

0 comments on commit 58668d7

Please sign in to comment.