From 21f509bfa4b505148c400cb69f4dfab872f5b0ff Mon Sep 17 00:00:00 2001 From: Haochen Wang Date: Thu, 19 Sep 2024 11:17:10 -0400 Subject: [PATCH 01/10] Add a how-to for catalyst-compiling "Symmetry-invariant quantum machine learning force fields" The how-to contains the full code listing, and some primitive tutorial words. --- ...rial_eqnn_force_field_catalyst_compiled.py | 280 ++++++++++++++++++ 1 file changed, 280 insertions(+) create mode 100644 demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py diff --git a/demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py b/demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py new file mode 100644 index 0000000000..16525b41e6 --- /dev/null +++ b/demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py @@ -0,0 +1,280 @@ +r"""Compiling "Symmetry-invariant quantum machine learning force fields" with PennyLane-Catalyst +======================================================== + + +To speed up our training process, we can use PennyLane-Catalyst to compile our training workflow. + +As opposed to jax.jit, catalyst.qjit innately understands PennyLane quantum instructions and performs better on +Lightning backend. + + +""" + +import pennylane as qml +import numpy as np + +import jax +from jax import numpy as jnp + +import scipy +import matplotlib.pyplot as plt +import sklearn + +import catalyst +from catalyst import qjit + +from jax.example_libraries import optimizers +from sklearn.preprocessing import MinMaxScaler + +X = np.array([[0, 1], [1, 0]]) +Y = np.array([[0, -1.0j], [1.0j, 0]]) +Z = np.array([[1, 0], [0, -1]]) + +sigmas = jnp.array(np.array([X, Y, Z])) # Vector of Pauli matrices +sigmas_sigmas = jnp.array( + np.array( + [np.kron(X, X), np.kron(Y, Y), np.kron(Z, Z)] # Vector of tensor products of Pauli matrices + ) +) + +def singlet(wires): + # Encode a 2-qubit rotation-invariant initial state, i.e., the singlet state. + qml.Hadamard(wires=wires[0]) + qml.PauliZ(wires=wires[0]) + qml.PauliX(wires=wires[1]) + qml.CNOT(wires=wires) + + +def equivariant_encoding(alpha, data, wires): + # data (jax array): cartesian coordinates of atom i + # alpha (jax array): trainable scaling parameter + hamiltonian = jnp.einsum("i,ijk", data, sigmas) # Heisenberg Hamiltonian + U = jax.scipy.linalg.expm(-1.0j * alpha * hamiltonian / 2) + qml.QubitUnitary(U, wires=wires, id="E") + + +def trainable_layer(weight, wires): + hamiltonian = jnp.einsum("ijk->jk", sigmas_sigmas) + U = jax.scipy.linalg.expm(-1.0j * weight * hamiltonian) + qml.QubitUnitary(U, wires=wires, id="U") + + +# Invariant observbale +Heisenberg = [ + qml.PauliX(0) @ qml.PauliX(1), + qml.PauliY(0) @ qml.PauliY(1), + qml.PauliZ(0) @ qml.PauliZ(1), +] +Observable = qml.Hamiltonian(np.ones((3)), Heisenberg) + + +def noise_layer(epsilon, wires): + for _, w in enumerate(wires): + qml.RZ(epsilon[_], wires=[w]) + + +D = 6 # Depth of the model +B = 1 # Number of repetitions inside a trainable layer +rep = 2 # Number of repeated vertical encoding + +active_atoms = 2 # Number of active atoms + # Here we only have two active atoms since we fixed the oxygen (which becomes non-active) at the origin +num_qubits = active_atoms * rep + + +# We need to use "lightning.qubit" device for Catalyst compilation. +dev = qml.device("lightning.qubit", wires=num_qubits) + + +###################################################################### +# The core function that is called repeatedly many times can benifit from being just-in-time compiled with qjit. 
+# All we need to do is decorate the function with the `@qjit` decorator. +# +# Catalyst has its own `for_loop` function to work with qjit. +# `catalyst.for_loop` should be used when the loop bounds or step depends on the qjit-ted function's input arguments. +# If there is no such dependence, `catalyst.for_loop` can still be used. +# Here we showcase both usages. + +@qjit +@qml.qnode(dev) +def vqlm_qjit(data, params): + weights = params["params"]["weights"] + alphas = params["params"]["alphas"] + epsilon = params["params"]["epsilon"] + # Initial state + @catalyst.for_loop(0, rep, 1) + def singlet_loop(i): + singlet(wires=jnp.arange(active_atoms)+active_atoms*i) + singlet_loop() + # Initial encoding + for i in range(num_qubits): + equivariant_encoding( + alphas[i, 0], jnp.asarray(data)[i % active_atoms, ...], wires=[i] + ) + # Reuploading model + for d in range(D): + qml.Barrier() + for b in range(B): + # Even layer + for i in range(0, num_qubits - 1, 2): + trainable_layer(weights[i, d + 1, b], wires=[i, (i + 1) % num_qubits]) + # Odd layer + for i in range(1, num_qubits, 2): + trainable_layer(weights[i, d + 1, b], wires=[i, (i + 1) % num_qubits]) + # Symmetry-breaking + if epsilon is not None: + noise_layer(epsilon[d, :], range(num_qubits)) + # Encoding + for i in range(num_qubits): + equivariant_encoding( + alphas[i, d + 1], jnp.asarray(data)[i % active_atoms, ...], wires=[i] + ) + return qml.expval(Observable) + +# vectorizing for batched training with `catalyst.vmap` +vec_vqlm = catalyst.vmap(vqlm_qjit, in_axes=(0, {'params': {'alphas': None, 'epsilon': None, 'weights': None}} ), out_axes=0) + +# loss function for cost +def mse_loss(predictions, targets): + return jnp.mean(0.5 * (predictions - targets) ** 2) + +# Compile a training step +# many calls so compile = faster! +@qjit +def train_step(step_i, opt_state, loss_data): + + def cost(weights, loss_data): + data, E_target, F_target = loss_data + E_pred = vec_vqlm(data, weights) + l = mse_loss(E_pred, E_target) + return l + + net_params = get_params(opt_state) + loss = cost(net_params, loss_data) + grads = catalyst.grad(cost, method = "fd", h=1e-13, argnums=0)(net_params, loss_data) + return loss, opt_update(step_i, grads, opt_state) + + +# Return prediction and loss at inference times, e.g. 
for testing +@qjit +def inference(loss_data, opt_state): + data, E_target, F_target = loss_data + net_params = get_params(opt_state) + E_pred = vec_vqlm(data, net_params) + l = mse_loss(E_pred, E_target) + return E_pred, l + + +#################### main ########################## +### setup ### +# Load the data +energy = np.load("eqnn_force_field_data/Energy.npy") +forces = np.load("eqnn_force_field_data/Forces.npy") +positions = np.load( + "eqnn_force_field_data/Positions.npy" +) # Cartesian coordinates shape = (nbr_sample, nbr_atoms,3) +shape = np.shape(positions) + + +### Scaling the energy to fit in [-1,1] + +scaler = MinMaxScaler((-1, 1)) + +energy = scaler.fit_transform(energy) +forces = forces * scaler.scale_ + + +# Placing the oxygen at the origin +data = np.zeros((shape[0], 2, 3)) +data[:, 0, :] = positions[:, 1, :] - positions[:, 0, :] +data[:, 1, :] = positions[:, 2, :] - positions[:, 0, :] +positions = data.copy() + +forces = forces[:, 1:, :] # Select only the forces on the hydrogen atoms since the oxygen is fixed + + +# Splitting in train-test set +indices_train = np.random.choice(np.arange(shape[0]), size=int(0.8 * shape[0]), replace=False) +indices_test = np.setdiff1d(np.arange(shape[0]), indices_train) + +E_train, E_test = (energy[indices_train, 0], energy[indices_test, 0]) +F_train, F_test = forces[indices_train, ...], forces[indices_test, ...] +data_train, data_test = ( + jnp.array(positions[indices_train, ...]), + jnp.array(positions[indices_test, ...]), +) + +### training ### +opt_init, opt_update, get_params = optimizers.adam(1e-2) + +np.random.seed(42) +weights = np.zeros((num_qubits, D, B)) +weights[0] = np.random.uniform(0, np.pi, 1) +weights = jnp.array(weights) + +# Encoding weights +alphas = jnp.array(np.ones((num_qubits, D + 1))) + +# Symmetry-breaking (SB) +np.random.seed(42) +epsilon = jnp.array(np.random.normal(0, 0.001, size=(D, num_qubits))) +epsilon = None # We disable SB for this specific example +epsilon = jax.lax.stop_gradient(epsilon) # comment if we wish to train the SB weights as well. + + + +net_params = {"params": {"weights": weights, "alphas": alphas, "epsilon": epsilon}} +opt_state = opt_init(net_params) +running_loss = [] + + +num_batches = 5000 # number of optimization steps +batch_size = 256 # number of training data per batch + +batch = np.random.choice(np.arange(np.shape(data_train)[0]), batch_size, replace=False) +loss_data = data_train[batch, ...], E_train[batch, ...], F_train[batch, ...] + + +# The main training loop +# We call `train_step` and `inference` many times, so the speedup from qjit will be quite significant! +for ibatch in range(num_batches): + # select a batch of training points + batch = np.random.choice(np.arange(np.shape(data_train)[0]), batch_size, replace=False) + + # preparing the data + loss_data = data_train[batch, ...], E_train[batch, ...], F_train[batch, ...] 
+ loss_data_test = data_test, E_test, F_test + + # perform one training step + loss, opt_state = train_step(num_batches, opt_state, loss_data) + + # computing the test loss and energy predictions + E_pred, test_loss = inference(loss_data_test, opt_state) + running_loss.append([float(loss), float(test_loss)]) + + +history_loss = np.array(running_loss) + +### plotting ### +fontsize = 12 +plt.figure(figsize=(4,4)) +plt.plot(history_loss[:, 0], "r-", label="training error") +plt.plot(history_loss[:, 1], "b-", label="testing error") + +plt.yscale("log") +plt.xlabel("Optimization Steps", fontsize=fontsize) +plt.ylabel("Mean Squared Error", fontsize=fontsize) +plt.legend(fontsize=fontsize) +plt.tight_layout() +plt.show() + + +plt.figure(figsize=(4,4)) +plt.title("Energy predictions", fontsize=fontsize) +plt.plot(energy[indices_test], E_pred, "ro", label="Test predictions") +plt.plot(energy[indices_test], energy[indices_test], "k.-", lw=1, label="Exact") +plt.xlabel("Exact energy", fontsize=fontsize) +plt.ylabel("Predicted energy", fontsize=fontsize) +plt.legend(fontsize=fontsize) +plt.tight_layout() +plt.show() From f83f6909e7260a66938a50992ff12123e1a80c14 Mon Sep 17 00:00:00 2001 From: Haochen Wang Date: Fri, 20 Sep 2024 13:25:19 -0400 Subject: [PATCH 02/10] grammar; remove unused imported --- .../tutorial_eqnn_force_field_catalyst_compiled.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py b/demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py index 16525b41e6..56a3b3a1e2 100644 --- a/demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py +++ b/demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py @@ -16,9 +16,7 @@ import jax from jax import numpy as jnp -import scipy import matplotlib.pyplot as plt -import sklearn import catalyst from catalyst import qjit @@ -87,11 +85,11 @@ def noise_layer(epsilon, wires): ###################################################################### -# The core function that is called repeatedly many times can benifit from being just-in-time compiled with qjit. +# The core function that is called repeatedly can benefit from being just-in-time compiled with qjit. # All we need to do is decorate the function with the `@qjit` decorator. # # Catalyst has its own `for_loop` function to work with qjit. -# `catalyst.for_loop` should be used when the loop bounds or step depends on the qjit-ted function's input arguments. +# `catalyst.for_loop` should be used when the loop bounds or step depend on the qjit-ted function's input arguments. # If there is no such dependence, `catalyst.for_loop` can still be used. # Here we showcase both usages. From 442e9d20a62bd1fb8ffc747af1f8fbd3c7572d61 Mon Sep 17 00:00:00 2001 From: Haochen Wang Date: Fri, 20 Sep 2024 13:29:46 -0400 Subject: [PATCH 03/10] move `opt_init, ... 
= optimizers.adam(...)` into the original location --- demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py b/demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py index 56a3b3a1e2..725edbc27a 100644 --- a/demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py +++ b/demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py @@ -203,8 +203,6 @@ def inference(loss_data, opt_state): ) ### training ### -opt_init, opt_update, get_params = optimizers.adam(1e-2) - np.random.seed(42) weights = np.zeros((num_qubits, D, B)) weights[0] = np.random.uniform(0, np.pi, 1) @@ -220,7 +218,7 @@ def inference(loss_data, opt_state): epsilon = jax.lax.stop_gradient(epsilon) # comment if we wish to train the SB weights as well. - +opt_init, opt_update, get_params = optimizers.adam(1e-2) net_params = {"params": {"weights": weights, "alphas": alphas, "epsilon": epsilon}} opt_state = opt_init(net_params) running_loss = [] From bbd9221e0378d768cf68ca776ef7122f7008e78e Mon Sep 17 00:00:00 2001 From: Haochen Wang Date: Fri, 20 Sep 2024 14:02:02 -0400 Subject: [PATCH 04/10] black format --- ...rial_eqnn_force_field_catalyst_compiled.py | 51 ++++++++++++++----- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py b/demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py index 725edbc27a..060f38cd56 100644 --- a/demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py +++ b/demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py @@ -31,10 +31,15 @@ sigmas = jnp.array(np.array([X, Y, Z])) # Vector of Pauli matrices sigmas_sigmas = jnp.array( np.array( - [np.kron(X, X), np.kron(Y, Y), np.kron(Z, Z)] # Vector of tensor products of Pauli matrices + [ + np.kron(X, X), + np.kron(Y, Y), + np.kron(Z, Z), + ] # Vector of tensor products of Pauli matrices ) ) + def singlet(wires): # Encode a 2-qubit rotation-invariant initial state, i.e., the singlet state. qml.Hadamard(wires=wires[0]) @@ -76,7 +81,7 @@ def noise_layer(epsilon, wires): rep = 2 # Number of repeated vertical encoding active_atoms = 2 # Number of active atoms - # Here we only have two active atoms since we fixed the oxygen (which becomes non-active) at the origin +# Here we only have two active atoms since we fixed the oxygen (which becomes non-active) at the origin num_qubits = active_atoms * rep @@ -93,22 +98,25 @@ def noise_layer(epsilon, wires): # If there is no such dependence, `catalyst.for_loop` can still be used. # Here we showcase both usages. 
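(A minimal, self-contained sketch, separate from the patch itself, of the case the comment above describes: a loop whose bound depends on the qjit-ted function's own argument, which is exactly when `catalyst.for_loop` is required. The two-qubit toy circuit is purely illustrative.)

import pennylane as qml
from catalyst import qjit, for_loop

toy_dev = qml.device("lightning.qubit", wires=2)

@qjit
@qml.qnode(toy_dev)
def toy_circuit(n_layers: int):
    # The loop bound is a function argument, so a plain Python `for` over
    # range(n_layers) would need a concrete value at trace time;
    # `catalyst.for_loop` instead captures the loop in the compiled program.
    @for_loop(0, n_layers, 1)
    def layer(i):
        qml.Hadamard(wires=0)
        qml.CNOT(wires=[0, 1])

    layer()
    return qml.expval(qml.PauliZ(1))

toy_circuit(3)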
+ @qjit @qml.qnode(dev) def vqlm_qjit(data, params): weights = params["params"]["weights"] alphas = params["params"]["alphas"] epsilon = params["params"]["epsilon"] + # Initial state @catalyst.for_loop(0, rep, 1) def singlet_loop(i): - singlet(wires=jnp.arange(active_atoms)+active_atoms*i) + singlet(wires=jnp.arange(active_atoms) + active_atoms * i) + singlet_loop() # Initial encoding for i in range(num_qubits): equivariant_encoding( alphas[i, 0], jnp.asarray(data)[i % active_atoms, ...], wires=[i] - ) + ) # Reuploading model for d in range(D): qml.Barrier() @@ -129,13 +137,20 @@ def singlet_loop(i): ) return qml.expval(Observable) + # vectorizing for batched training with `catalyst.vmap` -vec_vqlm = catalyst.vmap(vqlm_qjit, in_axes=(0, {'params': {'alphas': None, 'epsilon': None, 'weights': None}} ), out_axes=0) +vec_vqlm = catalyst.vmap( + vqlm_qjit, + in_axes=(0, {"params": {"alphas": None, "epsilon": None, "weights": None}}), + out_axes=0, +) + # loss function for cost def mse_loss(predictions, targets): return jnp.mean(0.5 * (predictions - targets) ** 2) + # Compile a training step # many calls so compile = faster! @qjit @@ -149,7 +164,7 @@ def cost(weights, loss_data): net_params = get_params(opt_state) loss = cost(net_params, loss_data) - grads = catalyst.grad(cost, method = "fd", h=1e-13, argnums=0)(net_params, loss_data) + grads = catalyst.grad(cost, method="fd", h=1e-13, argnums=0)(net_params, loss_data) return loss, opt_update(step_i, grads, opt_state) @@ -188,11 +203,15 @@ def inference(loss_data, opt_state): data[:, 1, :] = positions[:, 2, :] - positions[:, 0, :] positions = data.copy() -forces = forces[:, 1:, :] # Select only the forces on the hydrogen atoms since the oxygen is fixed +forces = forces[ + :, 1:, : +] # Select only the forces on the hydrogen atoms since the oxygen is fixed # Splitting in train-test set -indices_train = np.random.choice(np.arange(shape[0]), size=int(0.8 * shape[0]), replace=False) +indices_train = np.random.choice( + np.arange(shape[0]), size=int(0.8 * shape[0]), replace=False +) indices_test = np.setdiff1d(np.arange(shape[0]), indices_train) E_train, E_test = (energy[indices_train, 0], energy[indices_test, 0]) @@ -215,7 +234,9 @@ def inference(loss_data, opt_state): np.random.seed(42) epsilon = jnp.array(np.random.normal(0, 0.001, size=(D, num_qubits))) epsilon = None # We disable SB for this specific example -epsilon = jax.lax.stop_gradient(epsilon) # comment if we wish to train the SB weights as well. +epsilon = jax.lax.stop_gradient( + epsilon +) # comment if we wish to train the SB weights as well. opt_init, opt_update, get_params = optimizers.adam(1e-2) @@ -224,8 +245,8 @@ def inference(loss_data, opt_state): running_loss = [] -num_batches = 5000 # number of optimization steps -batch_size = 256 # number of training data per batch +num_batches = 5000 # number of optimization steps +batch_size = 256 # number of training data per batch batch = np.random.choice(np.arange(np.shape(data_train)[0]), batch_size, replace=False) loss_data = data_train[batch, ...], E_train[batch, ...], F_train[batch, ...] @@ -235,7 +256,9 @@ def inference(loss_data, opt_state): # We call `train_step` and `inference` many times, so the speedup from qjit will be quite significant! 
for ibatch in range(num_batches): # select a batch of training points - batch = np.random.choice(np.arange(np.shape(data_train)[0]), batch_size, replace=False) + batch = np.random.choice( + np.arange(np.shape(data_train)[0]), batch_size, replace=False + ) # preparing the data loss_data = data_train[batch, ...], E_train[batch, ...], F_train[batch, ...] @@ -253,7 +276,7 @@ def inference(loss_data, opt_state): ### plotting ### fontsize = 12 -plt.figure(figsize=(4,4)) +plt.figure(figsize=(4, 4)) plt.plot(history_loss[:, 0], "r-", label="training error") plt.plot(history_loss[:, 1], "b-", label="testing error") @@ -265,7 +288,7 @@ def inference(loss_data, opt_state): plt.show() -plt.figure(figsize=(4,4)) +plt.figure(figsize=(4, 4)) plt.title("Energy predictions", fontsize=fontsize) plt.plot(energy[indices_test], E_pred, "ro", label="Test predictions") plt.plot(energy[indices_test], energy[indices_test], "k.-", lw=1, label="Exact") From c66e485e938c5900aae7e7b0efe9d9257d82d687 Mon Sep 17 00:00:00 2001 From: Haochen Wang Date: Fri, 8 Nov 2024 12:07:06 -0500 Subject: [PATCH 05/10] add qjit to original demo --- demonstrations/tutorial_eqnn_force_field.py | 35 ++++++++++++++------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/demonstrations/tutorial_eqnn_force_field.py b/demonstrations/tutorial_eqnn_force_field.py index 45373bf0b4..92e2ff8008 100644 --- a/demonstrations/tutorial_eqnn_force_field.py +++ b/demonstrations/tutorial_eqnn_force_field.py @@ -143,6 +143,10 @@ import matplotlib.pyplot as plt import sklearn +###################################################################### +# To speed up the computation, we also import catalyst, a jit compiler for PennyLane quantum programs. +import catalyst + ###################################################################### # Let us construct Pauli matrices, which are used to build the Hamiltonian. X = np.array([[0, 1], [1, 0]]) @@ -301,10 +305,13 @@ def noise_layer(epsilon, wires): ################################# -dev = qml.device("default.qubit", wires=num_qubits) +###################################################################### +# To speed up the computation, we will be using catalyst to compile our quantum program, and we will be +# running our program on the lightning backend instead of the default qubit backend. +dev = qml.device("lightning.qubit", wires=num_qubits) -@qml.qnode(dev, interface="jax") +@qml.qnode(dev) def vqlm(data, params): weights = params["params"]["weights"] @@ -396,25 +403,27 @@ def vqlm(data, params): ) ################################# -# We will know define the cost function and how to train the model using Jax. We will use the mean-square-error loss function. -# To speed up the computation, we use the decorator ``@jax.jit`` to do just-in-time compilation for this execution. This means the first execution will typically take a little longer with the -# benefit that all following executions will be significantly faster, see the `Jax docs on jitting `_. +# We will now define the cost function and how to train the model using Jax. We will use the mean-square-error loss function. +# To speed up the computation, we use the decorator ``@catalyst.qjit`` to do just-in-time compilation for this execution. This means the first execution will typically take a little longer with the +# benefit that all following executions will be significantly faster, see the `Catalyst documentation `_. 
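(A minimal sketch, separate from the patch itself, of the compile-once behaviour described above; the one-qubit circuit is a toy chosen only for illustration, and the printed timings will vary from machine to machine.)

import time
import pennylane as qml
from catalyst import qjit

timing_dev = qml.device("lightning.qubit", wires=1)

@qjit
@qml.qnode(timing_dev)
def timed_circuit(x):
    qml.RX(x, wires=0)
    return qml.expval(qml.PauliZ(0))

t0 = time.time()
timed_circuit(0.1)  # first call: traces and compiles, then runs
t1 = time.time()
timed_circuit(0.2)  # later calls with the same argument types reuse the compiled program
t2 = time.time()
print(f"first call (including compilation): {t1 - t0:.3f} s")
print(f"second call (already compiled):     {t2 - t1:.3f} s")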
################################# from jax.example_libraries import optimizers # We vectorize the model over the data points -vec_vqlm = jax.vmap(vqlm, (0, None), 0) +vec_vqlm = catalyst.vmap( + vqlm, + in_axes=(0, {"params": {"alphas": None, "epsilon": None, "weights": None}}), + out_axes=0, +) # Mean-squared-error loss function -@jax.jit def mse_loss(predictions, targets): return jnp.mean(0.5 * (predictions - targets) ** 2) # Make prediction and compute the loss -@jax.jit def cost(weights, loss_data): data, E_target, F_target = loss_data E_pred = vec_vqlm(data, weights) @@ -424,17 +433,19 @@ def cost(weights, loss_data): # Perform one training step -@jax.jit +# This function will be repeatedly called, so we qjit it to exploit the saved runtime from many runs. +@catalyst.qjit def train_step(step_i, opt_state, loss_data): net_params = get_params(opt_state) - loss, grads = jax.value_and_grad(cost, argnums=0)(net_params, loss_data) - + loss = cost(net_params, loss_data) + grads = catalyst.grad(cost, method="fd", h=1e-13, argnums=0)(net_params, loss_data) return loss, opt_update(step_i, grads, opt_state) # Return prediction and loss at inference times, e.g. for testing -@jax.jit +# This function is also repeatedly called, so qjit it. +@catalyst.qjit def inference(loss_data, opt_state): data, E_target, F_target = loss_data From c72a374f08c6bddec705274b59a6a4604cd00196 Mon Sep 17 00:00:00 2001 From: Haochen Wang Date: Fri, 8 Nov 2024 12:08:23 -0500 Subject: [PATCH 06/10] remove second qjit demo --- ...rial_eqnn_force_field_catalyst_compiled.py | 299 ------------------ 1 file changed, 299 deletions(-) delete mode 100644 demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py diff --git a/demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py b/demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py deleted file mode 100644 index 060f38cd56..0000000000 --- a/demonstrations/tutorial_eqnn_force_field_catalyst_compiled.py +++ /dev/null @@ -1,299 +0,0 @@ -r"""Compiling "Symmetry-invariant quantum machine learning force fields" with PennyLane-Catalyst -======================================================== - - -To speed up our training process, we can use PennyLane-Catalyst to compile our training workflow. - -As opposed to jax.jit, catalyst.qjit innately understands PennyLane quantum instructions and performs better on -Lightning backend. - - -""" - -import pennylane as qml -import numpy as np - -import jax -from jax import numpy as jnp - -import matplotlib.pyplot as plt - -import catalyst -from catalyst import qjit - -from jax.example_libraries import optimizers -from sklearn.preprocessing import MinMaxScaler - -X = np.array([[0, 1], [1, 0]]) -Y = np.array([[0, -1.0j], [1.0j, 0]]) -Z = np.array([[1, 0], [0, -1]]) - -sigmas = jnp.array(np.array([X, Y, Z])) # Vector of Pauli matrices -sigmas_sigmas = jnp.array( - np.array( - [ - np.kron(X, X), - np.kron(Y, Y), - np.kron(Z, Z), - ] # Vector of tensor products of Pauli matrices - ) -) - - -def singlet(wires): - # Encode a 2-qubit rotation-invariant initial state, i.e., the singlet state. 
- qml.Hadamard(wires=wires[0]) - qml.PauliZ(wires=wires[0]) - qml.PauliX(wires=wires[1]) - qml.CNOT(wires=wires) - - -def equivariant_encoding(alpha, data, wires): - # data (jax array): cartesian coordinates of atom i - # alpha (jax array): trainable scaling parameter - hamiltonian = jnp.einsum("i,ijk", data, sigmas) # Heisenberg Hamiltonian - U = jax.scipy.linalg.expm(-1.0j * alpha * hamiltonian / 2) - qml.QubitUnitary(U, wires=wires, id="E") - - -def trainable_layer(weight, wires): - hamiltonian = jnp.einsum("ijk->jk", sigmas_sigmas) - U = jax.scipy.linalg.expm(-1.0j * weight * hamiltonian) - qml.QubitUnitary(U, wires=wires, id="U") - - -# Invariant observbale -Heisenberg = [ - qml.PauliX(0) @ qml.PauliX(1), - qml.PauliY(0) @ qml.PauliY(1), - qml.PauliZ(0) @ qml.PauliZ(1), -] -Observable = qml.Hamiltonian(np.ones((3)), Heisenberg) - - -def noise_layer(epsilon, wires): - for _, w in enumerate(wires): - qml.RZ(epsilon[_], wires=[w]) - - -D = 6 # Depth of the model -B = 1 # Number of repetitions inside a trainable layer -rep = 2 # Number of repeated vertical encoding - -active_atoms = 2 # Number of active atoms -# Here we only have two active atoms since we fixed the oxygen (which becomes non-active) at the origin -num_qubits = active_atoms * rep - - -# We need to use "lightning.qubit" device for Catalyst compilation. -dev = qml.device("lightning.qubit", wires=num_qubits) - - -###################################################################### -# The core function that is called repeatedly can benefit from being just-in-time compiled with qjit. -# All we need to do is decorate the function with the `@qjit` decorator. -# -# Catalyst has its own `for_loop` function to work with qjit. -# `catalyst.for_loop` should be used when the loop bounds or step depend on the qjit-ted function's input arguments. -# If there is no such dependence, `catalyst.for_loop` can still be used. -# Here we showcase both usages. - - -@qjit -@qml.qnode(dev) -def vqlm_qjit(data, params): - weights = params["params"]["weights"] - alphas = params["params"]["alphas"] - epsilon = params["params"]["epsilon"] - - # Initial state - @catalyst.for_loop(0, rep, 1) - def singlet_loop(i): - singlet(wires=jnp.arange(active_atoms) + active_atoms * i) - - singlet_loop() - # Initial encoding - for i in range(num_qubits): - equivariant_encoding( - alphas[i, 0], jnp.asarray(data)[i % active_atoms, ...], wires=[i] - ) - # Reuploading model - for d in range(D): - qml.Barrier() - for b in range(B): - # Even layer - for i in range(0, num_qubits - 1, 2): - trainable_layer(weights[i, d + 1, b], wires=[i, (i + 1) % num_qubits]) - # Odd layer - for i in range(1, num_qubits, 2): - trainable_layer(weights[i, d + 1, b], wires=[i, (i + 1) % num_qubits]) - # Symmetry-breaking - if epsilon is not None: - noise_layer(epsilon[d, :], range(num_qubits)) - # Encoding - for i in range(num_qubits): - equivariant_encoding( - alphas[i, d + 1], jnp.asarray(data)[i % active_atoms, ...], wires=[i] - ) - return qml.expval(Observable) - - -# vectorizing for batched training with `catalyst.vmap` -vec_vqlm = catalyst.vmap( - vqlm_qjit, - in_axes=(0, {"params": {"alphas": None, "epsilon": None, "weights": None}}), - out_axes=0, -) - - -# loss function for cost -def mse_loss(predictions, targets): - return jnp.mean(0.5 * (predictions - targets) ** 2) - - -# Compile a training step -# many calls so compile = faster! 
-@qjit -def train_step(step_i, opt_state, loss_data): - - def cost(weights, loss_data): - data, E_target, F_target = loss_data - E_pred = vec_vqlm(data, weights) - l = mse_loss(E_pred, E_target) - return l - - net_params = get_params(opt_state) - loss = cost(net_params, loss_data) - grads = catalyst.grad(cost, method="fd", h=1e-13, argnums=0)(net_params, loss_data) - return loss, opt_update(step_i, grads, opt_state) - - -# Return prediction and loss at inference times, e.g. for testing -@qjit -def inference(loss_data, opt_state): - data, E_target, F_target = loss_data - net_params = get_params(opt_state) - E_pred = vec_vqlm(data, net_params) - l = mse_loss(E_pred, E_target) - return E_pred, l - - -#################### main ########################## -### setup ### -# Load the data -energy = np.load("eqnn_force_field_data/Energy.npy") -forces = np.load("eqnn_force_field_data/Forces.npy") -positions = np.load( - "eqnn_force_field_data/Positions.npy" -) # Cartesian coordinates shape = (nbr_sample, nbr_atoms,3) -shape = np.shape(positions) - - -### Scaling the energy to fit in [-1,1] - -scaler = MinMaxScaler((-1, 1)) - -energy = scaler.fit_transform(energy) -forces = forces * scaler.scale_ - - -# Placing the oxygen at the origin -data = np.zeros((shape[0], 2, 3)) -data[:, 0, :] = positions[:, 1, :] - positions[:, 0, :] -data[:, 1, :] = positions[:, 2, :] - positions[:, 0, :] -positions = data.copy() - -forces = forces[ - :, 1:, : -] # Select only the forces on the hydrogen atoms since the oxygen is fixed - - -# Splitting in train-test set -indices_train = np.random.choice( - np.arange(shape[0]), size=int(0.8 * shape[0]), replace=False -) -indices_test = np.setdiff1d(np.arange(shape[0]), indices_train) - -E_train, E_test = (energy[indices_train, 0], energy[indices_test, 0]) -F_train, F_test = forces[indices_train, ...], forces[indices_test, ...] -data_train, data_test = ( - jnp.array(positions[indices_train, ...]), - jnp.array(positions[indices_test, ...]), -) - -### training ### -np.random.seed(42) -weights = np.zeros((num_qubits, D, B)) -weights[0] = np.random.uniform(0, np.pi, 1) -weights = jnp.array(weights) - -# Encoding weights -alphas = jnp.array(np.ones((num_qubits, D + 1))) - -# Symmetry-breaking (SB) -np.random.seed(42) -epsilon = jnp.array(np.random.normal(0, 0.001, size=(D, num_qubits))) -epsilon = None # We disable SB for this specific example -epsilon = jax.lax.stop_gradient( - epsilon -) # comment if we wish to train the SB weights as well. - - -opt_init, opt_update, get_params = optimizers.adam(1e-2) -net_params = {"params": {"weights": weights, "alphas": alphas, "epsilon": epsilon}} -opt_state = opt_init(net_params) -running_loss = [] - - -num_batches = 5000 # number of optimization steps -batch_size = 256 # number of training data per batch - -batch = np.random.choice(np.arange(np.shape(data_train)[0]), batch_size, replace=False) -loss_data = data_train[batch, ...], E_train[batch, ...], F_train[batch, ...] - - -# The main training loop -# We call `train_step` and `inference` many times, so the speedup from qjit will be quite significant! -for ibatch in range(num_batches): - # select a batch of training points - batch = np.random.choice( - np.arange(np.shape(data_train)[0]), batch_size, replace=False - ) - - # preparing the data - loss_data = data_train[batch, ...], E_train[batch, ...], F_train[batch, ...] 
- loss_data_test = data_test, E_test, F_test - - # perform one training step - loss, opt_state = train_step(num_batches, opt_state, loss_data) - - # computing the test loss and energy predictions - E_pred, test_loss = inference(loss_data_test, opt_state) - running_loss.append([float(loss), float(test_loss)]) - - -history_loss = np.array(running_loss) - -### plotting ### -fontsize = 12 -plt.figure(figsize=(4, 4)) -plt.plot(history_loss[:, 0], "r-", label="training error") -plt.plot(history_loss[:, 1], "b-", label="testing error") - -plt.yscale("log") -plt.xlabel("Optimization Steps", fontsize=fontsize) -plt.ylabel("Mean Squared Error", fontsize=fontsize) -plt.legend(fontsize=fontsize) -plt.tight_layout() -plt.show() - - -plt.figure(figsize=(4, 4)) -plt.title("Energy predictions", fontsize=fontsize) -plt.plot(energy[indices_test], E_pred, "ro", label="Test predictions") -plt.plot(energy[indices_test], energy[indices_test], "k.-", lw=1, label="Exact") -plt.xlabel("Exact energy", fontsize=fontsize) -plt.ylabel("Predicted energy", fontsize=fontsize) -plt.legend(fontsize=fontsize) -plt.tight_layout() -plt.show() From 2d9ef0c45e3da90af923a68e8ec9df98f37d4f8e Mon Sep 17 00:00:00 2001 From: Haochen Wang Date: Mon, 25 Nov 2024 09:50:09 -0500 Subject: [PATCH 07/10] apply PR suggestions --- demonstrations/tutorial_eqnn_force_field.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/demonstrations/tutorial_eqnn_force_field.py b/demonstrations/tutorial_eqnn_force_field.py index 92e2ff8008..4b757407d0 100644 --- a/demonstrations/tutorial_eqnn_force_field.py +++ b/demonstrations/tutorial_eqnn_force_field.py @@ -144,7 +144,7 @@ import sklearn ###################################################################### -# To speed up the computation, we also import catalyst, a jit compiler for PennyLane quantum programs. +# To speed up the computation, we also import `Catalyst `_, a jit compiler for PennyLane quantum programs. import catalyst ###################################################################### @@ -306,8 +306,7 @@ def noise_layer(epsilon, wires): ###################################################################### -# To speed up the computation, we will be using catalyst to compile our quantum program, and we will be -# running our program on the lightning backend instead of the default qubit backend. +# We will be running our program using `lightning.qubit`, our performant state-vector simulator. dev = qml.device("lightning.qubit", wires=num_qubits) @@ -404,8 +403,8 @@ def vqlm(data, params): ################################# # We will now define the cost function and how to train the model using Jax. We will use the mean-square-error loss function. -# To speed up the computation, we use the decorator ``@catalyst.qjit`` to do just-in-time compilation for this execution. This means the first execution will typically take a little longer with the -# benefit that all following executions will be significantly faster, see the `Catalyst documentation `_. +# We use the decorator ``@catalyst.qjit`` to do just-in-time compilation for this execution. This means the first execution will typically take a little longer with the +# benefit that all following executions will be significantly faster (see the `Catalyst documentation `_). 
################################# from jax.example_libraries import optimizers From d7845e4ddfa0988f1d6a6ad4efc457e033cf3e78 Mon Sep 17 00:00:00 2001 From: Haochen Wang Date: Mon, 25 Nov 2024 14:01:03 -0500 Subject: [PATCH 08/10] update date of last modification --- demonstrations/tutorial_eqnn_force_field.metadata.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demonstrations/tutorial_eqnn_force_field.metadata.json b/demonstrations/tutorial_eqnn_force_field.metadata.json index 99dd1bcc61..21043e02ae 100644 --- a/demonstrations/tutorial_eqnn_force_field.metadata.json +++ b/demonstrations/tutorial_eqnn_force_field.metadata.json @@ -9,7 +9,7 @@ } ], "dateOfPublication": "2024-03-12T00:00:00+00:00", - "dateOfLastModification": "2024-11-06T00:00:00+00:00", + "dateOfLastModification": "2024-11-25T00:00:00+00:00", "categories": [ "Quantum Machine Learning", "Quantum Chemistry" From 23e072dc60ad0944e4dc129d198834d4f26c339b Mon Sep 17 00:00:00 2001 From: Isaac De Vlugt Date: Tue, 17 Dec 2024 15:21:03 -0500 Subject: [PATCH 09/10] trigger CI From 1823aa8c2c667fc72bd10d99ddb87dccf28101ca Mon Sep 17 00:00:00 2001 From: Haochen Wang Date: Thu, 19 Dec 2024 10:06:26 -0500 Subject: [PATCH 10/10] try CI with smaller run --- demonstrations/tutorial_eqnn_force_field.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/demonstrations/tutorial_eqnn_force_field.py b/demonstrations/tutorial_eqnn_force_field.py index 4b757407d0..6f58512d73 100644 --- a/demonstrations/tutorial_eqnn_force_field.py +++ b/demonstrations/tutorial_eqnn_force_field.py @@ -485,11 +485,12 @@ def inference(loss_data, opt_state): # We train our VQLM using stochastic gradient descent. -num_batches = 5000 # number of optimization steps -batch_size = 256 # number of training data per batch +num_batches = 200 # 5000 # number of optimization steps +batch_size = 5 # 256 # number of training data per batch for ibatch in range(num_batches): + #print(ibatch) # select a batch of training points batch = np.random.choice(np.arange(np.shape(data_train)[0]), batch_size, replace=False)
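(A closing sketch, not part of the patch series: if the smaller run introduced in PATCH 10/10 is only meant for CI, one option is to branch on an environment variable rather than hard-coding the reduced values. This assumes the CI system exports a `CI` variable; adjust to whatever the actual CI environment provides.)

import os

if os.environ.get("CI"):
    num_batches = 200  # number of optimization steps for a quick CI run
    batch_size = 5     # number of training data per batch for a quick CI run
else:
    num_batches = 5000  # full training run
    batch_size = 256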