[Do not merge] Implement non-jax versions of IQP models #6

Open

Wants to merge 100 commits into base: main. The diff below shows the changes from 2 commits.

Commits (100):
89826b6  implemented branching logic for different use cases (mariaschuld, Feb 28, 2024)
ac10674  some minor corrections (mariaschuld, Feb 28, 2024)
8ff542d  simplify if logic a little (mariaschuld, Feb 28, 2024)
c57a184  Update src/qml_benchmarks/models/iqp_kernel.py (mariaschuld, Feb 29, 2024)
bf67629  Add makefile. Run make format. Add catalyst qjit as an option for lig… (vincentmr, Feb 28, 2024)
c39a4f8  Add qjit in iqp_variational.py module. (vincentmr, Feb 29, 2024)
98dcf67  Fix iqp_var solver. (vincentmr, Feb 29, 2024)
c7af8b6  faster qjitting (josephbowles, Mar 26, 2024)
d357dff  catalyst training (josephbowles, Mar 27, 2024)
27b4d9d  catalyst port attempt (josephbowles, Mar 28, 2024)
92b921d  add catalyst support (josephbowles, Mar 28, 2024)
868a62c  attempt catalyst port (josephbowles, Mar 28, 2024)
af61f65  add code (josephbowles, Apr 9, 2024)
43ddf74  test (Apr 10, 2024)
56fc153  update (josephbowles, Apr 10, 2024)
46e6eb5  update (josephbowles, Apr 10, 2024)
81a614d  update (josephbowles, Apr 10, 2024)
66169c7  update (josephbowles, Apr 10, 2024)
5c01770  v1 (josephbowles, Apr 12, 2024)
1596669  perf ind additions (josephbowles, Apr 12, 2024)
30ffcbb  v1 (josephbowles, Apr 12, 2024)
d8bd09e  v1 (josephbowles, Apr 12, 2024)
7a65c86  new code (josephbowles, Apr 15, 2024)
eec5fa5  add installs (josephbowles, Apr 15, 2024)
c18820f  add installs (josephbowles, Apr 15, 2024)
0f32a6c  add qmetric (josephbowles, Apr 15, 2024)
ae27b0e  update (josephbowles, Apr 15, 2024)
2a254be  result (josephbowles, Apr 15, 2024)
5cf8f2b  add info (josephbowles, Apr 15, 2024)
881dc94  add info (josephbowles, Apr 15, 2024)
7dfd26e  update tests (josephbowles, Apr 15, 2024)
f452d2b  add slurm code (josephbowles, Apr 16, 2024)
d081626  add slurm code (josephbowles, Apr 16, 2024)
f513c67  add slurm code (josephbowles, Apr 16, 2024)
74967ef  add slurm code (josephbowles, Apr 16, 2024)
9462980  add slurm code (josephbowles, Apr 16, 2024)
07b49a9  update code (josephbowles, Apr 16, 2024)
ce6b0ff  delete (josephbowles, Apr 16, 2024)
fe0a9ce  update (josephbowles, Apr 16, 2024)
b2bc779  update (josephbowles, Apr 22, 2024)
595d3ef  update (josephbowles, Apr 22, 2024)
62ac41e  update (josephbowles, Apr 22, 2024)
a94578e  update (josephbowles, Apr 22, 2024)
4a78cf0  update (josephbowles, Apr 22, 2024)
2698fc5  update (josephbowles, Apr 22, 2024)
2690831  update (josephbowles, Apr 22, 2024)
8513a93  . (josephbowles, Apr 22, 2024)
e813a7f  add use jax (josephbowles, Apr 22, 2024)
282d90b  . (josephbowles, Apr 24, 2024)
2cba022  update (Apr 24, 2024)
45c4d72  update (Apr 24, 2024)
2cb5917  prototype of Slurm job w/ Podman works (balewski, Apr 25, 2024)
feaf667  double dream works (balewski, Apr 26, 2024)
8aad113  ok (balewski, Apr 26, 2024)
63ce6dd  working sbatch script (josephbowles, Apr 29, 2024)
375b141  update jax version (josephbowles, Apr 30, 2024)
7b40ba1  working sbatch (josephbowles, Apr 30, 2024)
5c3ccb2  add perf attributes (josephbowles, Apr 30, 2024)
c380d43  reads data but crash in Python scalars (balewski, Apr 30, 2024)
1de6e38  cleanup (balewski, May 1, 2024)
a76e44a  data gen file (josephbowles, May 2, 2024)
15f4998  results (josephbowles, May 2, 2024)
34ca1cf  update (josephbowles, May 2, 2024)
7f52b53  result (josephbowles, May 6, 2024)
07b91a9  working sbatch (josephbowles, May 6, 2024)
241c7ed  rename (josephbowles, May 6, 2024)
f6c4884  rename (josephbowles, May 6, 2024)
03cfc6b  cleanup (josephbowles, May 6, 2024)
e678c96  cleanup (josephbowles, May 6, 2024)
9cf5550  cleanup (josephbowles, May 6, 2024)
49b0f80  cleanup (josephbowles, May 6, 2024)
b2f9bf0  update (josephbowles, May 6, 2024)
a6b85f4  update (josephbowles, May 6, 2024)
5fe8150  cleanup (josephbowles, May 6, 2024)
ada44af  rename (josephbowles, May 6, 2024)
d0a951e  cleanup (josephbowles, May 6, 2024)
adf4e33  update (josephbowles, May 7, 2024)
92fc797  working with catalyst (josephbowles, May 29, 2024)
3108ca1  model cleanup (josephbowles, May 30, 2024)
04670ba  catalyst support (josephbowles, Jun 4, 2024)
fc72c00  qjit update (josephbowles, Jun 4, 2024)
5016ec4  fix results dir (josephbowles, Jun 5, 2024)
4b333b1  profile time (josephbowles, Jun 17, 2024)
1899a0c  . (josephbowles, Jun 17, 2024)
3a1baa8  . (josephbowles, Jun 17, 2024)
5e26427  profile (josephbowles, Jun 17, 2024)
628292a  results (josephbowles, Jun 18, 2024)
c69895b  results (josephbowles, Jun 19, 2024)
e5dbfb6  results (josephbowles, Jun 19, 2024)
0328ca8  results (josephbowles, Jun 19, 2024)
6bc2c73  no vmap train (josephbowles, Jun 19, 2024)
362a7fe  results (josephbowles, Jun 19, 2024)
bea29fa  lax batching (josephbowles, Jun 19, 2024)
10553dd  update (josephbowles, Jul 2, 2024)
fd2efba  kernel profiling (josephbowles, Jul 2, 2024)
3f01d33  results (josephbowles, Jul 3, 2024)
8231bd0  results (josephbowles, Jul 3, 2024)
670ecec  results (josephbowles, Jul 3, 2024)
9c535e5  results (josephbowles, Jul 3, 2024)
b848381  update (josephbowles, Aug 5, 2024)
128 changes: 128 additions & 0 deletions src/qml_benchmarks/model_utils.py
@@ -22,6 +22,7 @@
import optax
import jax
import jax.numpy as jnp
from pennylane import numpy as pnp
from sklearn.exceptions import ConvergenceWarning
from sklearn.utils import gen_batches

@@ -124,6 +125,63 @@ def update(params, opt_state, x, y):
return params


def train_without_jax(
model,
loss_fn,
optimizer,
X,
y,
random_key_generator,
convergence_interval=200
):
"""Trains a model using an optimizer and a loss function, using PennyLane's autograd interface.
"""

params = list(model.params_.values())
opt = optimizer(stepsize=model.learning_rate)

loss_history = []
converged = False
start = time.time()
for step in range(model.max_steps):
key = random_key_generator()
X_batch, y_batch = get_batch_without_jax(X, y, key, batch_size=model.batch_size)
X_batch = pnp.array(X_batch, requires_grad=False)
y_batch = pnp.array(y_batch, requires_grad=False)
loss_val = loss_fn(*params, X_batch, y_batch)
params = opt.step(loss_fn, *params, X_batch, y_batch)[:len(params)]
loss_history.append(loss_val)

logging.debug(f"{step} - loss: {loss_val}")

if np.isnan(loss_val):
logging.info(f"nan encountered. Training aborted.")
break

if step > 2 * convergence_interval:
average1 = np.mean(loss_history[-convergence_interval:])
average2 = np.mean(loss_history[-2 * convergence_interval:-convergence_interval])
std1 = np.std(loss_history[-convergence_interval:])
if np.abs(average2 - average1) <= std1 / np.sqrt(convergence_interval) / 2:
logging.info(f"Model {model.__class__.__name__} converged after {step} steps.")
converged = True
break

end = time.time()
loss_history = np.array(loss_history)
model.loss_history_ = loss_history / np.max(np.abs(loss_history))
model.training_time_ = end - start

if not converged:
raise ConvergenceWarning(
f"Model {model.__class__.__name__} has not converged after the maximum number of {model.max_steps} steps.")

for i, key in enumerate(model.params_.keys()):
model.params_[key] = params[i]

return model.params_


def get_batch(X, y, rnd_key, batch_size=32):
"""
A generator to get random batches of the data (X, y)
@@ -145,6 +203,25 @@ def get_batch(X, y, rnd_key, batch_size=32):
return X[rnd_indices], y[rnd_indices]


def get_batch_without_jax(X, y, rnd_key, batch_size=32):
"""
Get a random batch of the data (X, y) without using jax.

Args:
X (array[float]): Input data with shape (n_samples, n_features).
y (array[float]): Target labels with shape (n_samples,)
rnd_key (int): seed for the batch selection (currently unused; the batch is drawn with numpy's global RNG)
batch_size (int): Number of elements in batch

Returns:
array[float]: A batch of input data shape (batch_size, n_features)
array[float]: A batch of target labels shaped (batch_size,)
"""
all_indices = list(range(len(X)))
rnd_indices = np.random.choice(all_indices, size=(batch_size,), replace=True)
return X[rnd_indices], y[rnd_indices]


def get_from_dict(dict, key_list):
"""
Access a value from a nested dictionary.
@@ -292,3 +369,54 @@ def chunked_loss(params, X, y):
return jnp.mean(res)

return chunked_loss


####### LOSS UTILS WITHOUT JAX

def l2_loss(pred, y):
"""
The square loss function. 0.5 is there to match optax.l2_loss.
"""
return 0.5 * (pred - y) ** 2


def softmax(x, axis=-1):
"""
copied from JAX: https://jax.readthedocs.io/en/latest/_modules/jax/_src/nn/functions.html#softmax
"""
x_max = pnp.max(x, axis, keepdims=True)
unnormalized = pnp.exp(x - x_max)
result = unnormalized / pnp.sum(unnormalized, axis, keepdims=True)
return result


def one_hot(a, num_classes=2):
"""
Convert an array of integer class indices to a one-hot encoded array.
Taken from https://stackoverflow.com/questions/29831489/convert-array-of-indices-to-one-hot-encoded-array-in-numpy
"""
b = pnp.zeros((a.size, num_classes))
b[pnp.arange(a.size), a] = 1
return b


def log_softmax(x, axis=-1):
"""
taken from jax.nn.log_softmax:
https://jax.readthedocs.io/en/latest/_modules/jax/_src/nn/functions.html#log_softmax
"""
x_arr = pnp.asarray(x)
x_max = pnp.max(x_arr, axis, keepdims=True)
x_max = pnp.array(x_max, requires_grad=False)
shifted = x_arr - x_max
shifted_logsumexp = pnp.log(
pnp.sum(pnp.exp(shifted), axis, keepdims=True))
result = shifted - shifted_logsumexp
return result


def softmax_cross_entropy(logits, labels):
"""taken from optax source:
https://github.com/google-deepmind/optax/blob/master/optax/losses/_classification.py
"""
return -pnp.sum(labels * log_softmax(logits, axis=-1), axis=-1)
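
Not part of the diff: a minimal usage sketch of train_without_jax together with the non-jax loss utility above. Only train_without_jax and l2_loss come from this file; ToyModel, loss_fn, and the toy data are hypothetical stand-ins illustrating the attributes the trainer reads (params_, learning_rate, max_steps, batch_size) and the calling convention loss_fn(*params, X_batch, y_batch).

import numpy as np
from pennylane import numpy as pnp
from pennylane.optimize import AdamOptimizer

from qml_benchmarks.model_utils import train_without_jax, l2_loss


class ToyModel:
    """Bare-bones stand-in exposing the attributes train_without_jax expects."""

    def __init__(self, n_features, learning_rate=0.05, max_steps=2000, batch_size=32):
        self.learning_rate = learning_rate
        self.max_steps = max_steps
        self.batch_size = batch_size
        # trainable parameters are stored in a dict, as the IQP models do
        self.params_ = {"weights": pnp.array(np.random.randn(n_features), requires_grad=True)}


def loss_fn(weights, X_batch, y_batch):
    # receives the unpacked params_ values first, then the (non-trainable) batch
    preds = pnp.tanh(pnp.dot(X_batch, weights))
    return pnp.mean(l2_loss(preds, y_batch))


X = np.random.rand(100, 3)
y = np.random.choice([-1.0, 1.0], size=100)
model = ToyModel(n_features=3)
rng = np.random.default_rng(42)

# note: train_without_jax raises sklearn's ConvergenceWarning if the loss
# has not plateaued after max_steps
params = train_without_jax(
    model, loss_fn, AdamOptimizer, X, y,
    random_key_generator=lambda: rng.integers(1_000_000),
)
print(model.loss_history_[-5:], model.training_time_)
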
46 changes: 31 additions & 15 deletions src/qml_benchmarks/models/iqp_kernel.py
@@ -16,7 +16,6 @@
import pennylane as qml
import numpy as np
import jax
import jax.numpy as jnp
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
@@ -31,12 +30,14 @@ def __init__(
svm=SVC(kernel="precomputed", probability=True),
repeats=2,
C=1.0,
use_jax=False,
vmap=False,
jit=False,
random_state=42,
scaling=1.0,
max_vmap=250,
dev_type="default.qubit.jax",
qnode_kwargs={"interface": "jax-jit", "diff_method": None},
dev_type="default.qubit",
mariaschuld marked this conversation as resolved.
Show resolved Hide resolved
qnode_kwargs={},
):
r"""
Kernel version of the classifier from https://arxiv.org/pdf/1804.11326v2.pdf.
@@ -58,17 +59,21 @@
svm (sklearn.svm.SVC): scikit-learn SVM class object used to fit the model from the kernel matrix
repeats (int): number of times the IQP structure is repeated in the embedding circuit.
C (float): regularization parameter for SVC. Lower values imply stronger regularization.
use_jax (bool): Whether to use jax. If False, no jitting or vmapping is performed either.
jit (bool): Whether to use just-in-time compilation.
vmap (bool): Whether to use jax.vmap.
max_vmap (int or None): The maximum size of a chunk to vectorise over. Lower values use less memory.
    must divide batch_size.
dev_type (str): string specifying the pennylane device type; e.g. 'default.qubit'.
qnode_kwargs (dict): the keyword arguments passed to the circuit qnode.
scaling (float): Factor by which to scale the input data.
random_state (int): seed used for reproducibility.
"""
# attributes that do not depend on data
self.repeats = repeats
self.C = C
self.use_jax = use_jax
self.vmap = vmap
self.jit = jit
self.max_vmap = max_vmap
self.svm = svm
@@ -86,7 +91,9 @@ def __init__(
self.circuit = None

def generate_key(self):
if self.use_jax:
    return jax.random.PRNGKey(self.rng.integers(1000000))
return self.rng.integers(1000000)

def construct_circuit(self):
dev = qml.device(self.dev_type, wires=self.n_qubits_)
@@ -115,7 +122,7 @@ def circuit(x):

self.circuit = circuit

if self.use_jax and self.jit:
circuit = jax.jit(circuit)
return circuit

@@ -132,15 +139,19 @@ def precompute_kernel(self, X1, X2):
dim2 = len(X2)

# concatenate all pairs of vectors
# Author's review note: using pure numpy here because we're not differentiating
# through the construction of the kernel matrix.
Z = np.array(  # was jnp.array
[np.concatenate((X1[i], X2[j])) for i in range(dim1) for j in range(dim2)]
)

circuit = self.construct_circuit()

if self.use_jax and self.vmap:
self.batched_circuit = chunk_vmapped_fn(
jax.vmap(circuit, 0), start=0, max_vmap=self.max_vmap
)
kernel_values = self.batched_circuit(Z)[:, 0]
else:
kernel_values = np.array([circuit(z)[0] for z in Z])

# reshape the values into the kernel matrix
kernel_matrix = np.reshape(kernel_values, (dim1, dim2))
@@ -174,11 +185,14 @@ def fit(self, X, y):
y (np.ndarray): Labels of shape (n_samples,)
"""

if self.use_jax:
    self.svm.random_state = int(
        jax.random.randint(
            self.generate_key(), shape=(1,), minval=0, maxval=1000000
        )
    )
else:
    self.svm.random_state = self.generate_key()

self.initialize(X.shape[1], np.unique(y))

Expand Down Expand Up @@ -244,3 +258,5 @@ def transform(self, X, preprocess=True):
X = self.scaler.transform(X)

return X * self.scaling
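
Not part of the diff: a minimal end-to-end sketch of the new non-jax code path. It assumes the class defined in iqp_kernel.py is exposed as IQPKernelClassifier with the usual scikit-learn fit/predict interface (the class name and predict are outside the visible hunks), and uses the __init__ defaults shown above.

import numpy as np
from qml_benchmarks.models.iqp_kernel import IQPKernelClassifier

X = np.random.rand(20, 4)              # 20 samples, 4 features in [0, 1]
y = np.random.choice([-1, 1], size=20)

# use_jax=False: kernel entries are evaluated in a plain Python loop with numpy,
# jit/vmap are ignored, and generate_key() returns an integer seed for the SVC
model = IQPKernelClassifier(use_jax=False, jit=False, vmap=False, repeats=2)
model.fit(X, y)
print(model.predict(X[:5]))
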

