diff --git a/nersc/single_circuits/LK_CPU-vs-GPU.png b/nersc/single_circuits/LK_CPU-vs-GPU.png new file mode 100644 index 0000000..a7aa469 Binary files /dev/null and b/nersc/single_circuits/LK_CPU-vs-GPU.png differ diff --git a/nersc/single_circuits/LK_qjit-compile.png b/nersc/single_circuits/LK_qjit-compile.png new file mode 100644 index 0000000..0ae2f44 Binary files /dev/null and b/nersc/single_circuits/LK_qjit-compile.png differ diff --git a/nersc/single_circuits/LK_qjit-vs-base.png b/nersc/single_circuits/LK_qjit-vs-base.png new file mode 100644 index 0000000..505b6d5 Binary files /dev/null and b/nersc/single_circuits/LK_qjit-vs-base.png differ diff --git a/nersc/single_circuits/README.md b/nersc/single_circuits/README.md new file mode 100644 index 0000000..3fed3fa --- /dev/null +++ b/nersc/single_circuits/README.md @@ -0,0 +1,286 @@ + +# Benchmarking quantum circuits + + +## Run with Python `venv` + +### `lightning-kokkos` from pypi wheels + +Python venv with pypi wheels +``` +cd /global/common/software/m4693/ + +module load python +mkdir -p venv +python -m venv venv/qml_LK +source venv/qml_LK/bin/activate + +cd /global/cfs/cdirs/m4693/qml-benchmarks-devel +pip install -e . 
# --user + +pip install ray # for other experiments + +pip install pennylane-lightning +pip install pennylane-lightning[kokkos] + +pip install pennylane-catalyst +``` + +Start interactive job on CPU node for testing +``` bash +salloc -q interactive -C cpu -t 0:30:00 -A m4693 + +# and execute in this interactive session: + +source /global/common/software/m4693/venv/qml_LK/bin/activate +cd nersc/ + +# to restrict the number of threads: +export OMP_NUM_THREADS=32 +python3 single_circuits/demo_variational.py -q lightning.qubit -n 15,20 -r +``` + +Stats on interactive CPU node (nid004079) +``` +> Weights as native numpy arrays +lightning.qubit + 15 - 0.1 s + 20 - 3.3 s + 21 - 7 s + 22 - 16 s + 23 - 35 s +lightning.kokkos + 23 - 1 s + 25 - 5 s (7 s with 32 threads) + 26 - 34 s + +> Benchmarking numpy/qml.numpy, gradients with "adjoint" +> no-grad: qml.np.array(requires_grad=True) but no jacobian requested +lightning.qubit + numpy qml.np qml.np qjit qjit qjit + no-grad grad comp no-grad grad + 15 - 0.14 0.16 1.3 10.4 0.1 error + 16 - 0.24 0.25 2.0 11.6 0.2 + 17 - 0.44 0.42 3.7 12.8 0.3 + 20 - 3.75 3.74 32.6 19.8 3.4 +> NotImplementedError: Converting dtype('O') to a ctypes type +lightning.kokkos (with 32 threads) + numpy qml.np qml.np qjit qjit qjit + no-grad grad comp no-grad grad + 15 - 0.1 0.1 0.7 10.3 0.0 + 20 - 0.3 0.3 2.4 16.6 0.3 + 23 - 1.4 1.4 15.1 21.5 1.3 + 25 - 6.9 6.9 101.1 30.7 7.3 + +> Benchmarking numpy/qml.numpy, gradients with "finite-diff" +lightning.qubit + numpy qml.np qml.np qjit qjit qjit + no-grad grad comp no-grad grad + 15 - 0.1 0.2 - 9.9 0.0 42.3 + 16 - 0.1 0.3 - 11.1 0.1 87.7 + 17 - 0.3 0.4 - 12.3 0.2 + 20 - 2.6 3.0 - 18.2 2.5 + +lightning.kokkos (with 32 threads) + numpy qml.np qml.np qjit qjit qjit + no-grad grad comp no-grad grad + 15 - 0.1 0.1 - 10.1 0.0 27.3 + 20 - 0.2 0.3 - 16.1 0.3 189.4 + 23 - 1.3 1.5 - 21.3 1.4 - + 25 - 6.5 6.6 - 30.4 7.6 - +``` + +### `lightning-kokkos` from source with CUDA + +lightning-kokkos with GPU +- 
https://pypi.org/project/PennyLane-Lightning-Kokkos/ +- https://docs.pennylane.ai/projects/lightning/en/stable/lightning_kokkos/installation.html +- https://github.com/PennyLaneAI/lightning-on-hpc/blob/main/DataCollection/distributed/LUMI_LKOKKOS_VQE/README.md- + +``` bash +cd /global/common/software/m4693/ + +module load cudatoolkit + +module load python +mkdir -p venv +python -m venv venv/qml_LK_GPU +source venv/qml_LK_GPU/bin/activate + +python -m pip install pip==22.0 + +git clone https://github.com/PennyLaneAI/pennylane-lightning.git +cd pennylane-lightning + +git checkout v0.36.0 + +pip install -r requirements.txt +pip install ray + +# pip install pennylane-catalyst # [added later] + +# install lightning-qubit as prerequisite +CXX=$(which CC) python -m pip install -e . --verbose + +CXX=$(which CC) CMAKE_ARGS="-DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_AMPERE80:BOOL=ON -DCMAKE_CXX_COMPILER=$(which CC)" PL_BACKEND="lightning_kokkos" python -m pip install . --verbose +``` + +Start interactive job on GPU node for testing +``` bash +salloc -q interactive -C gpu -t 0:30:00 -A m4693 + +# and execute in this interactive session: + +source /global/common/software/m4693/venv/qml_LK_GPU/bin/activate +cd nersc/ + +# to restrict the number of threads: +#export OMP_NUM_THREADS=1 + +python3 single_circuits/demo_variational.py -q lightning.kokkos -n 20,25 -r +``` + +Stats on interactive GPU node (nid200381) +``` +lightning.kokkos + 23 - s + 25 - 3 s + 26 - 6 s + 27 - 12 s + 28 - 25 s + +> Benchmarking numpy/qml.numpy, gradients +> no-grad: qml.np.array(requires_grad=True) but no jacobian requested +lightning.kokkos + numpy qml.np jacobian qjit qjit qjit + no-grad grad comp no-grad grad + 22 - s 2 s 5 s 20 s 1 s + 23 - s 4 s 9 s + 25 - 3 s 18 s 37 s 50 s 24 s + 26 - 6 s + 27 - 12 s + 28 - 25 s +> Kokkos::Cuda ERROR: Failed to call Kokkos::Cuda::finalize() + +> Benchmarking numpy/qml.numpy, gradients with "finite-diff" +lightning.kokkos + numpy qml.np 
qml.np qjit qjit qjit + no-grad grad comp no-grad grad + 15 - 10.3 0.1 52.7 + 20 - 16.5 0.4 + 22 - 0.3 0.5 - 20.4 1.1 + 23 - 0.6 0.8 - 22.9 2.8 + 25 - 2.6 2.9 - 49.1 24.0 + 26 - 5.6 5.8 - + +``` + +Run batch of circuits in parallel +``` bash +# @ray.remote(num_gpus=0.5) has same runtime than num_gpus=1 +time python3 single_circuits/batch_variational.py -n 26 -s 4 + +# move task to background and monitor GPU usage +nvidia-smi +``` + +Stats on 1 interactive GPU node +``` +ray_init in 7 to 15 s +> How long does 1 circuit run on its GPU? +25 features + samples run_time run_time/sample*gpu + - 3 + 16 32 8 +26 features + samples run_time run_time/sample*gpu + - 6 + 4 10 10 + 8 23 11 + 16 39 10 + 32 77 10 +> create dev 1.8 s +> create circuit < 1 ms +27 features + samples run_time run_time/sample*gpu + - 12 + 4 16 16 + 8 31 15 +> Overhead of 4 s per circuit with Ray +> This includes creating dev + circuit + +30 features + samples run_time run_time/sample*gpu + - n.a. + 4 120 120 +> create dev 3.3 s +> create circuit < 1 ms + +> Run r circuits sequentially within 1 ray job: +batch_variational.py -n 26 -s 32 -r 8 + total: 48.949 s + per_circuit: 6.119 s +> per circuit runtime is equivalent to run w/o ray +``` + +## Run in `podman` containers + +Prerequisite: Make sure to have datasets available in `single_circuits/linearly_separable`. 
+ +Start interactive job on CPU node for testing +``` bash +salloc -q interactive -C cpu -t 0:30:00 -A m4693 + +# and execute in this interactive session: + +IMG=tgermain/ubu22-pennylane-ray + +# For preliminary testing whether image is available on node: +CFSH=/global/cfs/cdirs/m4693 # CFS home +REPO_DIR=$CFSH/qml-benchmarks-devel # qml-benchmark repo +ROOT_DIR=$REPO_DIR/nersc/root # to access local python packages +WORK_DIR=$REPO_DIR/nersc # to store output files +# Mount /tmp to avoid following error with Ray: +# ValueError: Can't find a `node_ip_address.json` file + +podman-hpc run -it \ + --net host \ + --volume /tmp:/tmp \ + --volume $ROOT_DIR:/root \ + --volume $REPO_DIR:/qml-benchmarks \ + --volume $WORK_DIR:/work_dir \ + --workdir /work_dir \ + -e HDF5_USE_FILE_LOCKING='FALSE' \ + --shm-size=10.24gb \ + $IMG bash + +# Then execute in container, in `work_dir/`: + +python3 single_circuits/circuit_variational.py --model IQPVariationalClassifier --numFeatures 21 --inputPath single_circuits/linearly_separable/ + +python3 single_circuits/demo_variational.py + +# exit container + +# Run container interactively with wrapper +./wrap_podman.sh $IMG "python3 single_circuits/demo_variational.py" +``` + +## Plot benchmarks + +``` +cd /global/common/software/m4693/ + +module load python +mkdir -p venv + +python -m venv venv/qml_plot +source venv/qml_plot/bin/activate + +pip install matplotlib pandas +``` + +``` +source /global/common/software/m4693/venv/qml_plot/bin/activate + +``` \ No newline at end of file diff --git a/nersc/single_circuits/batch_variational.py b/nersc/single_circuits/batch_variational.py new file mode 100644 index 0000000..d06d7a4 --- /dev/null +++ b/nersc/single_circuits/batch_variational.py @@ -0,0 +1,165 @@ + +''' +https://docs.ray.io/en/latest/ray-core/tasks.html#ray-remote-functions +https://docs.ray.io/en/latest/ray-core/patterns/limit-running-tasks.html +''' + +import argparse +import time + +import numpy as np +import ray + +import pennylane 
as qml
+
+# TODO: fix hanging run with qml.np
+#from pennylane import numpy as np
+
+from datetime import datetime
+
+def get_parser():  # builds the CLI parser and returns the parsed argparse namespace
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-n', '--numFeatures', type=int, default=15, help="dataset dimension ")
+    parser.add_argument('-s', '--numSamples', type=int, default=4, help="number of sample circuits (in one batch)")
+    parser.add_argument('-r', '--numRuns', type=int, default=1, help="number of circuit run sequentially within one ray job")
+    parser.add_argument('-d', '--dryRun', action='store_true', help="print specs only, no circuit execution")
+    args = parser.parse_args()
+    return args
+
+args = get_parser()
+
+def print_elapsed(name, t1, t2):  # prints "<name>: <seconds> s" for the interval t2 - t1; '%.3f' (not '%.3 ') is required, a precision without a conversion type raises ValueError
+    print("%s: %.3f s" % (name, (t2 - t1).total_seconds()))
+
+def print_per_circuit(name, t1, t2, n_circuits, n_gpus=4):  # prints elapsed seconds / n_circuits * n_gpus
+    seconds = (t2 - t1).total_seconds() / n_circuits * n_gpus
+    print("%s: %.3f s" % (name, seconds))
+
+# Model parameters available in config originate from circuit_variational.py.
+catalog = { + 15: {'n_features': 15, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (15,), 'num_wires': 15, 'num_gates': 1800, 'depth': 267}, + 20: {'n_features': 20, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (20,), 'num_wires': 20, 'num_gates': 2900, 'depth': 310}, + 21: {'n_features': 21, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (21,), 'num_wires': 21, 'num_gates': 3150, 'depth': 321}, + 22: {'n_features': 22, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (22,), 'num_wires': 22, 'num_gates': 3410, 'depth': 333}, + 23: {'n_features': 23, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (23,), 'num_wires': 23, 'num_gates': 3680, 'depth': 346}, + 24: {'n_features': 24, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (24,), 'num_wires': 24, 'num_gates': 3960, 'depth': 358}, + 25: {'n_features': 25, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (25,), 'num_wires': 25, 'num_gates': 4250, 'depth': 371}, + 26: {'n_features': 26, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (26,), 'num_wires': 26, 'num_gates': 4550, 'depth': 383}, + 27: {'n_features': 27, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (27,), 'num_wires': 27, 'num_gates': 4860, 'depth': 396}, + 28: {'n_features': 28, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (28,), 'num_wires': 28, 'num_gates': 5180, 'depth': 409}, + 29: {'n_features': 29, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (29,), 'num_wires': 29, 'num_gates': 5510, 'depth': 423}, + 30: {'n_features': 30, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (30,), 'num_wires': 30, 'num_gates': 5850, 'depth': 435}, +} + +config = dict(catalog[args.numFeatures]) + +config['device'] = 'lightning.kokkos' +config['n_samples'] = args.numSamples +config['n_circ_per_job'] = args.numRuns + +n_features = config['n_features'] +n_layers = 
config['n_layers'] +n_repeats = config['n_repeats'] + +class VariationalModel: + + def __init__(self, n_features, n_layers, n_repeats, x): + self.n_qubits_ = n_features + self.n_layers = n_layers + self.repeats = n_repeats + self.params_ = None + + self.initialize_params() + + def initialize_params(self): + weights = 2 * np.pi * np.random.uniform(size=(self.n_layers, self.n_qubits_, 3)) + weights = np.array(weights) # requires_grad=True # + + self.params_ = {"weights": weights} + + def create_circuit(self, dev, x): + + @qml.qnode(dev) # diff_method="adjoint" # + def circuit(params, x): + """ + The variational circuit from the plots. Uses an IQP data embedding. + We use the same observable as in the plots. + """ + qml.IQPEmbedding(x, wires=range(self.n_qubits_), n_repeats=self.repeats) + qml.StronglyEntanglingLayers( + params["weights"], wires=range(self.n_qubits_), imprimitive=qml.CZ + ) + return qml.expval(qml.PauliZ(0) @ qml.PauliZ(1)) + + return circuit + +train_shape = (config['sample_shape'][0], config['n_samples']) + +X = np.random.rand(*train_shape) +x = X[:, 0] + +model = VariationalModel(n_features, n_layers, n_repeats, x) + +if args.dryRun: + print('inspecting circuit()') + dev = qml.device(config['device'], wires=n_features) + circuit = model.create_circuit(dev, x) + specs = qml.specs(circuit, expansion_strategy='device')( + model.params_, x[np.newaxis, :]) + print({ + 'n_features': n_features, + 'n_layers': model.n_layers, + 'n_repeats': model.repeats, + 'n_params': len(model.params_), + 'sample_shape': X.shape, + 'device_name': specs['device_name'], + 'gradient_fn': specs['gradient_fn'], + 'num_wires': specs['resources'].num_wires, + 'num_gates': specs['resources'].num_gates, + 'depth': specs['resources'].depth, + }) + exit(0) + +@ray.remote(num_gpus=1) +def exec_circuit(model, x, n_circuits): + # device creation must be in remote function, + # because Lightning is not pickable + t_start = datetime.now() + dev = qml.device(config['device'], 
wires=n_features)
+    t_end = datetime.now()
+    print_elapsed('create_dev', t_start, t_end)
+    t_start = datetime.now()
+    circuit = model.create_circuit(dev, x[:, 0])
+    t_end = datetime.now()
+    print_elapsed('create_circuit', t_start, t_end)
+
+    for i in range(n_circuits):
+        expval = circuit(model.params_, x[:, i])
+        # TODO: activate gradients
+        #grads = qml.jacobian(circuit)(model.params_, x)
+
+    return expval
+
+print('ray init()')
+t_start = datetime.now()
+ray.init()
+t_end = datetime.now()
+print_elapsed('ray_init', t_start, t_end)
+
+print('running circuit()')
+t_start = datetime.now()
+
+n_cpj = config['n_circ_per_job']
+n_jobs = config['n_samples'] // n_cpj  # floor division: a trailing partial batch of samples is not submitted
+print('n_jobs:', n_jobs)
+result_refs = []
+for i in range(n_jobs):
+    result_refs.append(
+        exec_circuit.remote(model, X[:, i * n_cpj: (i + 1) * n_cpj], n_cpj))
+
+res = ray.get(result_refs)
+
+t_end = datetime.now()
+#print(res)
+print_elapsed('total', t_start, t_end)
+print_per_circuit('per_circuit', t_start, t_end, max(1, n_jobs * n_cpj))  # normalize by the circuits actually executed, not by the requested sample count
diff --git a/nersc/single_circuits/circuit_variational.py b/nersc/single_circuits/circuit_variational.py
new file mode 100644
index 0000000..39711e4
--- /dev/null
+++ b/nersc/single_circuits/circuit_variational.py
@@ -0,0 +1,183 @@
+
+from datetime import datetime
+
+import argparse
+import csv
+import os
+import subprocess
+import time
+import yaml
+
+from pprint import pprint
+
+import jax
+import jax.numpy as jnp
+import numpy as np
+
+import pennylane as qml
+
+import qml_benchmarks
+
+from qml_benchmarks.hyperparam_search_utils import read_data
+
+def get_parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-v", "--verbosity", type=int, choices=[0, 1, 2, 3, 4], help="increase output verbosity",
+                        default=1, dest='verb')
+    parser.add_argument("--inputPath", default='linearly_separable/', help='input data location')
+    parser.add_argument('-n', '--numFeatures', type=int, default=2, help="dataset dimension ")
+    parser.add_argument('-m', '--model',
help="model: IQPVariationalClassifier, QuantumMetricLearner") + parser.add_argument('-d', '--dryRun', action='store_true', help="print specs only, no circuit execution") + + args = parser.parse_args() + + print('myArg-program:', parser.prog) + for arg in vars(args): print('myArg:', arg, getattr(args, arg)) + + # assert os.path.exists(args.outPath) + return args + + +# ================================= +# ================================= +# M A I N +# ================================= +# ================================= +if __name__ == "__main__": + args = get_parser() + + #define model + if args.model == 'QuantumMetricLearner': + from qml_benchmarks.models.quantum_metric_learning import QuantumMetricLearner as Model + elif args.model == 'IQPVariationalClassifier': + from qml_benchmarks.models.iqp_variational import IQPVariationalClassifier as Model + else: + raise ValueError('unknown model %s' % args.model) + + #implementation attributes of model + use_jax = True + vmap = True + jit = True + max_steps = 100 #the number of gradient descent steps to use to estimate the step time + model_settings = {'use_jax': use_jax, 'vmap': vmap, 'jit': jit, 'max_steps': max_steps} + + perf_ind_name = 'JAX' #a name for the performance indicator used for naming files + n_trials = 1 #number of trials to average over + n_test = -1 #number of test set points. 
For full test set use n_test = -1 + ################################# + + n_features = args.numFeatures # dataset dimension + model_name = Model().__class__.__name__ + + # get the 'worst case' hyperparameter settings for the model (those that require the most resources) + with open('performance_indicators/hyperparam_settings.yaml', "r") as file: + hp_settings = yaml.safe_load(file) + + hyperparams = {**hp_settings[model_name], **model_settings} + print(hyperparams) + + hyperparams['dev_type'] = 'lightning.qubit' + + assert os.path.exists(args.inputPath) + # inpF1=f'../../paper/benchmarks/linearly_separable/linearly_separable_{n_features}d_train.csv' + inpF1 = os.path.join(args.inputPath, 'linearly_separable_%dd_train.csv' % (n_features)) + inpF2 = inpF1.replace('train', 'test') + print('M:inpF1', inpF1) + X_train, y_train = read_data(inpF1) + print('M:inpF2', inpF2) + X_test, y_test = read_data(inpF2) + + if n_test != -1: + X_test = X_test[:n_test] + y_test = y_test[:n_test] + + first_train_steps = [] + av_consec_train_steps = [] + predict_times = [] + + model = Model(**hyperparams) + + def init_circuit(X, y): + # Derived from: `model.fit(X_train, y_train)` + model.initialize(n_features=X.shape[1], classes=np.unique(y)) + return model.circuit + + circuit = init_circuit(X_train, y_train) + + ''' + def initialize_params(self): + weights = 2 * np.pi * np.random.uniform(size=(self.n_layers, self.n_qubits_, 3)) + weights = jnp.array(weights) + + self.params_ = {"weights": weights} + + params = model.params_ + x = X_batch + + @qml.qnode(dev, **self.qnode_kwargs) + def circuit(params, x): + """ + The variational circuit from the plots. Uses an IQP data embedding. + We use the same observable as in the plots. 
+ """ + qml.IQPEmbedding(x, wires=range(self.n_qubits_), n_repeats=self.repeats) + qml.StronglyEntanglingLayers( + params["weights"], wires=range(self.n_qubits_), imprimitive=qml.CZ + ) + return qml.expval(qml.PauliZ(0) @ qml.PauliZ(1)) + ''' + + X = X_train[0] + y = y_train[0] + + def print_elapsed(t1, t2): + print("%.6f s" % ((t2 - t1).total_seconds())) + + specs = qml.specs(circuit, expansion_strategy='device')(model.params_, X) + #pprint(specs) + print({ + 'n_features': args.numFeatures, + 'n_layers': model.n_layers, + 'n_repeats': model.repeats, + 'n_params': len(model.params_), + 'sample_shape': X.shape, + 'device_name': specs['device_name'], + 'gradient_fn': specs['gradient_fn'], + 'num_wires': specs['resources'].num_wires, + 'num_gates': specs['resources'].num_gates, + 'depth': specs['resources'].depth, + }) + + if args.dryRun: + exit(0) + + print('M:executing circuit') + t_start = datetime.now() + + for trial in range(n_trials): + jax.clear_caches() + + expval = circuit(model.params_, X) + grads = qml.jacobian(circuit)(model.params_, X) + + t_end = datetime.now() + print_elapsed(t_start, t_end) + + # {'n_features': 15, 'n_layers': 15, 'n_params': 1, 'sample_shape': (15,)} + + # {'num_features': 15, 'num_wires': 15, 'num_gates': 1800, 'depth': 267} + # {'num_features': 20, 'num_wires': 20, 'num_gates': 2900, 'depth': 310} + # {'num_features': 21, 'num_wires': 21, 'num_gates': 3150, 'depth': 321} + # {'num_features': 22, 'num_wires': 22, 'num_gates': 3410, 'depth': 333} + + # default.qubit + # 15d - 10 s + # 20d - 45 s + + # lightning.qubit + # 15d - 0.4 s + # 20d - 5 s + # 21d - 10 s + # 22d - 22 s + + print('M:done') diff --git a/nersc/single_circuits/demo_variational.py b/nersc/single_circuits/demo_variational.py new file mode 100644 index 0000000..157009e --- /dev/null +++ b/nersc/single_circuits/demo_variational.py @@ -0,0 +1,205 @@ + +''' +Demo of IQPVariationalClassifier using qml.IQPEmbedding and qml.StronglyEntanglingLayers. 
+''' + +import argparse +import os +import subprocess + +import pennylane as qml +import catalyst + +from datetime import datetime + +def get_parser(): + parser = argparse.ArgumentParser() + parser.add_argument('-n', '--numFeatures', type=str, default='15', help="dataset dimension(s) (comma separated list)") + parser.add_argument('-q', '--device', default='lightning.qubit', help="quantum device e.g. lightning.qubit") + parser.add_argument('-g', '--gradients', action='store_true', help="request gradients wrt. all weights") + parser.add_argument('-j', '--jit', action='store_true', help="JIT with Catalyst") + parser.add_argument('--numpy', action='store_true', help="use numpy instead of pennylane.numpy") + parser.add_argument('-d', '--dryRun', action='store_true', help="print specs only, no circuit execution") + parser.add_argument('-r', '--report', action='store_true', help="print for report") + args = parser.parse_args() + return args + +args = get_parser() + +def print_elapsed(prefix, t1, t2): + print("%s%6.1f s" % (prefix, (t2 - t1).total_seconds())) + +# Model parameters available in config originate from circuit_variational.py. 
+catalog = { + 10: {'n_features': 10, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (10,), 'num_wires': 10, 'num_gates': 950, 'depth': 198}, + 15: {'n_features': 15, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (15,), 'num_wires': 15, 'num_gates': 1800, 'depth': 267}, + 16: {'n_features': 16, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (16,), 'num_wires': 16, 'num_gates': 2000, 'depth': 281}, + 17: {'n_features': 17, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (17,), 'num_wires': 17, 'num_gates': 2210, 'depth': 282}, + 18: {'n_features': 18, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (18,), 'num_wires': 18, 'num_gates': 2430, 'depth': 289}, + 19: {'n_features': 19, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (19,), 'num_wires': 19, 'num_gates': 2660, 'depth': 299}, + 20: {'n_features': 20, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (20,), 'num_wires': 20, 'num_gates': 2900, 'depth': 310}, + 21: {'n_features': 21, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (21,), 'num_wires': 21, 'num_gates': 3150, 'depth': 321}, + 22: {'n_features': 22, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (22,), 'num_wires': 22, 'num_gates': 3410, 'depth': 333}, + 23: {'n_features': 23, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (23,), 'num_wires': 23, 'num_gates': 3680, 'depth': 346}, + 24: {'n_features': 24, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (24,), 'num_wires': 24, 'num_gates': 3960, 'depth': 358}, + 25: {'n_features': 25, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (25,), 'num_wires': 25, 'num_gates': 4250, 'depth': 371}, + 26: {'n_features': 26, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (26,), 'num_wires': 26, 'num_gates': 4550, 'depth': 383}, + 27: {'n_features': 27, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 
'sample_shape': (27,), 'num_wires': 27, 'num_gates': 4860, 'depth': 396}, + 28: {'n_features': 28, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (28,), 'num_wires': 28, 'num_gates': 5180, 'depth': 409}, + 29: {'n_features': 29, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (29,), 'num_wires': 29, 'num_gates': 5510, 'depth': 423}, + 30: {'n_features': 30, 'n_layers': 15, 'n_repeats': 10, 'n_params': 1, 'sample_shape': (30,), 'num_wires': 30, 'num_gates': 5850, 'depth': 435}, +} + +if args.numpy: + import numpy as np +else: + from pennylane import numpy as np # + from jax import numpy as jnp # + +if args.jit: + qjit = catalyst.qjit +else: + def qjit(func): + def wrapper(*args, **kwargs): + return func(*args, **kwargs) + return wrapper + +class VariationalModel: + + def __init__(self, dev, n_features, n_layers, n_repeats, x): + self.dev = dev + self.n_qubits_ = n_features + self.n_layers = n_layers + self.repeats = n_repeats + self.params_ = None + + self.initialize_params() + self.create_circuit(x) + + def initialize_params(self): + weights = 2 * np.pi * np.random.uniform(size=(self.n_layers, self.n_qubits_, 3)) + if args.numpy: + weights = np.array(weights) + else: + if args.jit: + weights = jnp.array(weights) # + else: + weights = np.array(weights, requires_grad=True) # + self.params_ = {"weights": weights} + + def create_circuit(self, x): + + @qjit + @qml.qnode(self.dev, grad_on_execution=False) # + def circuit(weights, x): + """ + The variational circuit from the plots. Uses an IQP data embedding. + We use the same observable as in the plots. 
+ """ + qml.IQPEmbedding(x, wires=range(self.n_qubits_), n_repeats=self.repeats) + qml.StronglyEntanglingLayers( + weights, wires=range(self.n_qubits_), imprimitive=qml.CZ + ) + return qml.expval(qml.PauliZ(0) @ qml.PauliZ(1)) + + self.circuit = circuit + + +def benchmark(numFeatures): + + config = dict(catalog[numFeatures]) + + config['device'] = args.device + if not args.report: + print('device:', args.device) + + n_features = config['n_features'] + n_layers = config['n_layers'] + n_repeats = config['n_repeats'] + + dev = qml.device(config['device'], wires=n_features) + + X = np.random.rand(*config['sample_shape']) + + model = VariationalModel(dev, n_features, n_layers, n_repeats, X) + circuit = model.circuit + + if args.dryRun: + print('inspecting circuit()') + specs = qml.specs(circuit, expansion_strategy='device')(model.params_, X) + print({ + 'n_features': n_features, + 'n_layers': model.n_layers, + 'n_repeats': model.repeats, + 'n_params': model.params_["weights"].size, + 'sample_shape': X.shape, + 'device_name': specs['device_name'], + 'gradient_fn': specs['gradient_fn'], + 'num_wires': specs['resources'].num_wires, + 'num_gates': specs['resources'].num_gates, + 'depth': specs['resources'].depth, + }) + exit(0) + + if not args.report: + print('running circuit()') + + weights = model.params_["weights"] + + if args.gradients: + if args.jit: + @qml.qjit + def run_grad(weights, x): + grads = catalyst.grad(circuit, method="fd")(weights, x) + return grads + else: + raise NotImplementedError('gradients w/o qjit') + + if args.jit: + # First run. Includes compilation if JIT. 
+        t_start = datetime.now()
+        expval = circuit(weights, X)
+        t_end = datetime.now()
+        #print_elapsed('%2d ' % n_features, t_start, t_end)
+        first_time = (t_end - t_start).total_seconds()
+    else:
+        first_time = 0.0
+
+    #factor = 1.01
+    #X_2 = X * factor
+    #params_2 = {"weights": model.params_["weights"] * factor}
+
+    t_start = datetime.now()
+    expval = circuit(weights, X)
+    if args.gradients:
+        grads = run_grad(weights, X)
+    t_end = datetime.now()
+    #print_elapsed('%2d ' % n_features, t_start, t_end)
+    second_time = (t_end - t_start).total_seconds()
+
+    #print(expval)
+
+    return_code = subprocess.call(
+        "nvidia-smi", shell=True,
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.STDOUT)
+
+    n_threads = os.getenv('OMP_NUM_THREADS', '0')
+
+    tokens = [
+        args.device,
+        'GPU' if return_code == 0 else 'CPU',
+        n_threads,
+        'grad' if args.gradients else '-',
+        'qjit' if args.jit else '-',
+        'np' if args.numpy else '-',
+        '%d' % n_features,
+        '%.3f' % first_time,
+        '%.3f' % second_time,
+    ]
+
+    print(','.join(tokens))
+
+
+for numFeatures in args.numFeatures.split(','):
+    benchmark(int(numFeatures))
diff --git a/nersc/single_circuits/demo_variational.sh b/nersc/single_circuits/demo_variational.sh
new file mode 100644
index 0000000..707b4ff
--- /dev/null
+++ b/nersc/single_circuits/demo_variational.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+set -e  # set here rather than in the shebang so it also applies when run as `bash demo_variational.sh`
+#for NQ in 15 16 17 20; do # lightning.qubit
+#for NQ in 15 20 23 25; do # lightning.kokkos CPU
+for NQ in 15 20 22 23 25 26; do # lightning.kokkos GPU
+    #echo $NQ
+    python3 demo_variational.py -n "$NQ" -r "$@" 2>/dev/null  # "$@" forwards each extra option as its own word; stderr is discarded
+done
diff --git a/nersc/single_circuits/minimal_variational.py b/nersc/single_circuits/minimal_variational.py
new file mode 100644
index 0000000..6d32fa0
--- /dev/null
+++ b/nersc/single_circuits/minimal_variational.py
@@ -0,0 +1,36 @@
+
+import functools
+
+import numpy as np
+from jax import numpy as jnp
+
+import pennylane as qml
+import catalyst
+
+from catalyst import qjit
+
+n_qubits_ = 4
+
+x = np.random.rand(n_qubits_)
+ +#shape = qml.StronglyEntanglingLayers.shape(n_layers=2, n_wires=n_qubits_) +shape = (2, 4, 3) +params = jnp.array(np.random.random(size=shape)) + +dev = qml.device("lightning.qubit", wires=n_qubits_) + +@qjit +def run_circuit(params, x): + + @qml.qnode(dev) # diff_method="adjoint" | "finite-diff" | "backprop" + #@functools.partial(qml.devices.preprocess.decompose, stopping_condition = lambda obj: obj.name not in ['Rot', 'StronglyEntanglingLayers'], max_expansion=3) + def circuit(params, x): + qml.IQPEmbedding(x, wires=range(n_qubits_), n_repeats=2) + qml.StronglyEntanglingLayers( + params, wires=range(n_qubits_), imprimitive=qml.CZ + ) + return qml.expval(qml.PauliZ(0) @ qml.PauliZ(1)) + + catalyst.grad(circuit, method="fd")(params, x) # method="fd" + +run_circuit(params, x) diff --git a/nersc/single_circuits/plot_variational.ipynb b/nersc/single_circuits/plot_variational.ipynb new file mode 100644 index 0000000..0d814dc --- /dev/null +++ b/nersc/single_circuits/plot_variational.ipynb @@ -0,0 +1,173 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "45198aed-7b98-44e7-b7eb-f0769de20953", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1e7c6c27-08e6-49b8-9251-39fda429ce0a", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('report.csv')\n", + "#print(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4e177c0a-058a-401e-8c31-968448e9230f", + "metadata": {}, + "outputs": [], + "source": [ + "def plot(df, cond, label, var='exec'):\n", + " if cond is None:\n", + " _df = df\n", + " else:\n", + " _df = df[cond]\n", + " n_qubits = _df['qubits']\n", + " exec_time = _df[var]\n", + " plt.plot(n_qubits, exec_time, label=label)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "2b315f1b-008a-49c4-8294-19131d034e66", + "metadata": {}, + "outputs": [ + { + 
"data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df_LK_CPU = df[(df['dev'] == 'lightning.kokkos') & (df['jit'] == '-') & (df['hw'] == 'CPU')]\n", + "df_LK_GPU = df[(df['dev'] == 'lightning.kokkos') & (df['jit'] == '-') & (df['hw'] == 'GPU')]\n", + "\n", + "plot(df_LK_CPU, (df_LK_CPU['threads'] == 16), 'LK 16th')\n", + "plot(df_LK_CPU, (df_LK_CPU['threads'] == 32), 'LK 32th')\n", + "plot(df_LK_CPU, (df_LK_CPU['threads'] == 64), 'LK 64th')\n", + "plot(df_LK_GPU, None, 'LK GPU')\n", + "plt.ylabel('execution')\n", + "\n", + "plt.xlabel('#qubits')\n", + "plt.yscale('log')\n", + "plt.grid()\n", + "plt.legend(framealpha=1)\n", + "\n", + "plt.savefig('LK_CPU-vs-GPU.png')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6db2cccd-95a5-41bb-92b6-57725e1bba6f", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df_LK = df[(df['dev'] == 'lightning.kokkos') & (df['threads'] == 32) & (df['hw'] == 'CPU')]\n", + "\n", + "plot(df_LK, (df_LK['jit'] == 'qjit'), 'LK qjit')\n", + "plot(df_LK, (df_LK['jit'] == '-'), 'LK base')\n", + "plt.ylabel('execution')\n", + "\n", + "plt.xlabel('#qubits')\n", + "plt.yscale('log')\n", + "plt.grid()\n", + "plt.legend(framealpha=1)\n", + "\n", + "plt.savefig('LK_qjit-vs-base.png')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "63f54b17-e320-47b8-ba2b-61ba502cf0c3", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df_LK = df[(df['dev'] == 'lightning.kokkos') & (df['threads'] == 32) & (df['hw'] == 'CPU')]\n", + "\n", + "plot(df_LK, (df_LK['jit'] == 'qjit'), 'LK qjit', 'comp')\n", + "plt.ylabel('compilation')\n", + "\n", + "plt.xlabel('#qubits')\n", + "plt.yscale('log')\n", + "plt.grid()\n", + "plt.legend(framealpha=1)\n", + "\n", + "plt.savefig('LK_qjit-compile.png')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "adbd2db6-0b38-4c75-9f99-70fa5a225770", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/nersc/single_circuits/report.csv b/nersc/single_circuits/report.csv new file mode 100644 index 0000000..7027982 --- /dev/null +++ b/nersc/single_circuits/report.csv @@ -0,0 +1,57 @@ +dev,hw,threads,grad,jit,np,qubits,comp,exec +lightning.kokkos,CPU,16,-,-,-,15,0.000,0.125 +lightning.kokkos,CPU,16,-,-,-,20,0.000,0.401 +lightning.kokkos,CPU,16,-,-,-,21,0.000,0.679 +lightning.kokkos,CPU,16,-,-,-,22,0.000,1.186 +lightning.kokkos,CPU,16,-,-,-,23,0.000,2.182 +lightning.kokkos,CPU,16,-,-,-,24,0.000,4.614 +lightning.kokkos,CPU,16,-,-,-,25,0.000,10.505 +lightning.kokkos,CPU,16,-,-,-,26,0.000,32.876 +lightning.kokkos,CPU,16,-,-,-,27,0.000,76.580 +lightning.kokkos,CPU,32,-,-,-,15,0.000,0.128 +lightning.kokkos,CPU,32,-,-,-,16,0.000,0.142 +lightning.kokkos,CPU,32,-,-,-,17,0.000,0.166 +lightning.kokkos,CPU,32,-,-,-,18,0.000,0.220 +lightning.kokkos,CPU,32,-,-,-,19,0.000,0.262 +lightning.kokkos,CPU,32,-,-,-,20,0.000,0.353 
+lightning.kokkos,CPU,32,-,-,-,21,0.000,0.494 +lightning.kokkos,CPU,32,-,-,-,22,0.000,0.801 +lightning.kokkos,CPU,32,-,-,-,23,0.000,1.423 +lightning.kokkos,CPU,32,-,-,-,24,0.000,2.934 +lightning.kokkos,CPU,32,-,-,-,25,0.000,6.621 +lightning.kokkos,CPU,32,-,-,-,26,0.000,29.947 +lightning.kokkos,CPU,32,-,-,-,27,0.000,64.426 +lightning.kokkos,CPU,64,-,-,-,15,0.000,0.135 +lightning.kokkos,CPU,64,-,-,-,20,0.000,0.341 +lightning.kokkos,CPU,64,-,-,-,21,0.000,0.474 +lightning.kokkos,CPU,64,-,-,-,22,0.000,0.653 +lightning.kokkos,CPU,64,-,-,-,23,0.000,1.052 +lightning.kokkos,CPU,64,-,-,-,24,0.000,2.247 +lightning.kokkos,CPU,64,-,-,-,25,0.000,5.124 +lightning.kokkos,CPU,64,-,-,-,26,0.000,33.576 +lightning.kokkos,CPU,64,-,-,-,27,0.000,72.099 +lightning.kokkos,GPU,0,-,-,-,15,0.000,0.118 +lightning.kokkos,GPU,0,-,-,-,20,0.000,0.224 +lightning.kokkos,GPU,0,-,-,-,21,0.000,0.280 +lightning.kokkos,GPU,0,-,-,-,22,0.000,0.477 +lightning.kokkos,GPU,0,-,-,-,23,0.000,0.759 +lightning.kokkos,GPU,0,-,-,-,24,0.000,1.451 +lightning.kokkos,GPU,0,-,-,-,25,0.000,2.858 +lightning.kokkos,GPU,0,-,-,-,26,0.000,5.795 +lightning.kokkos,GPU,0,-,-,-,27,0.000,12.113 +lightning.kokkos,GPU,0,-,-,-,28,0.000,25.542 +lightning.kokkos,GPU,0,-,-,-,29,0.000,54.042 +lightning.kokkos,GPU,0,-,-,-,30,0.000,114.467 +lightning.kokkos,CPU,32,-,qjit,-,15,10.045,0.029 +lightning.kokkos,CPU,32,-,qjit,-,16,11.415,0.043 +lightning.kokkos,CPU,32,-,qjit,-,17,12.532,0.063 +lightning.kokkos,CPU,32,-,qjit,-,18,13.544,0.103 +lightning.kokkos,CPU,32,-,qjit,-,19,15.033,0.160 +lightning.kokkos,CPU,32,-,qjit,-,20,16.750,0.331 +lightning.kokkos,CPU,32,-,qjit,-,21,18.347,0.341 +lightning.kokkos,CPU,32,-,qjit,-,22,20.144,0.671 +lightning.kokkos,CPU,32,-,qjit,-,23,22.126,1.345 +lightning.kokkos,CPU,32,-,qjit,-,24,24.765,3.227 +lightning.kokkos,CPU,32,-,qjit,-,25,31.732,9.668 +lightning.kokkos,CPU,32,-,qjit,-,26,58.165,32.451 +lightning.kokkos,CPU,32,-,qjit,-,27,92.592,65.517