From 1f973d0585de4492a7c44853b68afdb7bbb1a6ce Mon Sep 17 00:00:00 2001 From: Shuli Shu <31480676+multiphaseCFD@users.noreply.github.com> Date: Wed, 25 Oct 2023 14:28:56 -0400 Subject: [PATCH] Add python layer for LGPU_MPI (#518) * add LGPU cpp layer * update measurement * add openmp to adjgpu * Auto update version * Add support for building multiple backend simulators (#497) * Add PL_BACKEND_LIST * Update the support * Exclude Python bindings * Update HermitianObs name scope conflicts * Auto update version * Cleanup * Update CI to build and check C++ tests of multiple backends (Linux) * Update changelog * Update .github/workflows/tests_linux.yml Co-authored-by: Vincent Michaud-Rioux * Apply code review suggestions * Update .github/workflows/tests_linux.yml Co-authored-by: Amintor Dusko <87949283+AmintorDusko@users.noreply.github.com> --------- Co-authored-by: Dev version update bot Co-authored-by: Vincent Michaud-Rioux Co-authored-by: Amintor Dusko <87949283+AmintorDusko@users.noreply.github.com> * add python layer & isingxy gate in the cpp layer * add batched adjoint method * Update DefaultQubit to DefaultQubitLegacy (#500) * Update DefaultQubit to DefaultQubitLegacy * Update changelog * update pylint disable on fallback * Auto update version * add batch support for adjoint method * add gitignore * tidy up code * Auto update version * make format * revert complexT delete in LKokkosBingds * make format * update based on tidy * fix tidy format * add_gpu_runners_tests * add cuquantum_sdk path to ci workflow * debug * add path to cuquantum sdk * add python layer tests in ci workflow * ci tests * quick fix * skip pr ci for some workflows * quick fix * quick fix * update python ci tests * remove dependency on lightning_qubit in ci * fix directory * fix directory * quick fix * quick fix * test for cuda-12 * update measurement * updata cu12 workflows * add getDataVector support to LQubitRaw * install lightning.qubit before lightning.gpu in ci * update test_obs * activate 
all CI checks * quick fix * tidy up code * tidy up code * make format * update ci for more tests * tidy up code * tidy up code * tidy up code * make format * fix for codecov * codecov fix * quick fix * quick fix * quick fix * quick test * fix test * fix tests * another quick fix * coverage fix * update ci tests * update ci for no binary * codecov fix * update adj tests for no binary case * update python layer tests * fix codecov * make format * initial commit for MPI * revert to cu11 * enable more py tests * update CI * upload codecov ci * add more tests for statevectorcudamanaged * add more unit tests * add more tests * make format * add more cpp tests * skip cpp tests pauli param gates * make format * add more files to gitignore * Auto update version * init commit * Trigger CI * update gpu runner * quick fix * update fix * add cpp layer for LGPU-MPI backend * add py layer * quick fix * make format * fix for fp32 support in expval calculation * quick fix * fix for cray_mpich_serialize_py * copy to move for hamiltonian operation * add unit tests for adjoint method * add more tests * resolve comments py layer * remove omp support in LGPU * update version * Auto update version * fix based on comments * Add L-GPU and L-Kokkos as package extras (#515) * Add L-GPU and L-Kokkos as package extras * Auto update version * Update changelog * Temp enable the x86 wheel cache * Return wheel storage functionality to normal * Update readme * Auto update version * Trigger CI * Update README.rst Co-authored-by: Amintor Dusko <87949283+AmintorDusko@users.noreply.github.com> --------- Co-authored-by: Dev version update bot Co-authored-by: Amintor Dusko <87949283+AmintorDusko@users.noreply.github.com> * Auto update version * make format * remove sparseH * remove applyHostMatrixGate * Add wheel_linux_x86_64_cu11.yml (#517) * Add wheel_linux_x86_64_cu11.yml * echo COMPILER=g++ >> * python3.9 => python * reinstall g++11 * Try that * Use env vars for versions. * Fix var syntax. 
* Hardcode versions * Fix custatevec-cu11 * Revert triggers. * Update changelog [skip ci] * resolve more comments * add more tests to non_param gates * resolve cpp tests comments * remove unused methods in measurement class * remove unused methods * resolve more comments * add changelog and matrixhasher * quick update * add more tests and merge base branch * add mpi unit tests for algorithm base class * add more unit tests for utils * ctor test for MPIManager * Add mpi tests to LGPU (#519) * Initial commit mpi tests * Remove label guards * Fix PL_DEVICE * Install lightning_qubit. * Fix ENABLE_MPI * print cuquantum * export cu_sdk * revert define * Debug cpp tests. * Debug cpp tests. * Fix cmake options. * Compile with mpicxx * Specify backend. * Specify backend. * Remove obsolete line. * Specify cov backend * Merge test/cov & try simplifying python * if-no-files-found: error and fix python tests. * Fix mpi find * Install real lightning. * Revert python tests. * Hardcode backend values in python tests * Install lightning_qubit with gpu in python tests * Remove explicit mpich mentions. * Parametrize mpilib name. * Add openmpi tests. * Build only openmpi python tests. * Add timeouts * test/test_apply.py * Revert pull triggers. * Clean gpu-mpi test workflows. * Revert to 804ed24. * Revert back. * Update tests_linux_x86_mpi.yml [ci skip] * Add jobs dep. * Remove module unload * Simplify mpi-gpu tests. * trigger CI * unset CFLAGS. * set CFLAGS * Revert triggers. 
* Fix pull_request: [skip ci] * trigger CI * Rename test_gpu_cu11.yml -> tests_gpu_cu11.yml [skip ci] * add CI checks for cpp unit tests * add cpp layer ci check for mpi backend * Auto update version * remove redundant blank lines * tidy up code * Trigger CI * remove single GPU backend tests in mpi ci * upload codecov results * add more unit tests * add tests for pauli word based expval * add more docs * add more tests * skip lcov for native gates * add mpi_helpers * add more docstrings * add change log * Auto update version * Auto update version * fix failures caused by merging * add changelog * Trigger multi-GPU runner * add more fp32 tests to the measurement class * add number of devices and mpi procs check * Add coverage for py-mpitests. (#522) * Initial commit mpi tests * Remove label guards * Fix PL_DEVICE * Install lightning_qubit. * Fix ENABLE_MPI * print cuquantum * export cu_sdk * revert define * Debug cpp tests. * Debug cpp tests. * Fix cmake options. * Compile with mpicxx * Specify backend. * Specify backend. * Remove obsolete line. * Specify cov backend * Merge test/cov & try simplifying python * if-no-files-found: error and fix python tests. * Fix mpi find * Install real lightning. * Revert python tests. * Hardcode backend values in python tests * Install lightning_qubit with gpu in python tests * Remove explicit mpich mentions. * Parametrize mpilib name. * Add openmpi tests. * Build only openmpi python tests. * Add timeouts * test/test_apply.py * Revert pull triggers. * Clean gpu-mpi test workflows. * Revert to 804ed24. * Revert back. * Update tests_linux_x86_mpi.yml [ci skip] * Add jobs dep. * Remove module unload * Simplify mpi-gpu tests. * trigger CI * unset CFLAGS. * set CFLAGS * Revert triggers. * Fix pull_request: [skip ci] * trigger CI * Rename test_gpu_cu11.yml -> tests_gpu_cu11.yml [skip ci] * Add coverage for py-mpitests. * Upload mpi-gpu test coverage. * Try other paths. * trigger CI * Add mpi tests. * Fix couple tests. 
* Fixx test_apply tests? * Add MPI sparse measurements. * Fix format. * Add MPI_Init checks in MPIManager constructors. * Reformat mpitests and add cov for proc > dev error. * Refactor makefile. * Revert to full mpirun path. * Fix couple tests. * Name coverage after matrix.mpilib. * Remove oversubscribe MPI test. * Update changelog [skip ci]. --------- Co-authored-by: Shuli <08cnbj@gmail.com> * add more tests in obs base class * Revert "Merge branch 'add_LGPUMPI' into add_py_LGPUMPI" This reverts commit d3af81987fa6553d1975abf9b5aa9c17bd0edf63, reversing changes made to 6ad1c7c8fd4cee21d7ca3b91aa349e7d1dd2e8ed. * Fix pylint [skip ci] * resolve comments on source codes and tidy up code * Use CRTP to define initSV and remove initSV_MPI * resolve more typos * resolve more typoes * resolve adjoint class * remove py&pybind layer * resolve more comments * Remove redundant blank line * add num mpi & ngpudevice proc check * fix typo * remove unused lines * add more tests * remove initsv_mpi * add reset * make format * use_mpi as _use_mpi in QuantumScriptSerializer * resolve more comments * check->require * make format * rename mpi workflow * Add sparseH for LGPU (#526) * Init commit * Fix std::endl; * Use more generic indices in base std::size_t. * add pybind layer * add python layer * Quick and dirty spham bindings. * Add sparse_ham serialization. * Add sparse_ham tests in tests/test_adjoint_jacobian.py' * Bug fix sparse product. * add sparseH * Trigger CI * Fix python bindings LGPU idxT * Fix serial tests and update changelog. * add more unit tests for sparseH base class * Fix tidy & sparse adjoint test device name. * Fix tidy warning for sparse_ham. * Send backend-specific ops in respective modules. * Fix sparse_hamiltonianmpi_c and add getWires test. * Add sparseH diff capability in LQ. * Add sparse Hamiltonian support for Lightning-Kokkos (#527) * Use more generic indices in base std::size_t. * Quick and dirty spham bindings. * Add sparse_ham serialization. 
* Add sparse_ham tests in tests/test_adjoint_jacobian.py' * Bug fix sparse product. * Fix python bindings LGPU idxT * Fix serial tests and update changelog. * Fix tidy & sparse adjoint test device name. * Fix tidy warning for sparse_ham. * Send backend-specific ops in respective modules. * Fix sparse_hamiltonianmpi_c and add getWires test. * Fix clang tidy * Comment workflows but tidy. * Fix tidy warn * Add override to sp::getWires * Restore triggers * Update tests_linux_x86_mpi.yml * Add constructibility tests. * Move L-Kokkos-CUDA tests to workflow call, called from tests_gpu_cu11.yml. * Remove GPU deadlock. * Bug fix Python MPI. * Upload both outputs. * Update gcc version in format.yml. * Update .github/CHANGELOG.md [skip ci] Co-authored-by: Amintor Dusko <87949283+AmintorDusko@users.noreply.github.com> * Update .github/workflows/tests_gpu_kokkos.yml [skip ci] Co-authored-by: Amintor Dusko <87949283+AmintorDusko@users.noreply.github.com> * rename argn [skip ci] * Remove unused lines [skip ci] * Fix SparseHamiltonianBase::isEqual. [skip ci] * Trigger CI * Auto update version * Trigger CI * resolve comments * rename dev_kokkos to dev * Fix tidy. --------- Co-authored-by: Vincent Michaud-Rioux Co-authored-by: Vincent Michaud-Rioux Co-authored-by: Amintor Dusko <87949283+AmintorDusko@users.noreply.github.com> Co-authored-by: Dev version update bot * update work flow * resolve comments for unit tests * add more unit tests for sparseH * quick fix * add fp32 tests * tidy up code * remove redundant lines * add pylintrc to mpitests * add mpitests dir to commit-config * add mpitests to .coveragerc * add mpitests path to coveragerc * Fix mpitests/test_adjoint_jacobian.py * Fix pylint in mpitests/test_apply [skip ci]. * pylint fix for mpi py_d_e_m_p tets * tidy up cpp code * fix codefactor * revert skipp condition for openfermionpyscf * codefactor fix * add sparseH tests for mpi backend * Install openfermion in CI workflows and fix H2 QChem integration test. 
* update changelog --------- Co-authored-by: Dev version update bot Co-authored-by: Ali Asadi Co-authored-by: Vincent Michaud-Rioux Co-authored-by: Amintor Dusko <87949283+AmintorDusko@users.noreply.github.com> Co-authored-by: Lee James O'Riordan Co-authored-by: Vincent Michaud-Rioux --- .coveragerc | 3 +- .github/CHANGELOG.md | 12 + .github/workflows/format.yml | 5 +- .github/workflows/tests_gpu_cu11.yml | 2 +- .../{tests_gpu.yml => tests_gpu_kokkos.yml} | 3 +- .github/workflows/tests_linux.yml | 3 + ...86_mpi.yml => tests_linux_x86_mpi_gpu.yml} | 135 +- .pre-commit-config.yaml | 2 +- Makefile | 2 +- mpitests/.pylintrc | 52 + mpitests/conftest.py | 120 ++ mpitests/test_adjoint_jacobian.py | 1364 +++++++++++++++++ mpitests/test_apply.py | 1049 +++++++++++++ mpitests/test_device.py | 54 + mpitests/test_expval.py | 332 ++++ mpitests/test_measurements_sparse.py | 168 ++ mpitests/test_probs.py | 312 ++++ pennylane_lightning/core/_serialize.py | 91 +- pennylane_lightning/core/_version.py | 2 +- pennylane_lightning/core/lightning_base.py | 6 +- .../core/src/bindings/Bindings.cpp | 8 + .../core/src/bindings/Bindings.hpp | 6 +- .../core/src/bindings/BindingsMPI.hpp | 483 ++++++ .../core/src/observables/Observables.hpp | 108 ++ .../observables/tests/Test_Observables.cpp | 83 +- .../tests/mpi/Test_ObservablesMPI.cpp | 88 ++ .../simulators/lightning_gpu/CMakeLists.txt | 4 + .../lightning_gpu/bindings/LGPUBindings.hpp | 90 +- .../bindings/LGPUBindingsMPI.hpp | 323 ++++ .../measurements/MeasurementsGPU.hpp | 14 +- .../measurements/MeasurementsGPUMPI.hpp | 15 +- .../Test_StateVectorCudaManaged_Expval.cpp | 34 +- .../mpi/Test_StateVectorCudaMPI_Expval.cpp | 34 +- .../observables/ObservablesGPU.cpp | 3 + .../observables/ObservablesGPU.hpp | 83 + .../observables/ObservablesGPUMPI.cpp | 3 + .../observables/ObservablesGPUMPI.hpp | 94 ++ .../observables/tests/Test_ObservablesGPU.cpp | 42 + .../tests/mpi/Test_ObservablesGPUMPI.cpp | 54 + .../lightning_gpu/utils/LinearAlg.hpp | 48 +- 
.../lightning_gpu/utils/MPILinearAlg.hpp | 12 +- .../utils/tests/Test_LinearAlgebra.cpp | 16 +- .../utils/tests/mpi/Test_LinearAlgebraMPI.cpp | 15 +- .../lightning_kokkos/StateVectorKokkos.hpp | 19 +- .../bindings/LKokkosBindings.hpp | 54 + .../observables/ObservablesKokkos.cpp | 3 + .../observables/ObservablesKokkos.hpp | 71 + .../tests/Test_ObservablesKokkos.cpp | 14 + .../bindings/LQubitBindings.hpp | 58 +- .../observables/ObservablesLQubit.cpp | 6 + .../observables/ObservablesLQubit.hpp | 71 + .../tests/Test_ObservablesLQubit.cpp | 15 + .../core/src/utils/TestHelpers.hpp | 28 +- .../lightning_gpu/lightning_gpu.py | 162 +- requirements-dev.txt | 4 +- tests/test_adjoint_jacobian.py | 68 +- tests/test_device.py | 15 + tests/test_serialize.py | 10 + 58 files changed, 5817 insertions(+), 158 deletions(-) rename .github/workflows/{tests_gpu.yml => tests_gpu_kokkos.yml} (99%) rename .github/workflows/{tests_linux_x86_mpi.yml => tests_linux_x86_mpi_gpu.yml} (55%) create mode 100644 mpitests/.pylintrc create mode 100644 mpitests/conftest.py create mode 100644 mpitests/test_adjoint_jacobian.py create mode 100644 mpitests/test_apply.py create mode 100644 mpitests/test_device.py create mode 100644 mpitests/test_expval.py create mode 100644 mpitests/test_measurements_sparse.py create mode 100644 mpitests/test_probs.py create mode 100644 pennylane_lightning/core/src/bindings/BindingsMPI.hpp create mode 100644 pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindingsMPI.hpp diff --git a/.coveragerc b/.coveragerc index 5c4cfff8a2..e9d7866fff 100644 --- a/.coveragerc +++ b/.coveragerc @@ -2,7 +2,8 @@ [run] source = pennylane_lightning omit = - tests* + tests/* + mpitests/* [report] # Regexes for lines to exclude from consideration diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md index 3ee6676579..8cb50c0517 100644 --- a/.github/CHANGELOG.md +++ b/.github/CHANGELOG.md @@ -2,6 +2,15 @@ ### New features since last release +* Add `SparseHamiltonian` 
support for Lightning-Qubit and Lightning-GPU. + [(#526)] (https://github.com/PennyLaneAI/pennylane-lightning/pull/526) + +* Add `SparseHamiltonian` support for Lightning-Kokkos. + [(#527)] (https://github.com/PennyLaneAI/pennylane-lightning/pull/527) + +* Integrate python/pybind layer of distributed Lightning-GPU into the Lightning monorepo with python unit tests. + [(#518)] (https://github.com/PennyLaneAI/pennylane-lightning/pull/518) + * Integrate the distributed C++ backend of Lightning-GPU into the Lightning monorepo. [(#514)] (https://github.com/PennyLaneAI/pennylane-lightning/pull/514) @@ -46,6 +55,9 @@ ### Improvements +* Improve Python testing for Lightning-GPU (+MPI) by adding jobs in Actions files and adding Python tests to increase code coverage. + [(#522)](https://github.com/PennyLaneAI/pennylane-lightning/pull/522) + * Add support for `pip install pennylane-lightning[kokkos]` for the OpenMP backend. [(#515)](https://github.com/PennyLaneAI/pennylane-lightning/pull/515) diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 6cbde05023..350312d253 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -84,7 +84,7 @@ jobs: cp -rf ${{ github.workspace}}/Kokkos_install/${{ matrix.exec_model }}/* Kokkos/ - name: Install dependencies - run: sudo apt update && sudo apt -y install clang-tidy-14 cmake g++-10 ninja-build libomp-14-dev + run: sudo apt update && sudo apt -y install clang-tidy-14 cmake gcc-11 g++-11 ninja-build libomp-14-dev env: DEBIAN_FRONTEND: noninteractive @@ -96,5 +96,6 @@ jobs: -DBUILD_TESTS=ON \ -DENABLE_WARNINGS=ON \ -DPL_BACKEND=${{ matrix.pl_backend }} \ - -DCMAKE_CXX_COMPILER="$(which g++-10)" + -DCMAKE_CXX_COMPILER="$(which g++-11)" \ + -DCMAKE_C_COMPILER="$(which gcc-11)" cmake --build ./Build \ No newline at end of file diff --git a/.github/workflows/tests_gpu_cu11.yml b/.github/workflows/tests_gpu_cu11.yml index cb15aca14e..925491efe6 100644 --- a/.github/workflows/tests_gpu_cu11.yml 
+++ b/.github/workflows/tests_gpu_cu11.yml @@ -212,7 +212,7 @@ jobs: - name: Install required packages run: | python -m pip install pip~=22.0 - python -m pip install ninja cmake custatevec-cu11 pytest pytest-mock flaky pytest-cov + python -m pip install ninja cmake custatevec-cu11 pytest pytest-mock flaky pytest-cov openfermionpyscf - name: Build and install package env: diff --git a/.github/workflows/tests_gpu.yml b/.github/workflows/tests_gpu_kokkos.yml similarity index 99% rename from .github/workflows/tests_gpu.yml rename to .github/workflows/tests_gpu_kokkos.yml index bbf240d91c..e65aa0697d 100644 --- a/.github/workflows/tests_gpu.yml +++ b/.github/workflows/tests_gpu_kokkos.yml @@ -1,4 +1,4 @@ -name: Testing (GPU) +name: Testing::LKokkos::GPU on: pull_request: push: @@ -237,6 +237,7 @@ jobs: run: | cd main python -m pip install -r requirements-dev.txt + python -m pip install openfermionpyscf - name: Install ML libraries for interfaces run: | diff --git a/.github/workflows/tests_linux.yml b/.github/workflows/tests_linux.yml index ac8a05c593..672327341c 100644 --- a/.github/workflows/tests_linux.yml +++ b/.github/workflows/tests_linux.yml @@ -118,6 +118,7 @@ jobs: run: | cd main python -m pip install -r requirements-dev.txt + python -m pip install openfermionpyscf - name: Install Stable PennyLane if: inputs.pennylane-version == 'stable' @@ -244,6 +245,7 @@ jobs: run: | cd main python -m pip install -r requirements-dev.txt + python -m pip install openfermionpyscf - name: Install Stable PennyLane if: inputs.pennylane-version == 'stable' @@ -412,6 +414,7 @@ jobs: run: | cd main python -m pip install -r requirements-dev.txt + python -m pip install openfermionpyscf - name: Install Stable PennyLane if: inputs.pennylane-version == 'stable' diff --git a/.github/workflows/tests_linux_x86_mpi.yml b/.github/workflows/tests_linux_x86_mpi_gpu.yml similarity index 55% rename from .github/workflows/tests_linux_x86_mpi.yml rename to 
.github/workflows/tests_linux_x86_mpi_gpu.yml index 69040df8e3..e879415492 100644 --- a/.github/workflows/tests_linux_x86_mpi.yml +++ b/.github/workflows/tests_linux_x86_mpi_gpu.yml @@ -157,6 +157,112 @@ jobs: rm -rf * .git .gitignore .github pip cache purge + + python_tests: + if: contains(github.event.pull_request.labels.*.name, 'ci:use-multi-gpu-runner') || (inputs.lightning-version != '' && inputs.pennylane-version != '') + runs-on: + - self-hosted + - linux + - x64 + - ubuntu-22.04 + - multi-gpu + strategy: + max-parallel: 1 + matrix: + mpilib: ["mpich", "openmpi"] + timeout-minutes: 30 + + steps: + - name: Checkout pennyLane-lightning + uses: actions/checkout@v3 + with: + fetch-tags: true + + - name: Switch to stable build of Lightning + if: inputs.lightning-version == 'stable' + run: | + git fetch tags --force + git checkout $(git tag | sort -V | tail -1) + + - uses: actions/setup-python@v4 + id: setup_python + name: Install Python + with: + python-version: '3.9' + + # Since the self-hosted runner can be re-used. 
It is best to set up all package + # installations in a virtual environment that gets cleaned at the end of each workflow run + - name: Setup Python virtual environment + id: setup_venv + env: + VENV_NAME: ${{ github.workspace }}/venv_${{ steps.setup_python.outputs.python-version }}_${{ github.sha }} + run: | + # Clear any pre-existing venvs + rm -rf venv_* + + # Create new venv for this workflow_run + python --version + python -m venv ${{ env.VENV_NAME }} + + # Add the venv to PATH for subsequent steps + echo ${{ env.VENV_NAME }}/bin >> $GITHUB_PATH + + # Adding venv name as an output for subsequent steps to reference if needed + source ${{ env.VENV_NAME }}/bin/activate + echo "venv_name=${{ env.VENV_NAME }}" >> $GITHUB_OUTPUT + + - name: Display Python-Path + id: python_path + run: | + py_path=$(which python) + echo "Python Interpreter Path => $py_path" + echo "python=$py_path" >> $GITHUB_OUTPUT + + pip_path=$(which python) + echo "PIP Path => $pip_path" + echo "pip=$pip_path" >> $GITHUB_OUTPUT + + - name: Install Latest PennyLane + # We want to install the latest PL on non workflow_call events + if: inputs.pennylane-version == 'latest' || inputs.pennylane-version == '' + run: python -m pip install git+https://github.com/PennyLaneAI/pennylane.git@master + + - name: Install required packages + run: | + source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }} + python -m pip install pip~=22.0 + python -m pip install ninja cmake custatevec-cu11 pytest pytest-mock flaky pytest-cov mpi4py openfermionpyscf + SKIP_COMPILATION=True PL_BACKEND=lightning_qubit python -m pip install -e . 
-vv + + - name: Build and install package + env: + CUQUANTUM_SDK: $(python -c "import site; print( f'{site.getsitepackages()[0]}/cuquantum/lib')") + run: | + source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }} + CMAKE_ARGS="-DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DENABLE_MPI=ON -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_ARCHITECTURES=${{ env.CI_CUDA_ARCH }} -DPython_EXECUTABLE=${{ steps.python_path.outputs.python }}" \ + PL_BACKEND=lightning_gpu python -m pip install -e . --verbose + + - name: Run unit tests for MPI-enabled lightning.gpu device + run: | + source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }} + PL_DEVICE=lightning.gpu /opt/mpi/${{ matrix.mpilib }}/bin/mpirun -np 2 python -m pytest ./mpitests $COVERAGE_FLAGS + mv coverage.xml coverage-${{ github.job }}-lightning_gpu_${{ matrix.mpilib }}-main.xml + # PL_DEVICE=lightning.gpu /opt/mpi/${{ matrix.mpilib }}/bin/mpirun --oversubscribe -n 4 pytest -s -x mpitests/test_device.py -k test_create_device $COVERAGE_FLAGS + + - name: Upload code coverage results + uses: actions/upload-artifact@v3 + with: + name: ubuntu-codecov-results-python + path: coverage-${{ github.job }}-lightning_gpu_${{ matrix.mpilib }}-*.xml + if-no-files-found: error + + - name: Cleanup + if: always() + run: | + rm -rf ${{ steps.setup_venv.outputs.venv_name }} + rm -rf * .git .gitignore .github + pip cache purge + upload-to-codecov-linux-cpp: needs: ["cpp_tests"] name: Upload coverage data to codecov @@ -182,4 +288,31 @@ jobs: run: | rm -rf ${{ steps.setup_venv.outputs.venv_name }} rm -rf * .git .gitignore .github - pip cache purge \ No newline at end of file + pip cache purge + + upload-to-codecov-linux-python: + needs: ["python_tests"] + name: Upload coverage data to codecov + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Download coverage reports + uses: 
actions/download-artifact@v3 + with: + name: ubuntu-codecov-results-python + + - name: Upload to Codecov + uses: codecov/codecov-action@v3 + with: + fail_ci_if_error: true + verbose: true + token: ${{ secrets.CODECOV_TOKEN }} + + - name: Cleanup + if: always() + run: | + rm -rf ${{ steps.setup_venv.outputs.venv_name }} + rm -rf * .git .gitignore .github + pip cache purge diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3268036a65..634565cded 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,4 +18,4 @@ repos: "-sn", # Don't display the score "--rcfile=.pylintrc", # Link to your config file ] - exclude: ^(bin/|doc/|scripts/|setup.py|tests/) + exclude: ^(bin/|doc/|scripts/|setup.py|tests/|mpitests/) diff --git a/Makefile b/Makefile index f8a62fc407..d07ae44dbc 100644 --- a/Makefile +++ b/Makefile @@ -110,7 +110,7 @@ format-cpp: ./bin/format $(CHECK) --cfversion $(if $(version:-=),$(version),0) ./pennylane_lightning format-python: - black -l 100 ./pennylane_lightning/ ./tests $(CHECK) + black -l 100 ./pennylane_lightning/ ./mpitests ./tests $(CHECK) .PHONY: check-tidy check-tidy: diff --git a/mpitests/.pylintrc b/mpitests/.pylintrc new file mode 100644 index 0000000000..3847296e98 --- /dev/null +++ b/mpitests/.pylintrc @@ -0,0 +1,52 @@ +[MASTER] +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code +extension-pkg-whitelist=numpy,scipy,autograd,toml,appdir,autograd.numpy,autograd.numpy.linalg,autograd.numpy.builtins,semantic_version,torch,tensorflow,tensorflow.contrib,tensorflow.contrib.eager,LazyLoader,networkx,networkx.dag +ignore-patterns=test_legacy* + +[TYPECHECK] + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. 
It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules=numpy,scipy,autograd,toml,appdir,autograd.numpy,autograd.numpy.linalg,autograd.numpy.builtins,semantic_version,torch,tensorflow,tensorflow.contrib,tensorflow.contrib.eager,LazyLoader,networkx,networkx.dag,math,pennylane.numpy + +# List of classes names for which member attributes should not be checked +# (useful for classes with attributes dynamically set). This supports can work +# with qualified names. +ignored-classes=numpy,scipy,autograd,toml,appdir,autograd.numpy,autograd.numpy.linalg,autograd.numpy.builtins,semantic_version,torch,tensorflow,tensorflow.contrib,tensorflow.contrib.eager,LazyLoader,networkx,networkx.dag,math,pennylane.numpy,pennylane.numpy.random,pennylane.numpy.linalg,pennylane.numpy.builtins,pennylane.operation,rustworkx,kahypar + +[MESSAGES CONTROL] + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time. +#enable= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). +# Cyclical import checks are disabled for now as they are frequently used in +# the code base, but this can be removed in the future once cycles are resolved. +disable= + line-too-long, + invalid-name, + too-many-lines, + redefined-builtin, + too-many-locals, + duplicate-code, + cyclic-import, + import-error, + bad-option-value, + import-outside-toplevel, + missing-class-docstring, + missing-function-docstring, + no-self-use + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. 
+notes= diff --git a/mpitests/conftest.py b/mpitests/conftest.py new file mode 100644 index 0000000000..09ab802b05 --- /dev/null +++ b/mpitests/conftest.py @@ -0,0 +1,120 @@ +# Copyright 2018-2023 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Pytest configuration file for PennyLane-Lightning-GPU test suite. +""" +# pylint: disable=missing-function-docstring,wrong-import-order,unused-import + +import itertools +import os +import pytest + +from pennylane import numpy as np +import pennylane as qml + +# Tuple passed to distributed device ctor +# np.complex for data type and True or False +# for enabling MPI or not. 
+fixture_params = itertools.product( + [np.complex64, np.complex128], + [True, False], +) + +# defaults +TOL = 1e-6 +TOL_STOCHASTIC = 0.05 + +U = np.array( + [ + [0.83645892 - 0.40533293j, -0.20215326 + 0.30850569j], + [-0.23889780 - 0.28101519j, -0.88031770 - 0.29832709j], + ] +) + +U2 = np.array([[0, 1, 1, 1], [1, 0, 1, -1], [1, -1, 0, 1], [1, 1, -1, 0]]) / np.sqrt(3) +A = np.array([[1.02789352, 1.61296440 - 0.3498192j], [1.61296440 + 0.3498192j, 1.23920938 + 0j]]) + +THETA = np.linspace(0.11, 1, 3) +PHI = np.linspace(0.32, 1, 3) +VARPHI = np.linspace(0.02, 1, 3) + + +@pytest.fixture(scope="session") +def tol(): + """Numerical tolerance for equality tests.""" + return float(os.environ.get("TOL", TOL)) + + +@pytest.fixture(scope="session", params=[2, 3]) +def n_subsystems(request): + """Number of qubits or qumodes.""" + return request.param + + +# Looking for the device for testing. +default_device = "lightning.gpu" +supported_devices = {"lightning.gpu"} +supported_devices.update({sb.replace(".", "_") for sb in supported_devices}) + + +def get_device(): + """Return the pennylane lightning device. + + The device is ``lightning.gpu`` by default. + Allowed values are: "lightning.gpu". + An underscore can also be used instead of a dot. + If the environment variable ``PL_DEVICE`` is defined, its value is used. + Underscores are replaced by dots upon exiting. + """ + device = None + if "PL_DEVICE" in os.environ: + device = os.environ.get("PL_DEVICE", default_device) + device = device.replace("_", ".") + if device is None: + device = default_device + if device not in supported_devices: + raise ValueError(f"Invalid backend {device}.") + return device + + +device_name = get_device() + +if device_name not in qml.plugin_devices: + raise qml.DeviceError( + f"Device {device_name} does not exist. Make sure the required plugin is installed." 
+ ) + +# Device specification +if device_name == "lightning.gpu": + from pennylane_lightning.lightning_gpu import LightningGPU as LightningDevice +else: + raise qml.DeviceError(f"The MPI tests do not apply to the {device_name} device.") + + +# General qubit_device fixture, for any number of wires. +@pytest.fixture( + scope="function", + params=fixture_params, +) +def qubit_device(request): + def _device(wires): + return qml.device( + device_name, + wires=wires, + mpi=True, + c_dtype=request.param[0], + batch_obs=request.param[1], + ) + + return _device diff --git a/mpitests/test_adjoint_jacobian.py b/mpitests/test_adjoint_jacobian.py new file mode 100644 index 0000000000..3657c336f8 --- /dev/null +++ b/mpitests/test_adjoint_jacobian.py @@ -0,0 +1,1364 @@ +# Copyright 2018-2023 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Unit tests for the :mod:`pennylane_lightning_gpu.LightningGPU` device (MPI). 
+""" +# pylint: disable=protected-access,cell-var-from-loop,c-extension-no-member +import itertools +import math +from mpi4py import MPI +import pytest +from conftest import device_name, LightningDevice as ld + +from scipy.stats import unitary_group +import pennylane as qml +from pennylane import numpy as np +from pennylane import QNode, qnode + +I, X, Y, Z = ( + np.eye(2), + qml.PauliX.compute_matrix(), + qml.PauliY.compute_matrix(), + qml.PauliZ.compute_matrix(), +) + +# Tuple passed to distributed device ctor +# np.complex for data type and True or False +# for enabling batched_obs. +fixture_params = itertools.product( + [np.complex64, np.complex128], + [True, False], +) + + +def Rx(theta): + r"""One-qubit rotation about the x axis. + + Args: + theta (float): rotation angle + Returns: + array: unitary 2x2 rotation matrix :math:`e^{-i \sigma_x \theta/2}` + """ + return math.cos(theta / 2) * I + 1j * math.sin(-theta / 2) * X + + +def Ry(theta): + r"""One-qubit rotation about the y axis. + + Args: + theta (float): rotation angle + Returns: + array: unitary 2x2 rotation matrix :math:`e^{-i \sigma_y \theta/2}` + """ + return math.cos(theta / 2) * I + 1j * math.sin(-theta / 2) * Y + + +def Rz(theta): + r"""One-qubit rotation about the z axis. 
+ + Args: + theta (float): rotation angle + Returns: + array: unitary 2x2 rotation matrix :math:`e^{-i \sigma_z \theta/2}` + """ + return math.cos(theta / 2) * I + 1j * math.sin(-theta / 2) * Z + + +class TestAdjointJacobian: # pylint: disable=too-many-public-methods + """Tests for the adjoint_jacobian method""" + + @pytest.fixture(params=fixture_params) + def dev(self, request): + """Returns a PennyLane device.""" + return qml.device( + device_name, + wires=8, + mpi=True, + c_dtype=request.param[0], + batch_obs=request.param[1], + ) + + def test_not_expval(self, dev): + """Test if a QuantumFunctionError is raised for a tape with measurements that are not + expectation values""" + + with qml.tape.QuantumTape() as tape: + qml.RX(0.1, wires=0) + qml.var(qml.PauliZ(0)) + + with pytest.raises( + qml.QuantumFunctionError, match="Adjoint differentiation method does not" + ): + dev.adjoint_jacobian(tape) + + with qml.tape.QuantumTape() as tape: + qml.RX(0.1, wires=0) + qml.state() + + if device_name == "lightning.gpu" and ld._CPP_BINARY_AVAILABLE: + message = "Adjoint differentiation does not support State measurements." + elif ld._CPP_BINARY_AVAILABLE: + message = "This method does not support statevector return type." 
+ else: + message = "Adjoint differentiation method does not support measurement StateMP" + with pytest.raises( + qml.QuantumFunctionError, + match=message, + ): + dev.adjoint_jacobian(tape) + + def test_finite_shots_warns(self): + """Tests warning raised when finite shots specified""" + + dev = qml.device(device_name, wires=8, mpi=True, shots=1) + + with qml.tape.QuantumTape() as tape: + qml.expval(qml.PauliZ(0)) + + with pytest.warns( + UserWarning, + match="Requested adjoint differentiation to be computed with finite shots.", + ): + dev.adjoint_jacobian(tape) + + def test_empty_measurements(self, dev): + """Tests if an empty array is returned when the measurements of the tape is empty.""" + + with qml.tape.QuantumTape() as tape: + qml.RX(0.4, wires=[0]) + + jac = dev.adjoint_jacobian(tape) + assert len(jac) == 0 + + @pytest.mark.skipif(not ld._CPP_BINARY_AVAILABLE, reason="Lightning binary required") + def test_unsupported_op(self, dev): + """Test if a QuantumFunctionError is raised for an unsupported operation, i.e., + multi-parameter operations that are not qml.Rot""" + + with qml.tape.QuantumTape() as tape: + qml.CRot(0.1, 0.2, 0.3, wires=[0, 1]) + qml.expval(qml.PauliZ(0)) + + with pytest.raises( + qml.QuantumFunctionError, + match="The CRot operation is not supported using the", + ): + dev.adjoint_jacobian(tape) + + @pytest.mark.skipif(not ld._CPP_BINARY_AVAILABLE, reason="Lightning binary required") + def test_proj_unsupported(self, dev): + """Test if a QuantumFunctionError is raised for a Projector observable""" + with qml.tape.QuantumTape() as tape: + qml.CRX(0.1, wires=[0, 1]) + qml.expval(qml.Projector([0, 1], wires=[0, 1])) + + with pytest.raises( + qml.QuantumFunctionError, + match="differentiation method does not support the Projector", + ): + dev.adjoint_jacobian(tape) + + with qml.tape.QuantumTape() as tape: + qml.CRX(0.1, wires=[0, 1]) + qml.expval(qml.Projector([0], wires=[0]) @ qml.PauliZ(0)) + + with pytest.raises( + qml.QuantumFunctionError, 
+ match="differentiation method does not support the Projector", + ): + dev.adjoint_jacobian(tape) + + @pytest.mark.parametrize("theta", np.linspace(-2 * np.pi, 2 * np.pi, 7)) + @pytest.mark.parametrize("G", [qml.RX, qml.RY, qml.RZ]) + @pytest.mark.parametrize("stateprep", [qml.QubitStateVector, qml.StatePrep]) + def test_pauli_rotation_gradient(self, stateprep, G, theta, dev): + """Tests that the automatic gradients of Pauli rotations are correct.""" + random_state = np.array( + [0.43593284 - 0.02945156j, 0.40812291 + 0.80158023j], requires_grad=False + ) + + tape = qml.tape.QuantumScript( + [G(theta, 0)], [qml.expval(qml.PauliZ(0))], [stateprep(random_state, 0)] + ) + + tape.trainable_params = {1} + + calculated_val = dev.adjoint_jacobian(tape) + + tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + + # compare to finite differences + tapes, fn = qml.gradients.param_shift(tape) + numeric_val = fn(qml.execute(tapes, dev, None)) + assert np.allclose(calculated_val, numeric_val, atol=tol, rtol=0) + + @pytest.mark.parametrize("theta", np.linspace(-2 * np.pi, 2 * np.pi, 7)) + @pytest.mark.parametrize("stateprep", [qml.QubitStateVector, qml.StatePrep]) + def test_Rot_gradient(self, stateprep, theta, dev): + """Tests that the device gradient of an arbitrary Euler-angle-parameterized gate is + correct.""" + params = np.array([theta, theta**3, np.sqrt(2) * theta]) + + with qml.tape.QuantumTape() as tape: + stateprep(np.array([1.0, -1.0], requires_grad=False) / np.sqrt(2), wires=0) + qml.Rot(*params, wires=[0]) + qml.expval(qml.PauliZ(0)) + + tape.trainable_params = {1, 2, 3} + + calculated_val = dev.adjoint_jacobian(tape) + + tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + + # compare to finite differences + tapes, fn = qml.gradients.param_shift(tape) + numeric_val = fn(qml.execute(tapes, dev, None)) + assert np.allclose(calculated_val, numeric_val, atol=tol, rtol=0) + + @pytest.mark.parametrize("par", [1, -2, 1.623, -0.051, 0]) # integers, floats, zero + def 
test_ry_gradient(self, par, tol, dev): + """Test that the gradient of the RY gate matches the exact analytic formula.""" + with qml.tape.QuantumTape() as tape: + qml.RY(par, wires=[0]) + qml.expval(qml.PauliX(0)) + + tape.trainable_params = {0} + + # gradients + exact = np.cos(par) + grad_A = dev.adjoint_jacobian(tape) + + # different methods must agree + assert np.allclose(grad_A, exact, atol=tol, rtol=0) + + def test_rx_gradient(self, tol, dev): + """Test that the gradient of the RX gate matches the known formula.""" + a = 0.7418 + + with qml.tape.QuantumTape() as tape: + qml.RX(a, wires=0) + qml.expval(qml.PauliZ(0)) + + # circuit jacobians + dev_jacobian = dev.adjoint_jacobian(tape) + expected_jacobian = -np.sin(a) + assert np.allclose(dev_jacobian, expected_jacobian, atol=tol, rtol=0) + + def test_multiple_rx_gradient_pauliz(self, tol, dev): + """Tests that the gradient of multiple RX gates in a circuit yields the correct result.""" + params = np.array([np.pi, np.pi / 2, np.pi / 3]) + + with qml.tape.QuantumTape() as tape: + qml.RX(params[0], wires=0) + qml.RX(params[1], wires=1) + qml.RX(params[2], wires=2) + + for idx in range(3): + qml.expval(qml.PauliZ(idx)) + + # circuit jacobians + dev_jacobian = dev.adjoint_jacobian(tape) + expected_jacobian = -np.diag(np.sin(params)) + assert np.allclose(dev_jacobian, expected_jacobian, atol=tol, rtol=0) + + def test_multiple_rx_gradient_hermitian(self, tol, dev): + """Tests that the gradient of multiple RX gates in a circuit yields the correct result + with Hermitian observable + """ + params = np.array([np.pi, np.pi / 2, np.pi / 3]) + + with qml.tape.QuantumTape() as tape: + qml.RX(params[0], wires=0) + qml.RX(params[1], wires=1) + qml.RX(params[2], wires=2) + + for idx in range(3): + qml.expval(qml.Hermitian([[1, 0], [0, -1]], wires=[idx])) + + tape.trainable_params = {0, 1, 2} + # circuit jacobians + dev_jacobian = dev.adjoint_jacobian(tape) + expected_jacobian = -np.diag(np.sin(params)) + + assert 
np.allclose(dev_jacobian, expected_jacobian, atol=tol, rtol=0)
+
+    qubit_ops = [getattr(qml, name) for name in qml.ops._qubit__ops__]  # pylint: disable=no-member
+    ops = {qml.RX, qml.RY, qml.RZ, qml.PhaseShift, qml.CRX, qml.CRY, qml.CRZ, qml.Rot}
+
+    def test_multiple_rx_gradient_expval_hermitian(self, tol, dev):
+        """Tests that the gradient of multiple RX gates in a circuit yields the correct result
+        with Hermitian observable
+        """
+        params = np.array([np.pi / 3, np.pi / 4, np.pi / 5])
+
+        with qml.tape.QuantumTape() as tape:
+            qml.RX(params[0], wires=0)
+            qml.RX(params[1], wires=1)
+            qml.RX(params[2], wires=2)
+
+            qml.expval(
+                qml.Hermitian(
+                    [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]],
+                    wires=[0, 2],
+                )
+            )
+
+        tape.trainable_params = {0, 1, 2}
+        dev_jacobian = dev.adjoint_jacobian(tape)
+        expected_jacobian = np.array(
+            [
+                -np.sin(params[0]) * np.cos(params[2]),
+                0,
+                -np.cos(params[0]) * np.sin(params[2]),
+            ]
+        )
+
+        assert np.allclose(dev_jacobian, expected_jacobian, atol=tol, rtol=0)
+
+    qubit_ops = [getattr(qml, name) for name in qml.ops._qubit__ops__]  # pylint: disable=no-member
+    ops = {qml.RX, qml.RY, qml.RZ, qml.PhaseShift, qml.CRX, qml.CRY, qml.CRZ, qml.Rot}
+
+    @pytest.mark.skipif(not ld._CPP_BINARY_AVAILABLE, reason="Lightning binary required")
+    def test_multiple_rx_gradient_expval_hamiltonian(self, tol, dev):
+        """Tests that the gradient of multiple RX gates in a circuit yields the correct result
+        with a Hamiltonian observable
+        """
+        params = np.array([np.pi / 3, np.pi / 4, np.pi / 5])
+
+        ham = qml.Hamiltonian(
+            [1.0, 0.3, 0.3, 0.4],
+            [
+                qml.PauliX(0) @ qml.PauliX(1),
+                qml.PauliZ(0),
+                qml.PauliZ(1),
+                qml.Hermitian(
+                    [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]],
+                    wires=[0, 2],
+                ),
+            ],
+        )
+
+        with qml.tape.QuantumTape() as tape:
+            qml.RX(params[0], wires=0)
+            qml.RX(params[1], wires=1)
+            qml.RX(params[2], wires=2)
+
+            qml.expval(ham)
+
+        tape.trainable_params = {0, 1, 2}
+        dev_jacobian = 
dev.adjoint_jacobian(tape) + expected_jacobian = ( + 0.3 * np.array([-np.sin(params[0]), 0, 0]) + + 0.3 * np.array([0, -np.sin(params[1]), 0]) + + 0.4 + * np.array( + [ + -np.sin(params[0]) * np.cos(params[2]), + 0, + -np.cos(params[0]) * np.sin(params[2]), + ] + ) + ) + + assert np.allclose(dev_jacobian, expected_jacobian, atol=tol, rtol=0) + + qubit_ops = [getattr(qml, name) for name in qml.ops._qubit__ops__] # pylint: disable=no-member + ops = {qml.RX, qml.RY, qml.RZ, qml.PhaseShift, qml.CRX, qml.CRY, qml.CRZ, qml.Rot} + + @pytest.mark.parametrize("obs", [qml.PauliX, qml.PauliY]) + @pytest.mark.parametrize( + "op", + [ + qml.RX(0.4, wires=0), + qml.RY(0.6, wires=0), + qml.RZ(0.8, wires=0), + qml.CRX(1.0, wires=[0, 1]), + qml.CRY(2.0, wires=[0, 1]), + qml.CRZ(3.0, wires=[0, 1]), + qml.Rot(0.2, -0.1, 0.2, wires=0), + ], + ) + def test_gradients_pauliz(self, op, obs, dev): + """Tests that the gradients of circuits match between the finite difference and device + methods.""" + # op.num_wires and op.num_params must be initialized a priori + with qml.tape.QuantumTape() as tape: + qml.Hadamard(wires=0) + qml.RX(0.543, wires=0) + qml.CNOT(wires=[0, 1]) + + op # pylint: disable=pointless-statement + + qml.Rot(1.3, -2.3, 0.5, wires=[0]) + qml.RZ(-0.5, wires=0) + qml.adjoint(qml.RY(0.5, wires=1), lazy=False) + qml.CNOT(wires=[0, 1]) + + qml.expval(obs(wires=0)) + qml.expval(qml.PauliZ(wires=1)) + + tape.trainable_params = set(range(1, 1 + op.num_params)) + + tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + + # pylint: disable=unnecessary-direct-lambda-call + grad_F = (lambda t, fn: fn(qml.execute(t, dev, None)))(*qml.gradients.param_shift(tape)) + grad_D = dev.adjoint_jacobian(tape) + + assert np.allclose(grad_D, grad_F, atol=tol, rtol=0) + + @pytest.mark.parametrize( + "op", + [ + qml.RX(0.4, wires=0), + qml.RY(0.6, wires=0), + qml.RZ(0.8, wires=0), + qml.CRX(1.0, wires=[0, 1]), + qml.CRY(2.0, wires=[0, 1]), + qml.CRZ(3.0, wires=[0, 1]), + qml.Rot(0.2, -0.1, 0.2, 
wires=0), + ], + ) + def test_gradients_hermitian(self, op, dev): + """Tests that the gradients of circuits match between the finite difference and device + methods.""" + # op.num_wires and op.num_params must be initialized a priori + with qml.tape.QuantumTape() as tape: + qml.Hadamard(wires=0) + qml.RX(0.543, wires=0) + qml.CNOT(wires=[0, 1]) + + op.queue() + + qml.Rot(1.3, -2.3, 0.5, wires=[0]) + qml.RZ(-0.5, wires=0) + qml.adjoint(qml.RY(0.5, wires=1), lazy=False) + qml.CNOT(wires=[0, 1]) + + qml.expval( + qml.Hermitian( + [[0, 0, 1, 1], [0, 1, 2, 1], [1, 2, 1, 0], [1, 1, 0, 0]], + wires=[0, 1], + ) + ) + + tape.trainable_params = set(range(1, 1 + op.num_params)) + + tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + + # pylint: disable=unnecessary-direct-lambda-call + grad_F = (lambda t, fn: fn(qml.execute(t, dev, None)))(*qml.gradients.param_shift(tape)) + grad_D = dev.adjoint_jacobian(tape) + + assert np.allclose(grad_D, grad_F, atol=tol, rtol=0) + + def test_gradient_gate_with_multiple_parameters_pauliz(self, dev): + """Tests that gates with multiple free parameters yield correct gradients.""" + x, y, z = [0.5, 0.3, -0.7] + + tape = qml.tape.QuantumScript( + [ + qml.RX(0.4, wires=[0]), + qml.Rot(x, y, z, wires=[0]), + qml.RY(-0.2, wires=[0]), + ], + [qml.expval(qml.PauliZ(0))], + ) + + tape.trainable_params = {1, 2, 3} + + tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + + grad_D = dev.adjoint_jacobian(tape) + tapes, fn = qml.gradients.param_shift(tape) + grad_F = fn(qml.execute(tapes, dev, None)) + + # gradient has the correct shape and every element is nonzero + assert len(grad_D) == 3 + assert all(isinstance(v, np.ndarray) for v in grad_D) + assert np.count_nonzero(grad_D) == 3 + # the different methods agree + assert np.allclose(grad_D, grad_F, atol=tol, rtol=0) + + def test_gradient_gate_with_multiple_parameters_hermitian(self, dev): + """Tests that gates with multiple free parameters yield correct gradients.""" + x, y, z = [0.5, 0.3, -0.7] + + tape 
= qml.tape.QuantumScript( + [ + qml.RX(0.4, wires=[0]), + qml.Rot(x, y, z, wires=[0]), + qml.RY(-0.2, wires=[0]), + ], + [qml.expval(qml.Hermitian([[0, 1], [1, 1]], wires=0))], + ) + + tape.trainable_params = {1, 2, 3} + + tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + + grad_D = dev.adjoint_jacobian(tape) + tapes, fn = qml.gradients.param_shift(tape) + grad_F = fn(qml.execute(tapes, dev, None)) + + # gradient has the correct shape and every element is nonzero + assert len(grad_D) == 3 + assert all(isinstance(v, np.ndarray) for v in grad_D) + assert np.count_nonzero(grad_D) == 3 + # the different methods agree + assert np.allclose(grad_D, grad_F, atol=tol, rtol=0) + + @pytest.mark.skipif(not ld._CPP_BINARY_AVAILABLE, reason="Lightning binary required") + def test_gradient_gate_with_multiple_parameters_hamiltonian(self, dev): + """Tests that gates with multiple free parameters yield correct gradients.""" + x, y, z = [0.5, 0.3, -0.7] + + ham = qml.Hamiltonian( + [1.0, 0.3, 0.3], + [qml.PauliX(0) @ qml.PauliX(1), qml.PauliZ(0), qml.PauliZ(1)], + ) + + tape = qml.tape.QuantumScript( + [ + qml.RX(0.4, wires=[0]), + qml.Rot(x, y, z, wires=[0]), + qml.RY(-0.2, wires=[0]), + ], + [qml.expval(ham)], + ) + + tape.trainable_params = {1, 2, 3} + + tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + + grad_D = dev.adjoint_jacobian(tape) + tapes, fn = qml.gradients.param_shift(tape) + grad_F = fn(qml.execute(tapes, dev, None)) + + # gradient has the correct shape and every element is nonzero + assert len(grad_D) == 3 + assert all(isinstance(v, np.ndarray) for v in grad_D) + assert np.count_nonzero(grad_D) == 3 + # the different methods agree + assert np.allclose(grad_D, grad_F, atol=tol, rtol=0) + + def test_use_device_state(self, tol, dev): + """Tests that when using the device state, the correct answer is still returned.""" + + x, y, z = [0.5, 0.3, -0.7] + + with qml.tape.QuantumTape() as tape: + qml.RX(0.4, wires=[0]) + qml.Rot(x, y, z, wires=[0]) + qml.RY(-0.2, 
wires=[0]) + qml.expval(qml.PauliZ(0)) + + tape.trainable_params = {1, 2, 3} + + dM1 = dev.adjoint_jacobian(tape) + + qml.execute([tape], dev, None) + dM2 = dev.adjoint_jacobian(tape, use_device_state=True) + + assert np.allclose(dM1, dM2, atol=tol, rtol=0) + + def test_provide_starting_state(self, tol, dev): + """Tests provides correct answer when provided starting state.""" + comm = MPI.COMM_WORLD + + x, y, z = [0.5, 0.3, -0.7] + + with qml.tape.QuantumTape() as tape: + qml.RX(0.4, wires=[0]) + qml.Rot(x, y, z, wires=[0]) + qml.RY(-0.2, wires=[0]) + qml.expval(qml.PauliZ(0)) + + tape.trainable_params = {1, 2, 3} + + dM1 = dev.adjoint_jacobian(tape) + + if device_name == "lightning.gpu": + local_state_vector = dev.state + complex_type = np.complex128 if dev.R_DTYPE == np.float64 else np.complex64 + state_vector = np.zeros(1 << 8).astype(complex_type) + comm.Allgather(local_state_vector, state_vector) + qml.execute([tape], dev, None) + dM2 = dev.adjoint_jacobian(tape, starting_state=state_vector) + assert np.allclose(dM1, dM2, atol=tol, rtol=0) + + @pytest.mark.skipif(not ld._CPP_BINARY_AVAILABLE, reason="Lightning binary required") + def test_provide_wrong_starting_state(self, dev): + """Tests raise an exception when provided starting state mismatches.""" + x, y, z = [0.5, 0.3, -0.7] + + with qml.tape.QuantumTape() as tape: + qml.RX(0.4, wires=[0]) + qml.Rot(x, y, z, wires=[0]) + qml.RY(-0.2, wires=[0]) + qml.expval(qml.PauliZ(0)) + + tape.trainable_params = {1, 2, 3} + + with pytest.raises( + qml.QuantumFunctionError, + match="The number of qubits of starting_state must be the same as", + ): + dev.adjoint_jacobian(tape, starting_state=np.ones(7)) + + @pytest.mark.skipif( + device_name == "lightning.gpu", + reason="Adjoint differentiation does not support State measurements.", + ) + @pytest.mark.skipif(not ld._CPP_BINARY_AVAILABLE, reason="Lightning binary required") + def test_state_return_type(self, dev): + """Tests raise an exception when the return type is 
State""" + with qml.tape.QuantumTape() as tape: + qml.RX(0.4, wires=[0]) + qml.state() + + tape.trainable_params = {0} + + with pytest.raises( + qml.QuantumFunctionError, + match="This method does not support statevector return type.", + ): + dev.adjoint_jacobian(tape) + + +class TestAdjointJacobianQNode: + """Test QNode integration with the adjoint_jacobian method""" + + @pytest.fixture(params=fixture_params) + def dev(self, request): + """Returns a PennyLane device.""" + return qml.device( + device_name, + wires=8, + mpi=True, + c_dtype=request.param[0], + batch_obs=request.param[1], + ) + + def test_finite_shots_warning(self): + """Tests that a warning is raised when computing the adjoint diff on a device with finite shots""" + + dev = qml.device(device_name, wires=8, mpi=True, shots=1) + + with pytest.warns( + UserWarning, + match="Requested adjoint differentiation to be computed with finite shots.", + ): + + @qml.qnode(dev, diff_method="adjoint") + def circ(x): + qml.RX(x, wires=0) + return qml.expval(qml.PauliZ(0)) + + with pytest.warns( + UserWarning, + match="Requested adjoint differentiation to be computed with finite shots.", + ): + qml.grad(circ)(0.1) + + @pytest.mark.skipif(not ld._CPP_BINARY_AVAILABLE, reason="Lightning binary required") + def test_qnode(self, mocker, dev): + """Test that specifying diff_method allows the adjoint method to be selected""" + args = np.array([0.54, 0.1, 0.5], requires_grad=True) + + def circuit(x, y, z): + qml.Hadamard(wires=0) + qml.RX(0.543, wires=0) + qml.CNOT(wires=[0, 1]) + + qml.Rot(x, y, z, wires=0) + + qml.Rot(1.3, -2.3, 0.5, wires=[0]) + qml.RZ(-0.5, wires=0) + qml.RY(0.5, wires=1) + qml.CNOT(wires=[0, 1]) + + return qml.expval(qml.PauliX(0) @ qml.PauliZ(1)) + + qnode1 = QNode(circuit, dev, diff_method="adjoint") + spy = mocker.spy(dev, "adjoint_jacobian") + + grad_fn = qml.grad(qnode1) + grad_A = grad_fn(*args) + + spy.assert_called() + + h = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + tol = 1e-3 if 
dev.R_DTYPE == np.float32 else 1e-7 + + qnode2 = QNode(circuit, dev, diff_method="finite-diff", h=h) + grad_fn = qml.grad(qnode2) + grad_F = grad_fn(*args) + + assert np.allclose(grad_A, grad_F, atol=tol, rtol=0) + + thetas = np.linspace(-2 * np.pi, 2 * np.pi, 8) + + @pytest.mark.parametrize("reused_p", thetas**3 / 19) + @pytest.mark.parametrize("other_p", thetas**2 / 1) + def test_fanout_multiple_params( + self, reused_p, other_p, tol, mocker, dev + ): # pylint: disable=too-many-arguments + """Tests that the correct gradient is computed for qnodes which + use the same parameter in multiple gates.""" + + def expZ(state): + return np.abs(state[0]) ** 2 - np.abs(state[1]) ** 2 + + extra_param = np.array(0.31, requires_grad=False) + + @qnode(dev, diff_method="adjoint") + def cost(p1, p2): + qml.RX(extra_param, wires=[0]) + qml.RY(p1, wires=[0]) + qml.RZ(p2, wires=[0]) + qml.RX(p1, wires=[0]) + return qml.expval(qml.PauliZ(0)) + + zero_state = np.array([1.0, 0.0]) + cost(reused_p, other_p) + + spy = mocker.spy(dev, "adjoint_jacobian") + + # analytic gradient + grad_fn = qml.grad(cost) + grad_D = grad_fn(reused_p, other_p) + + spy.assert_called_once() + + # manual gradient + grad_true0 = ( + expZ( + Rx(reused_p) @ Rz(other_p) @ Ry(reused_p + np.pi / 2) @ Rx(extra_param) @ zero_state + ) + - expZ( + Rx(reused_p) @ Rz(other_p) @ Ry(reused_p - np.pi / 2) @ Rx(extra_param) @ zero_state + ) + ) / 2 + grad_true1 = ( + expZ( + Rx(reused_p + np.pi / 2) @ Rz(other_p) @ Ry(reused_p) @ Rx(extra_param) @ zero_state + ) + - expZ( + Rx(reused_p - np.pi / 2) @ Rz(other_p) @ Ry(reused_p) @ Rx(extra_param) @ zero_state + ) + ) / 2 + expected = grad_true0 + grad_true1 # product rule + + assert np.allclose(grad_D[0], expected, atol=tol, rtol=0) + + @pytest.mark.skipif(not ld._CPP_BINARY_AVAILABLE, reason="Lightning binary required") + def test_gradient_repeated_gate_parameters(self, mocker, dev): + """Tests that repeated use of a free parameter in a multi-parameter gate yields correct + 
gradients.""" + params = np.array([0.8, 1.3], requires_grad=True) + + def circuit(params): + qml.RX(np.array(np.pi / 4, requires_grad=False), wires=[0]) + qml.Rot(params[1], params[0], 2 * params[0], wires=[0]) + return qml.expval(qml.PauliX(0)) + + spy_analytic = mocker.spy(dev, "adjoint_jacobian") + + h = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + + cost = QNode(circuit, dev, diff_method="finite-diff", h=h) + + grad_fn = qml.grad(cost) + grad_F = grad_fn(params) + + spy_analytic.assert_not_called() + + cost = QNode(circuit, dev, diff_method="adjoint") + grad_fn = qml.grad(cost) + grad_D = grad_fn(params) + + spy_analytic.assert_called_once() + + # the different methods agree + assert np.allclose(grad_D, grad_F, atol=tol, rtol=0) + + def test_interface_tf(self, dev): + """Test if gradients agree between the adjoint and finite-diff methods when using the + TensorFlow interface""" + + tf = pytest.importorskip("tensorflow") + + def f(params1, params2): + qml.RX(0.4, wires=[0]) + qml.RZ(params1 * tf.sqrt(params2), wires=[0]) + qml.RY(tf.cos(params2), wires=[0]) + return qml.expval(qml.PauliZ(0)) + + if dev.R_DTYPE == np.float32: + tf_r_dtype = tf.float32 + else: + tf_r_dtype = tf.float64 + + params1 = tf.Variable(0.3, dtype=tf_r_dtype) + params2 = tf.Variable(0.4, dtype=tf_r_dtype) + + h = 2e-3 if dev.R_DTYPE == np.float32 else 1e-7 + tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + + qnode1 = QNode(f, dev, interface="tf", diff_method="adjoint") + qnode2 = QNode(f, dev, interface="tf", diff_method="finite-diff", h=h) + + with tf.GradientTape() as tape: + res1 = qnode1(params1, params2) + + g1 = tape.gradient(res1, [params1, params2]) + + with tf.GradientTape() as tape: + res2 = qnode2(params1, params2) + + g2 = tape.gradient(res2, [params1, params2]) + + assert np.allclose(g1, g2, atol=tol) + + def test_interface_torch(self, dev): + """Test if gradients agree between the adjoint and finite-diff methods when 
using the + Torch interface""" + + torch = pytest.importorskip("torch") + + def f(params1, params2): + qml.RX(0.4, wires=[0]) + qml.RZ(params1 * torch.sqrt(params2), wires=[0]) + qml.RY(torch.cos(params2), wires=[0]) + return qml.expval(qml.PauliZ(0)) + + params1 = torch.tensor(0.3, requires_grad=True) + params2 = torch.tensor(0.4, requires_grad=True) + + h = 2e-3 if dev.R_DTYPE == np.float32 else 1e-7 + + qnode1 = QNode(f, dev, interface="torch", diff_method="adjoint") + qnode2 = QNode(f, dev, interface="torch", diff_method="finite-diff", h=h) + + res1 = qnode1(params1, params2) + res1.backward() + + grad_adjoint = params1.grad, params2.grad + + res2 = qnode2(params1, params2) + res2.backward() + + grad_fd = params1.grad, params2.grad + + assert np.allclose(grad_adjoint, grad_fd) + + def test_interface_jax(self, dev): + """Test if the gradients agree between adjoint and finite-difference methods in the + jax interface""" + + jax = pytest.importorskip("jax") + if dev.R_DTYPE == np.float64: + from jax.config import config # pylint: disable=import-outside-toplevel + + config.update("jax_enable_x64", True) + + def f(params1, params2): + qml.RX(0.4, wires=[0]) + qml.RZ(params1 * jax.numpy.sqrt(params2), wires=[0]) + qml.RY(jax.numpy.cos(params2), wires=[0]) + return qml.expval(qml.PauliZ(0)) + + params1 = jax.numpy.array(0.3, dev.R_DTYPE) + params2 = jax.numpy.array(0.4, dev.R_DTYPE) + + h = 2e-3 if dev.R_DTYPE == np.float32 else 1e-7 + tol = 1e-3 if dev.R_DTYPE == np.float32 else 1e-7 + + qnode_adjoint = QNode(f, dev, interface="jax", diff_method="adjoint") + qnode_fd = QNode(f, dev, interface="jax", diff_method="finite-diff", h=h) + + grad_adjoint = jax.grad(qnode_adjoint)(params1, params2) + grad_fd = jax.grad(qnode_fd)(params1, params2) + + assert np.allclose(grad_adjoint, grad_fd, atol=tol) + + +def circuit_ansatz(params, wires): + """Circuit ansatz containing all the parametrized gates""" + qml.QubitStateVector(unitary_group.rvs(2**8, random_state=0)[0], 
wires=wires) + qml.RX(params[0], wires=wires[0]) + qml.RY(params[1], wires=wires[1]) + qml.adjoint(qml.RX(params[2], wires=wires[2])) + qml.RZ(params[0], wires=wires[3]) + qml.CRX(params[3], wires=[wires[3], wires[0]]) + qml.PhaseShift(params[4], wires=wires[2]) + qml.CRY(params[5], wires=[wires[2], wires[1]]) + qml.adjoint(qml.CRZ(params[5], wires=[wires[0], wires[3]])) + qml.adjoint(qml.PhaseShift(params[6], wires=wires[0])) + qml.Rot(params[6], params[7], params[8], wires=wires[0]) + qml.adjoint(qml.Rot(params[8], params[8], params[9], wires=wires[1])) + qml.MultiRZ(params[11], wires=[wires[0], wires[1]]) + qml.PauliRot(params[12], "XXYZ", wires=[wires[0], wires[1], wires[2], wires[3]]) + qml.CPhase(params[12], wires=[wires[3], wires[2]]) + qml.IsingXX(params[13], wires=[wires[1], wires[0]]) + qml.IsingXY(params[14], wires=[wires[3], wires[2]]) + qml.IsingYY(params[14], wires=[wires[3], wires[2]]) + qml.IsingZZ(params[14], wires=[wires[2], wires[1]]) + qml.U1(params[15], wires=wires[0]) + qml.U2(params[16], params[17], wires=wires[0]) + qml.U3(params[18], params[19], params[20], wires=wires[1]) + qml.adjoint(qml.CRot(params[21], params[22], params[23], wires=[wires[1], wires[2]])) + qml.SingleExcitation(params[24], wires=[wires[2], wires[0]]) + qml.DoubleExcitation(params[25], wires=[wires[2], wires[0], wires[1], wires[3]]) + qml.SingleExcitationPlus(params[26], wires=[wires[0], wires[2]]) + qml.SingleExcitationMinus(params[27], wires=[wires[0], wires[2]]) + qml.DoubleExcitationPlus(params[27], wires=[wires[2], wires[0], wires[1], wires[3]]) + qml.DoubleExcitationMinus(params[27], wires=[wires[2], wires[0], wires[1], wires[3]]) + qml.RX(params[28], wires=wires[0]) + qml.RX(params[29], wires=wires[1]) + + +@pytest.mark.parametrize( + "returns", + [ + qml.PauliZ(0), + qml.PauliX(2), + qml.PauliZ(0) @ qml.PauliY(3), + qml.Hadamard(2), + qml.Hadamard(3) @ qml.PauliZ(2), + qml.PauliX(0) @ qml.PauliY(3), + qml.PauliY(0) @ qml.PauliY(2) @ qml.PauliY(3), + 
qml.Hermitian( + np.kron(qml.PauliY.compute_matrix(), qml.PauliZ.compute_matrix()), + wires=[3, 2], + ), + qml.Hermitian(np.array([[0, 1], [1, 0]], requires_grad=False), wires=0), + qml.Hermitian(np.array([[0, 1], [1, 0]], requires_grad=False), wires=0) @ qml.PauliZ(2), + ], +) +def test_integration(returns): + """Integration tests that compare to default.qubit for a large circuit containing parametrized + operations""" + dev_def = qml.device("default.qubit", wires=range(8)) + dev_lightning = qml.device(device_name, wires=range(8), mpi=True) + + def circuit(params): + circuit_ansatz(params, wires=range(8)) + return qml.expval(returns), qml.expval(qml.PauliY(1)) + + n_params = 30 + params = np.linspace(0, 10, n_params) + + qnode_def = qml.QNode(circuit, dev_def) + qnode_lightning = qml.QNode(circuit, dev_lightning, diff_method="adjoint") + + def casted_to_array_def(params): + return np.array(qnode_def(params)) + + def casted_to_array_lightning(params): + return np.array(qnode_lightning(params)) + + j_def = qml.jacobian(casted_to_array_def)(params) + j_lightning = qml.jacobian(casted_to_array_lightning)(params) + + assert np.allclose(j_def, j_lightning) + + +custom_wires = ["alice", 3.14, -1, 0, "bob", 1, "unit", "test"] + + +@pytest.mark.parametrize( + "returns", + [ + qml.PauliZ(custom_wires[0]), + qml.PauliX(custom_wires[2]), + qml.PauliZ(custom_wires[0]) @ qml.PauliY(custom_wires[3]), + qml.Hadamard(custom_wires[2]), + qml.Hadamard(custom_wires[3]) @ qml.PauliZ(custom_wires[2]), + # qml.Projector([0, 1], wires=[custom_wires[0], custom_wires[2]]) @ qml.Hadamard(custom_wires[3]) + # qml.Projector([0, 0], wires=[custom_wires[2], custom_wires[0]]) + qml.PauliX(custom_wires[0]) @ qml.PauliY(custom_wires[3]), + qml.PauliY(custom_wires[0]) @ qml.PauliY(custom_wires[2]) @ qml.PauliY(custom_wires[3]), + qml.Hermitian(np.array([[0, 1], [1, 0]], requires_grad=False), wires=custom_wires[0]), + qml.Hermitian( + np.kron(qml.PauliY.compute_matrix(), 
qml.PauliZ.compute_matrix()), + wires=[custom_wires[3], custom_wires[2]], + ), + qml.Hermitian(np.array([[0, 1], [1, 0]], requires_grad=False), wires=custom_wires[0]) + @ qml.PauliZ(custom_wires[2]), + ], +) +def test_integration_custom_wires(returns): + """Integration tests that compare to default.qubit for a large circuit containing parametrized + operations and when using custom wire labels""" + dev_def = qml.device("default.qubit", wires=custom_wires) + dev_lightning = qml.device(device_name, wires=custom_wires, mpi=True, batch_obs=False) + + def circuit(params): + circuit_ansatz(params, wires=custom_wires) + return qml.expval(returns), qml.expval(qml.PauliY(custom_wires[1])) + + n_params = 30 + params = np.linspace(0, 10, n_params) + + qnode_def = qml.QNode(circuit, dev_def) + qnode_lightning = qml.QNode(circuit, dev_lightning, diff_method="adjoint") + + def casted_to_array_def(params): + return np.array(qnode_def(params)) + + def casted_to_array_lightning(params): + return np.array(qnode_lightning(params)) + + j_def = qml.jacobian(casted_to_array_def)(params) + j_lightning = qml.jacobian(casted_to_array_lightning)(params) + + assert np.allclose(j_def, j_lightning) + + +@pytest.mark.parametrize( + "returns", + [ + (qml.PauliZ(custom_wires[0]),), + (qml.PauliZ(custom_wires[0]), qml.PauliZ(custom_wires[1])), + ( + qml.PauliZ(custom_wires[0]), + qml.PauliZ(custom_wires[1]), + qml.PauliZ(custom_wires[3]), + ), + ( + qml.PauliZ(custom_wires[0]), + qml.PauliZ(custom_wires[1]), + qml.PauliZ(custom_wires[3]), + qml.PauliZ(custom_wires[2]), + ), + ( + qml.PauliZ(custom_wires[0]) @ qml.PauliY(custom_wires[3]), + qml.PauliZ(custom_wires[1]) @ qml.PauliY(custom_wires[2]), + ), + ( + qml.PauliZ(custom_wires[0]) @ qml.PauliY(custom_wires[3]), + qml.PauliZ(custom_wires[1]), + ), + ], +) +def test_integration_custom_wires_batching(returns): + """Integration tests that compare to default.qubit for a large circuit containing parametrized + operations and when using custom wire 
labels""" + + dev_def = qml.device("default.qubit", wires=custom_wires) + dev_gpu = qml.device("lightning.gpu", wires=custom_wires, mpi=True, batch_obs=True) + + def circuit(params): + circuit_ansatz(params, wires=custom_wires) + return [qml.expval(r) for r in returns] + [qml.expval(qml.PauliY(custom_wires[1]))] + + n_params = 30 + np.random.seed(1337) + params = np.random.rand(n_params) + + qnode_gpu = qml.QNode(circuit, dev_gpu, diff_method="adjoint") + qnode_def = qml.QNode(circuit, dev_def) + + def convert_to_array_gpu(params): + return np.hstack(qnode_gpu(params)) + + def convert_to_array_def(params): + return np.hstack(qnode_def(params)) + + j_gpu = qml.jacobian(convert_to_array_gpu)(params) + j_def = qml.jacobian(convert_to_array_def)(params) + + assert np.allclose(j_gpu, j_def, atol=1e-7) + + +@pytest.mark.parametrize( + "returns", + [ + (0.5 * qml.PauliZ(custom_wires[0]),), + (0.5 * qml.PauliZ(custom_wires[0]), qml.PauliZ(custom_wires[1])), + ( + qml.PauliZ(custom_wires[0]), + 0.5 * qml.PauliZ(custom_wires[1]), + qml.PauliZ(custom_wires[3]), + ), + ( + qml.PauliZ(custom_wires[0]), + qml.PauliZ(custom_wires[1]), + qml.PauliZ(custom_wires[3]), + 0.5 * qml.PauliZ(custom_wires[2]), + ), + ( + qml.PauliZ(custom_wires[0]) @ qml.PauliY(custom_wires[3]), + 0.5 * qml.PauliZ(custom_wires[1]) @ qml.PauliY(custom_wires[2]), + ), + ( + qml.PauliZ(custom_wires[0]) @ qml.PauliY(custom_wires[3]), + 0.5 * qml.PauliZ(custom_wires[1]), + ), + ( + 0.0 * qml.PauliZ(custom_wires[0]) @ qml.PauliZ(custom_wires[1]), + 1.0 * qml.Identity(10), + 1.2 * qml.PauliZ(custom_wires[2]) @ qml.PauliZ(custom_wires[3]), + ), + ], +) +def test_batching_H(returns): + """Integration tests that compare to default.qubit for a large circuit containing parametrized + operations and when using custom wire labels""" + + dev_cpu = qml.device("default.qubit", wires=custom_wires + [10, 72]) + dev_gpu = qml.device(device_name, wires=custom_wires + [10, 72], batch_obs=True) + dev_gpu_default = 
qml.device(device_name, wires=custom_wires + [10, 72], batch_obs=False) + + def circuit(params): + circuit_ansatz(params, wires=custom_wires) + return qml.math.hstack([qml.expval(r) for r in returns]) + + n_params = 30 + np.random.seed(1337) + params = np.random.rand(n_params) + + qnode_cpu = qml.QNode(circuit, dev_cpu, diff_method="parameter-shift") + qnode_gpu = qml.QNode(circuit, dev_gpu, diff_method="adjoint") + qnode_gpu_default = qml.QNode(circuit, dev_gpu_default, diff_method="adjoint") + + j_cpu = qml.jacobian(qnode_cpu)(params) + j_gpu = qml.jacobian(qnode_gpu)(params) + j_gpu_default = qml.jacobian(qnode_gpu_default)(params) + + assert np.allclose(j_cpu, j_gpu) + assert np.allclose(j_gpu, j_gpu_default) + + +@pytest.fixture(scope="session") +def create_xyz_file(tmp_path_factory): + """Creates a coordinate file for an H2 molecule in the XYZ format.""" + directory = tmp_path_factory.mktemp("tmp") + file = directory / "h2.xyz" + file.write_text("""2\nH2, Unoptimized\nH 1.0 0.0 0.0\nH -1.0 0.0 0.0""") + yield file + + +@pytest.mark.parametrize( + "batches", + [False, True, 1, 2, 3, 4], +) +def test_integration_H2_Hamiltonian( + create_xyz_file, batches +): # pylint: disable=redefined-outer-name + """Tests getting the total energy and its derivatives for an H2 Hamiltonian.""" + _ = pytest.importorskip("openfermionpyscf") + + n_electrons = 2 + np.random.seed(1337) + + str_path = create_xyz_file + symbols, coordinates = qml.qchem.read_structure(str(str_path), outpath=str(str_path.parent)) + + H, qubits = qml.qchem.molecular_hamiltonian( + symbols, + coordinates, + method="pyscf", + basis="6-31G", + active_electrons=n_electrons, + name="h2", + outpath=str(str_path.parent), + load_data=True, + ) + hf_state = qml.qchem.hf_state(n_electrons, qubits) + _, doubles = qml.qchem.excitations(n_electrons, qubits) + + # Choose different batching supports here + dev = qml.device(device_name, wires=qubits, mpi=True, batch_obs=batches) + dev_comp = qml.device("default.qubit", 
wires=qubits) + + @qml.qnode(dev, diff_method="adjoint") + def circuit(params, excitations): + qml.BasisState(hf_state, wires=H.wires) + for i, excitation in enumerate(excitations): + if len(excitation) == 4: + qml.DoubleExcitation(params[i], wires=excitation) + else: + qml.SingleExcitation(params[i], wires=excitation) + return qml.expval(H) + + @qml.qnode(dev_comp, diff_method="parameter-shift") + def circuit_compare(params, excitations): + qml.BasisState(hf_state, wires=H.wires) + + for i, excitation in enumerate(excitations): + if len(excitation) == 4: + qml.DoubleExcitation(params[i], wires=excitation) + else: + qml.SingleExcitation(params[i], wires=excitation) + return qml.expval(H) + + jac_func = qml.jacobian(circuit) + jac_func_comp = qml.jacobian(circuit_compare) + + params = qml.numpy.array([0.0] * len(doubles), requires_grad=True) + jacs = jac_func(params, excitations=doubles) + jacs_comp = jac_func_comp(params, excitations=doubles) + + assert np.allclose(jacs, jacs_comp) + + +@pytest.mark.parametrize( + "returns", + [ + qml.SparseHamiltonian( + qml.Hamiltonian( + [0.1], [qml.PauliX(wires=custom_wires[0]) @ qml.PauliY(wires=custom_wires[1])] + ).sparse_matrix(custom_wires), + wires=custom_wires, + ), + qml.SparseHamiltonian( + qml.Hamiltonian( + [2.0], [qml.PauliX(wires=custom_wires[2]) @ qml.PauliZ(wires=custom_wires[0])] + ).sparse_matrix(custom_wires), + wires=custom_wires, + ), + qml.SparseHamiltonian( + qml.Hamiltonian( + [2.0], [qml.PauliX(wires=custom_wires[1]) @ qml.PauliZ(wires=custom_wires[2])] + ).sparse_matrix(custom_wires), + wires=custom_wires, + ), + qml.SparseHamiltonian( + qml.Hamiltonian( + [1.1], [qml.PauliX(wires=custom_wires[0]) @ qml.PauliZ(wires=custom_wires[2])] + ).sparse_matrix(custom_wires), + wires=custom_wires, + ), + ], +) +def test_adjoint_SparseHamiltonian_custom_wires(returns): + """Integration tests that compare to default.qubit for a large circuit containing parametrized + operations and when using custom wire labels""" 
+ + comm = MPI.COMM_WORLD + dev_gpu = qml.device("lightning.gpu", wires=custom_wires, mpi=True) + dev_cpu = qml.device("default.qubit", wires=custom_wires) + + def circuit(params): + circuit_ansatz(params, wires=custom_wires) + return qml.expval(returns) + + if comm.Get_rank() == 0: + n_params = 30 + np.random.seed(1337) + params = np.random.rand(n_params) + else: + params = None + + params = comm.bcast(params, root=0) + + qnode_gpu = qml.QNode(circuit, dev_gpu, diff_method="adjoint") + qnode_cpu = qml.QNode(circuit, dev_cpu, diff_method="parameter-shift") + + j_gpu = qml.jacobian(qnode_gpu)(params) + j_cpu = qml.jacobian(qnode_cpu)(params) + + assert np.allclose(j_cpu, j_gpu) + + +@pytest.mark.parametrize( + "returns", + [ + qml.SparseHamiltonian( + qml.Hamiltonian( + [0.1], + [qml.PauliZ(1) @ qml.PauliX(0) @ qml.Identity(2) @ qml.PauliX(4) @ qml.Identity(5)], + ).sparse_matrix(range(len(custom_wires))), + wires=range(len(custom_wires)), + ), + qml.SparseHamiltonian( + qml.Hamiltonian( + [0.1], + [qml.PauliX(1) @ qml.PauliZ(0)], + ).sparse_matrix(range(len(custom_wires))), + wires=range(len(custom_wires)), + ), + qml.SparseHamiltonian( + qml.Hamiltonian( + [0.1], + [qml.PauliX(0)], + ).sparse_matrix(range(len(custom_wires))), + wires=range(len(custom_wires)), + ), + qml.SparseHamiltonian( + qml.Hamiltonian( + [0.1], + [qml.PauliX(5)], + ).sparse_matrix(range(len(custom_wires))), + wires=range(len(custom_wires)), + ), + qml.SparseHamiltonian( + qml.Hamiltonian( + [0.1], + [qml.PauliX(0) @ qml.PauliZ(1)], + ).sparse_matrix(range(len(custom_wires))), + wires=range(len(custom_wires)), + ), + qml.SparseHamiltonian( + qml.Hamiltonian([2.0], [qml.PauliX(1) @ qml.PauliZ(2)]).sparse_matrix( + range(len(custom_wires)) + ), + wires=range(len(custom_wires)), + ), + qml.SparseHamiltonian( + qml.Hamiltonian([2.0], [qml.PauliX(2) @ qml.PauliZ(4)]).sparse_matrix( + range(len(custom_wires)) + ), + wires=range(len(custom_wires)), + ), + qml.SparseHamiltonian( + 
qml.Hamiltonian([1.1], [qml.PauliX(2) @ qml.PauliZ(0)]).sparse_matrix( + range(len(custom_wires)) + ), + wires=range(len(custom_wires)), + ), + ], +) +def test_adjoint_SparseHamiltonian(returns): + """Integration tests that compare to default.qubit for a large circuit containing parametrized + operations and when using custom wire labels""" + + comm = MPI.COMM_WORLD + dev_gpu = qml.device("lightning.gpu", wires=len(custom_wires), mpi=True) + dev_cpu = qml.device("default.qubit", wires=len(custom_wires)) + + def circuit(params): + circuit_ansatz(params, wires=range(len(custom_wires))) + return qml.expval(returns) + + if comm.Get_rank() == 0: + n_params = 30 + np.random.seed(1337) + params = np.random.rand(n_params) + else: + params = None + + params = comm.bcast(params, root=0) + + qnode_gpu = qml.QNode(circuit, dev_gpu, diff_method="adjoint") + qnode_cpu = qml.QNode(circuit, dev_cpu, diff_method="parameter-shift") + + j_gpu = qml.jacobian(qnode_gpu)(params) + j_cpu = qml.jacobian(qnode_cpu)(params) + + assert np.allclose(j_cpu, j_gpu) diff --git a/mpitests/test_apply.py b/mpitests/test_apply.py new file mode 100644 index 0000000000..ad9e474fb4 --- /dev/null +++ b/mpitests/test_apply.py @@ -0,0 +1,1049 @@ +# Copyright 2018-2023 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Unit tests for the :mod:`pennylane_lightning_gpu.LightningGPU` device (MPI). 
+""" +# pylint: disable=protected-access,cell-var-from-loop,c-extension-no-member +import itertools +from mpi4py import MPI +import pytest + +from conftest import TOL_STOCHASTIC, device_name, fixture_params + +import numpy as np +import pennylane as qml + + +numQubits = 8 + +# Tuple passed to distributed device ctor +# np.complex for data type and True or False +# for enabling batched_obs. +fixture_params = itertools.product( + [np.complex64, np.complex128], + [True, False], +) + + +def create_random_init_state(numWires, R_DTYPE, seed_value=48): + """Returns a random initial state of a certain type.""" + np.random.seed(seed_value) + num_elements = 1 << numWires + init_state = np.random.rand(num_elements).astype(R_DTYPE) + 1j * np.random.rand( + num_elements + ).astype(R_DTYPE) + scale_sum = np.sqrt(np.sum(np.abs(init_state) ** 2)).astype(R_DTYPE) + init_state = init_state / scale_sum + return init_state + + +def apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires): + """Wrapper applying a parametric gate with QNode function.""" + num_wires = numQubits + comm = MPI.COMM_WORLD + commSize = comm.Get_size() + num_global_wires = commSize.bit_length() - 1 + num_local_wires = num_wires - num_global_wires + + if dev_mpi.R_DTYPE == np.float32: + c_dtype = np.complex64 + else: + c_dtype = np.complex128 + + expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) + local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) + + state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + comm.Bcast(state_vector, root=0) + + comm.Scatter(state_vector, local_state_vector, root=0) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + + def circuit(*params): + qml.StatePrep(state_vector, wires=range(num_wires)) + operation(*params, wires=Wires) + return qml.state() + + cpu_qnode = qml.QNode(circuit, dev_cpu) + expected_output_cpu = 
cpu_qnode(*par).astype(c_dtype) + comm.Scatter(expected_output_cpu, local_expected_output_cpu, root=0) + + mpi_qnode = qml.QNode(circuit, dev_mpi) + local_state_vector = mpi_qnode(*par) + + assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) + + +def apply_operation_gates_apply_param(tol, dev_mpi, operation, par, Wires): + """Wrapper applying a parametric gate with the apply method.""" + num_wires = numQubits + comm = MPI.COMM_WORLD + commSize = comm.Get_size() + num_global_wires = commSize.bit_length() - 1 + num_local_wires = num_wires - num_global_wires + + if dev_mpi.R_DTYPE == np.float32: + c_dtype = np.complex64 + else: + c_dtype = np.complex128 + + expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) + local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) + + state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + comm.Bcast(state_vector, root=0) + + comm.Scatter(state_vector, local_state_vector, root=0) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + + @qml.qnode(dev_cpu) + def circuit(*params): + qml.StatePrep(state_vector, wires=range(num_wires)) + operation(*params, wires=Wires) + return qml.state() + + expected_output_cpu = np.array(circuit(*par)).astype(c_dtype) + comm.Scatter(expected_output_cpu, local_expected_output_cpu, root=0) + + dev_mpi.syncH2D(local_state_vector) + dev_mpi.apply([operation(*par, wires=Wires)]) + dev_mpi.syncD2H(local_state_vector) + + assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) + + +def apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires): + """Wrapper applying a non-parametric gate with QNode function.""" + num_wires = numQubits + comm = MPI.COMM_WORLD + commSize = comm.Get_size() + num_global_wires = commSize.bit_length() - 1 + num_local_wires = num_wires - num_global_wires + + if dev_mpi.R_DTYPE 
== np.float32: + c_dtype = np.complex64 + else: + c_dtype = np.complex128 + + expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) + local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) + + state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + comm.Bcast(state_vector, root=0) + + comm.Scatter(state_vector, local_state_vector, root=0) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + + def circuit(): + qml.StatePrep(state_vector, wires=range(num_wires)) + operation(wires=Wires) + return qml.state() + + cpu_qnode = qml.QNode(circuit, dev_cpu) + expected_output_cpu = cpu_qnode().astype(c_dtype) + comm.Scatter(expected_output_cpu, local_expected_output_cpu, root=0) + + mpi_qnode = qml.QNode(circuit, dev_mpi) + local_state_vector = mpi_qnode() + + assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) + + +def apply_operation_gates_apply_nonparam(tol, dev_mpi, operation, Wires): + """Wrapper applying a non-parametric gate with the apply method.""" + num_wires = numQubits + comm = MPI.COMM_WORLD + commSize = comm.Get_size() + num_global_wires = commSize.bit_length() - 1 + num_local_wires = num_wires - num_global_wires + + if dev_mpi.R_DTYPE == np.float32: + c_dtype = np.complex64 + else: + c_dtype = np.complex128 + + expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) + local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) + + state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + comm.Bcast(state_vector, root=0) + + comm.Scatter(state_vector, local_state_vector, root=0) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + + @qml.qnode(dev_cpu) + def circuit(): + qml.StatePrep(state_vector, wires=range(num_wires)) + operation(wires=Wires) + return qml.state() 
+ + expected_output_cpu = np.array(circuit()).astype(c_dtype) + comm.Scatter(expected_output_cpu, local_expected_output_cpu, root=0) + + dev_mpi.syncH2D(local_state_vector) + dev_mpi.apply([operation(wires=Wires)]) + dev_mpi.syncD2H(local_state_vector) + + assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) + + +class TestApply: # pylint: disable=missing-function-docstring,too-many-arguments + """Tests whether the device can apply supported quantum gates.""" + + @pytest.fixture(params=fixture_params) + def dev_mpi(self, request): + return qml.device( + device_name, + wires=numQubits, + mpi=True, + c_dtype=request.param[0], + batch_obs=request.param[1], + ) + + # Parameterized test case for single wire nonparam gates + @pytest.mark.parametrize( + "operation", [qml.PauliX, qml.PauliY, qml.PauliZ, qml.Hadamard, qml.S, qml.T] + ) + @pytest.mark.parametrize("Wires", [0, 1, numQubits - 2, numQubits - 1]) + def test_apply_operation_single_wire_nonparam(self, tol, operation, Wires, dev_mpi): + apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires) + apply_operation_gates_apply_nonparam(tol, dev_mpi, operation, Wires) + + @pytest.mark.parametrize("operation", [qml.CNOT, qml.SWAP, qml.CY, qml.CZ]) + @pytest.mark.parametrize("Wires", [[0, 1], [numQubits - 2, numQubits - 1], [0, numQubits - 1]]) + def test_apply_operation_two_wire_nonparam(self, tol, operation, Wires, dev_mpi): + apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires) + apply_operation_gates_apply_nonparam(tol, dev_mpi, operation, Wires) + + @pytest.mark.parametrize("operation", [qml.CSWAP, qml.Toffoli]) + @pytest.mark.parametrize( + "Wires", + [ + [0, 1, 2], + [numQubits - 3, numQubits - 2, numQubits - 1], + [0, 1, numQubits - 1], + [0, numQubits - 2, numQubits - 1], + ], + ) + def test_apply_operation_three_wire_nonparam(self, tol, operation, Wires, dev_mpi): + apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires) + 
apply_operation_gates_apply_nonparam(tol, dev_mpi, operation, Wires) + + @pytest.mark.parametrize("operation", [qml.CSWAP, qml.Toffoli]) + @pytest.mark.parametrize( + "Wires", + [ + [0, 1, 2], + [numQubits - 3, numQubits - 2, numQubits - 1], + [0, 1, numQubits - 1], + [0, numQubits - 2, numQubits - 1], + ], + ) + def test_apply_operation_three_wire_qnode_nonparam(self, tol, operation, Wires, dev_mpi): + apply_operation_gates_qnode_nonparam(tol, dev_mpi, operation, Wires) + apply_operation_gates_apply_nonparam(tol, dev_mpi, operation, Wires) + + @pytest.mark.parametrize("operation", [qml.PhaseShift, qml.RX, qml.RY, qml.RZ]) + @pytest.mark.parametrize("par", [[0.1], [0.2], [0.3]]) + @pytest.mark.parametrize("Wires", [0, numQubits - 1]) + def test_apply_operation_1gatequbit_1param_gate_qnode_param( + self, tol, operation, par, Wires, dev_mpi + ): + apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires) + apply_operation_gates_apply_param(tol, dev_mpi, operation, par, Wires) + + @pytest.mark.parametrize("operation", [qml.Rot]) + @pytest.mark.parametrize("par", [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4]]) + @pytest.mark.parametrize("Wires", [0, numQubits - 1]) + def test_apply_operation_1gatequbit_3param_gate_qnode_param( + self, tol, operation, par, Wires, dev_mpi + ): + apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires) + apply_operation_gates_apply_param(tol, dev_mpi, operation, par, Wires) + + @pytest.mark.parametrize("operation", [qml.CRot]) + @pytest.mark.parametrize("par", [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4]]) + @pytest.mark.parametrize("Wires", [[0, numQubits - 1], [0, 1], [numQubits - 2, numQubits - 1]]) + def test_apply_operation_1gatequbit_3param_cgate_qnode_param( + self, tol, operation, par, Wires, dev_mpi + ): + apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires) + apply_operation_gates_apply_param(tol, dev_mpi, operation, par, Wires) + + @pytest.mark.parametrize( + "operation", + [ + qml.CRX, + qml.CRY, + 
qml.CRZ, + qml.ControlledPhaseShift, + qml.SingleExcitation, + qml.SingleExcitationMinus, + qml.SingleExcitationPlus, + qml.IsingXX, + qml.IsingYY, + qml.IsingZZ, + ], + ) + @pytest.mark.parametrize("par", [[0.1], [0.2], [0.3]]) + @pytest.mark.parametrize("Wires", [[0, numQubits - 1], [0, 1], [numQubits - 2, numQubits - 1]]) + def test_apply_operation_2gatequbit_1param_gate_qnode_param( + self, tol, operation, par, Wires, dev_mpi + ): + apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires) + apply_operation_gates_apply_param(tol, dev_mpi, operation, par, Wires) + + @pytest.mark.parametrize( + "operation", + [qml.DoubleExcitation, qml.DoubleExcitationMinus, qml.DoubleExcitationPlus], + ) + @pytest.mark.parametrize("par", [[0.13], [0.2], [0.3]]) + @pytest.mark.parametrize( + "Wires", + [ + [0, 1, numQubits - 2, numQubits - 1], + [0, 1, 2, 3], + [numQubits - 4, numQubits - 3, numQubits - 2, numQubits - 1], + ], + ) + def test_apply_operation_4gatequbit_1param_gate_qnode_param( + self, tol, operation, par, Wires, dev_mpi + ): + apply_operation_gates_qnode_param(tol, dev_mpi, operation, par, Wires) + apply_operation_gates_apply_param(tol, dev_mpi, operation, par, Wires) + + # BasisState test + @pytest.mark.parametrize("operation", [qml.BasisState]) + @pytest.mark.parametrize("index", range(numQubits)) + def test_state_prep(self, tol, operation, index, dev_mpi): + par = np.zeros(numQubits, dtype=int) + par[index] = 1 + num_wires = numQubits + comm = MPI.COMM_WORLD + commSize = comm.Get_size() + num_global_wires = commSize.bit_length() - 1 + num_local_wires = num_wires - num_global_wires + + if dev_mpi.R_DTYPE == np.float32: + c_dtype = np.complex64 + else: + c_dtype = np.complex128 + + state_vector = np.zeros(1 << num_wires).astype(c_dtype) + expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) + local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) + + 
state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + + comm.Scatter(state_vector, local_state_vector, root=0) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + + def circuit(): + operation(par, wires=range(numQubits)) + return qml.state() + + cpu_qnode = qml.QNode(circuit, dev_cpu) + mpi_qnode = qml.QNode(circuit, dev_mpi) + + expected_output_cpu = cpu_qnode().astype(c_dtype) + comm.Scatter(expected_output_cpu, local_expected_output_cpu, root=0) + + local_state_vector = mpi_qnode() + + assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) + + @pytest.mark.parametrize( + "par, Wires", + [ + (np.array([1 / np.sqrt(2), 1 / np.sqrt(2)]), [0]), + (np.array([1 / np.sqrt(2), 1 / np.sqrt(2)]), [1]), + (np.array([1 / np.sqrt(2), 1 / np.sqrt(2)]), [2]), + (np.array([1 / np.sqrt(2), 1 / np.sqrt(2)]), [3]), + (np.array([1 / np.sqrt(2), 1 / np.sqrt(2)]), [4]), + (np.array([1 / np.sqrt(2), 1 / np.sqrt(2)]), [5]), + (np.array([0, 1 / np.sqrt(2), 0, 1 / np.sqrt(2)]), [1, 0]), + (np.array([0, 1 / np.sqrt(2), 0, 1 / np.sqrt(2)]), [0, 1]), + (np.array([0, 1 / np.sqrt(2), 0, 1 / np.sqrt(2)]), [0, 2]), + ( + np.array([0, 1 / np.sqrt(2), 0, 1 / np.sqrt(2)]), + [numQubits - 2, numQubits - 1], + ), + ( + np.array([0, 1 / np.sqrt(2), 0, 1 / np.sqrt(2)]), + [0, numQubits - 1], + ), + ( + np.array([0, 1 / np.sqrt(2), 0, 1 / np.sqrt(2)]), + [0, numQubits - 2], + ), + ], + ) + def test_qubit_state_prep(self, tol, par, Wires, dev_mpi): + num_wires = numQubits + comm = MPI.COMM_WORLD + commSize = comm.Get_size() + num_global_wires = commSize.bit_length() - 1 + num_local_wires = num_wires - num_global_wires + + if dev_mpi.R_DTYPE == np.float32: + c_dtype = np.complex64 + else: + c_dtype = np.complex128 + + state_vector = np.zeros(1 << num_wires).astype(c_dtype) + expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) + local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) + local_expected_output_cpu = 
np.zeros(1 << num_local_wires).astype(c_dtype) + + state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + + comm.Scatter(state_vector, local_state_vector, root=0) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + + def circuit(): + qml.StatePrep(par, wires=Wires) + return qml.state() + + cpu_qnode = qml.QNode(circuit, dev_cpu) + mpi_qnode = qml.QNode(circuit, dev_mpi) + + expected_output_cpu = cpu_qnode().astype(c_dtype) + comm.Scatter(expected_output_cpu, local_expected_output_cpu, root=0) + + local_state_vector = mpi_qnode() + + assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) + + def test_dev_reset(self, tol, dev_mpi): + num_wires = numQubits + comm = MPI.COMM_WORLD + commSize = comm.Get_size() + num_global_wires = commSize.bit_length() - 1 + num_local_wires = num_wires - num_global_wires + + if dev_mpi.R_DTYPE == np.float32: + c_dtype = np.complex64 + else: + c_dtype = np.complex128 + + state_vector = np.zeros(1 << num_wires).astype(c_dtype) + expected_output_cpu = np.zeros(1 << num_wires).astype(c_dtype) + local_state_vector = np.zeros(1 << num_local_wires).astype(c_dtype) + local_expected_output_cpu = np.zeros(1 << num_local_wires).astype(c_dtype) + + state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + + comm.Scatter(state_vector, local_state_vector, root=0) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=c_dtype) + + dev_cpu.reset() + + def circuit(): + qml.PauliX(wires=[0]) + qml.PauliX(wires=[0]) + return qml.state() + + cpu_qnode = qml.QNode(circuit, dev_cpu) + + expected_output_cpu = cpu_qnode().astype(c_dtype) + comm.Scatter(expected_output_cpu, local_expected_output_cpu, root=0) + + dev_mpi.reset() + + gpumpi_qnode = qml.QNode(circuit, dev_mpi) + dev_mpi.reset() + + local_state_vector = gpumpi_qnode() + assert np.allclose(local_state_vector, local_expected_output_cpu, atol=tol, rtol=0) + + +class TestSparseHamExpval: # pylint: 
disable=too-few-public-methods,missing-function-docstring + """Tests sparse hamiltonian expectation values.""" + + @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) + def test_sparse_hamiltonian_expectation(self, C_DTYPE): + comm = MPI.COMM_WORLD + commSize = comm.Get_size() + num_global_wires = commSize.bit_length() - 1 + num_local_wires = 3 - num_global_wires + + obs = qml.Identity(0) @ qml.PauliX(1) @ qml.PauliY(2) + obs1 = qml.Identity(1) + Hmat = qml.Hamiltonian([1.0, 1.0], [obs1, obs]).sparse_matrix() + + state_vector = np.array( + [ + 0.0 + 0.0j, + 0.0 + 0.1j, + 0.1 + 0.1j, + 0.1 + 0.2j, + 0.2 + 0.2j, + 0.2 + 0.3j, + 0.3 + 0.3j, + 0.3 + 0.5j, + ], + dtype=C_DTYPE, + ) + + local_state_vector = np.zeros(1 << num_local_wires).astype(C_DTYPE) + comm.Scatter(state_vector, local_state_vector, root=0) + + dev_gpu = qml.device("lightning.gpu", wires=3, mpi=False, c_dtype=C_DTYPE) + dev_mpi = qml.device("lightning.gpu", wires=3, mpi=True, c_dtype=C_DTYPE) + + dev_mpi.syncH2D(local_state_vector) + dev_gpu.syncH2D(state_vector) + + H_sparse = qml.SparseHamiltonian(Hmat, wires=range(3)) + + comm.Barrier() + + res = dev_mpi.expval(H_sparse) + expected = dev_gpu.expval(H_sparse) + + assert np.allclose(res, expected) + + +class TestExpval: + """Tests that expectation values are properly calculated or that the proper errors are raised.""" + + @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) + @pytest.mark.parametrize( + "operation", + [ + qml.PauliX, + qml.PauliY, + qml.PauliZ, + qml.Hadamard, + qml.Identity, + ], + ) + @pytest.mark.parametrize("wires", [0, 1, 2, numQubits - 3, numQubits - 2, numQubits - 1]) + def test_expval_single_wire_no_parameters(self, tol, operation, wires, C_DTYPE): + """Tests that expectation values are properly calculated for single-wire observables without parameters.""" + num_wires = numQubits + comm = MPI.COMM_WORLD + commSize = comm.Get_size() + num_global_wires = commSize.bit_length() - 1 + num_local_wires = 
num_wires - num_global_wires + + dev_mpi = qml.device("lightning.gpu", wires=numQubits, mpi=True, c_dtype=C_DTYPE) + + state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + comm.Bcast(state_vector, root=0) + + local_state_vector = np.zeros(1 << num_local_wires).astype(C_DTYPE) + comm.Scatter(state_vector, local_state_vector, root=0) + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=C_DTYPE) + + def circuit(): + qml.StatePrep(state_vector, wires=range(num_wires)) + return qml.expval(operation(wires)) + + cpu_qnode = qml.QNode(circuit, dev_cpu) + expected_output_cpu = cpu_qnode() + comm.Bcast(expected_output_cpu, root=0) + + mpi_qnode = qml.QNode(circuit, dev_mpi) + expected_output_mpi = mpi_qnode() + + assert np.allclose(expected_output_mpi, expected_output_cpu, atol=tol, rtol=0) + + @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) + @pytest.mark.parametrize( + "obs", + [ + qml.PauliX(0) @ qml.PauliZ(1), + qml.PauliX(0) @ qml.PauliZ(numQubits - 1), + qml.PauliX(numQubits - 2) @ qml.PauliZ(numQubits - 1), + qml.PauliZ(0) @ qml.PauliZ(1), + qml.PauliZ(0) @ qml.PauliZ(numQubits - 1), + qml.PauliZ(numQubits - 2) @ qml.PauliZ(numQubits - 1), + ], + ) + def test_expval_multiple_obs(self, obs, tol, C_DTYPE): + """Test expval with Hamiltonian""" + num_wires = numQubits + + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=C_DTYPE) + dev_mpi = qml.device("lightning.gpu", wires=num_wires, mpi=True, c_dtype=C_DTYPE) + + def circuit(): + qml.RX(0.4, wires=[0]) + qml.RY(-0.2, wires=[num_wires - 1]) + return qml.expval(obs) + + cpu_qnode = qml.QNode(circuit, dev_cpu) + mpi_qnode = qml.QNode(circuit, dev_mpi) + + assert np.allclose(cpu_qnode(), mpi_qnode(), atol=tol, rtol=0) + + @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) + @pytest.mark.parametrize( + "obs, coeffs", + [ + ([qml.PauliX(0) @ qml.PauliZ(1)], [0.314]), + ([qml.PauliX(0) @ qml.PauliZ(numQubits - 1)], [0.314]), + ([qml.PauliZ(0) @ 
qml.PauliZ(1)], [0.314]), + ([qml.PauliZ(0) @ qml.PauliZ(numQubits - 1)], [0.314]), + ( + [qml.PauliX(0) @ qml.PauliZ(1), qml.PauliZ(0) @ qml.PauliZ(1)], + [0.314, 0.2], + ), + ( + [ + qml.PauliX(0) @ qml.PauliZ(numQubits - 1), + qml.PauliZ(0) @ qml.PauliZ(1), + ], + [0.314, 0.2], + ), + ( + [ + qml.PauliX(numQubits - 2) @ qml.PauliZ(numQubits - 1), + qml.PauliZ(0) @ qml.PauliZ(1), + ], + [0.314, 0.2], + ), + ], + ) + def test_expval_hamiltonian(self, obs, coeffs, tol, C_DTYPE): + """Test expval with Hamiltonian""" + num_wires = numQubits + + ham = qml.Hamiltonian(coeffs, obs) + + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=C_DTYPE) + dev_mpi = qml.device("lightning.gpu", wires=num_wires, mpi=True, c_dtype=C_DTYPE) + + def circuit(): + qml.RX(0.4, wires=[0]) + qml.RY(-0.2, wires=[numQubits - 1]) + return qml.expval(ham) + + cpu_qnode = qml.QNode(circuit, dev_cpu) + mpi_qnode = qml.QNode(circuit, dev_mpi) + + assert np.allclose(cpu_qnode(), mpi_qnode(), atol=tol, rtol=0) + + def test_expval_non_pauli_word_hamiltionian(self, tol): + """Tests expectation values of non-Pauli word Hamiltonians.""" + dev_mpi = qml.device("lightning.gpu", wires=3, mpi=True) + dev_cpu = qml.device("lightning.qubit", wires=3) + + theta = 0.432 + phi = 0.123 + varphi = -0.543 + + def circuit(): + qml.RX(theta, wires=[0]) + qml.RX(phi, wires=[1]) + qml.RX(varphi, wires=[2]) + qml.CNOT(wires=[0, 1]) + qml.CNOT(wires=[1, 2]) + return qml.expval(0.5 * qml.Hadamard(2)) + + cpu_qnode = qml.QNode(circuit, dev_cpu) + mpi_qnode = qml.QNode(circuit, dev_mpi) + + assert np.allclose(cpu_qnode(), mpi_qnode(), atol=tol, rtol=0) + + +class TestGenerateSample: + """Tests that samples are properly calculated.""" + + @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) + def test_sample_dimensions(self, C_DTYPE): + """Tests if the samples returned by sample have + the correct dimensions + """ + num_wires = numQubits + + dev = qml.device("lightning.gpu", wires=num_wires, 
mpi=True, shots=1000, c_dtype=C_DTYPE) + + dev.apply([qml.RX(1.5708, wires=[0]), qml.RX(1.5708, wires=[1])]) + + dev.shots = 10 + dev._wires_measured = {0} + dev._samples = dev.generate_samples() + s1 = dev.sample(qml.PauliZ(wires=[0])) + assert np.array_equal(s1.shape, (10,)) + + dev.reset() + dev.shots = 12 + dev._wires_measured = {1} + dev._samples = dev.generate_samples() + s2 = dev.sample(qml.PauliZ(wires=[1])) + assert np.array_equal(s2.shape, (12,)) + + dev.reset() + dev.shots = 17 + dev._wires_measured = {0, 1} + dev._samples = dev.generate_samples() + s3 = dev.sample(qml.PauliX(0) @ qml.PauliZ(1)) + assert np.array_equal(s3.shape, (17,)) + + @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) + def test_sample_values(self, tol, C_DTYPE): + """Tests if the samples returned by sample have + the correct values + """ + num_wires = numQubits + + dev = qml.device("lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE) + dev.reset() + dev.apply([qml.RX(1.5708, wires=[0])]) + dev._wires_measured = {0} + dev._samples = dev.generate_samples() + + s1 = dev.sample(qml.PauliZ(0)) + + # s1 should only contain 1 and -1, which is guaranteed if + # they square to 1 + assert np.allclose(s1**2, 1, atol=tol, rtol=0) + + @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) + def test_sample_values_qnode(self, tol, C_DTYPE): + """Tests if the samples returned by sample have + the correct values + """ + num_wires = numQubits + + dev_mpi = qml.device( + "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE + ) + dev_mpi.reset() + + @qml.qnode(dev_mpi) + def circuit(): + qml.RX(1.5708, wires=0) + return qml.sample(qml.PauliZ(0)) + + # s1 should only contain 1 and -1, which is guaranteed if + # they square to 1 + assert np.allclose(circuit() ** 2, 1, atol=tol, rtol=0) + + @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) + def test_multi_samples_return_correlated_results(self, C_DTYPE): + """Tests if 
the samples returned by the sample function have + the correct dimensions + """ + num_wires = 3 + + dev_gpumpi = qml.device( + "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE + ) + + @qml.qnode(dev_gpumpi) + def circuit(): + qml.Hadamard(0) + qml.CNOT(wires=[0, 1]) + return qml.sample(qml.PauliZ(0)), qml.sample(qml.PauliZ(1)) + + outcomes = circuit() + + assert np.array_equal(outcomes[0], outcomes[1]) + + @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) + def test_paulix_pauliy(self, C_DTYPE, tol=TOL_STOCHASTIC): + """Test that a tensor product involving PauliX and PauliY works correctly""" + num_wires = 3 + + dev_gpumpi = qml.device( + "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE + ) + + theta = 0.432 + phi = 0.123 + varphi = -0.543 + + @qml.qnode(dev_gpumpi) + def circuit(): + qml.RX(theta, wires=[0]) + qml.RX(phi, wires=[1]) + qml.RX(varphi, wires=[2]) + qml.CNOT(wires=[0, 1]) + qml.CNOT(wires=[1, 2]) + return qml.sample(qml.PauliX(wires=[0]) @ qml.PauliY(wires=[2])) + + res = circuit() + + # res should only contain 1 and -1 + assert np.allclose(res**2, 1, atol=tol) + + mean = np.mean(res) + expected = np.sin(theta) * np.sin(phi) * np.sin(varphi) + assert np.allclose(mean, expected, atol=tol) + + var = np.var(res) + expected = ( + 8 * np.sin(theta) ** 2 * np.cos(2 * varphi) * np.sin(phi) ** 2 + - np.cos(2 * (theta - phi)) + - np.cos(2 * (theta + phi)) + + 2 * np.cos(2 * theta) + + 2 * np.cos(2 * phi) + + 14 + ) / 16 + assert np.allclose(var, expected, atol=tol) + + @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) + def test_pauliz_hadamard(self, C_DTYPE, tol=TOL_STOCHASTIC): + """Test that a tensor product involving PauliZ and PauliY and hadamard works correctly""" + num_wires = 3 + + dev_gpumpi = qml.device( + "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE + ) + + theta = 0.432 + phi = 0.123 + varphi = -0.543 + + @qml.qnode(dev_gpumpi) + def 
circuit(): + qml.RX(theta, wires=[0]) + qml.RX(phi, wires=[1]) + qml.RX(varphi, wires=[2]) + qml.CNOT(wires=[0, 1]) + qml.CNOT(wires=[1, 2]) + return qml.sample( + qml.PauliZ(wires=[0]) @ qml.Hadamard(wires=[1]) @ qml.PauliY(wires=[2]) + ) + + res = circuit() + + # s1 should only contain 1 and -1 + assert np.allclose(res**2, 1, atol=tol) + + mean = np.mean(res) + expected = -(np.cos(varphi) * np.sin(phi) + np.sin(varphi) * np.cos(theta)) / np.sqrt(2) + assert np.allclose(mean, expected, atol=tol) + + var = np.var(res) + expected = ( + 3 + + np.cos(2 * phi) * np.cos(varphi) ** 2 + - np.cos(2 * theta) * np.sin(varphi) ** 2 + - 2 * np.cos(theta) * np.sin(phi) * np.sin(2 * varphi) + ) / 4 + assert np.allclose(var, expected, atol=tol) + + +class TestTensorVar: + """Test tensor variance measurements.""" + + @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) + def test_paulix_pauliy(self, C_DTYPE, tol=TOL_STOCHASTIC): + """Test that a tensor product involving PauliX and PauliY works correctly""" + num_wires = 3 + + dev_gpumpi = qml.device( + "lightning.gpu", wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE + ) + + theta = 0.432 + phi = 0.123 + varphi = -0.543 + + @qml.qnode(dev_gpumpi) + def circuit(): + qml.RX(theta, wires=[0]) + qml.RX(phi, wires=[1]) + qml.RX(varphi, wires=[2]) + qml.CNOT(wires=[0, 1]) + qml.CNOT(wires=[1, 2]) + return qml.var(qml.PauliX(wires=[0]) @ qml.PauliY(wires=[2])) + + res = circuit() + + expected = ( + 8 * np.sin(theta) ** 2 * np.cos(2 * varphi) * np.sin(phi) ** 2 + - np.cos(2 * (theta - phi)) + - np.cos(2 * (theta + phi)) + + 2 * np.cos(2 * theta) + + 2 * np.cos(2 * phi) + + 14 + ) / 16 + assert np.allclose(res, expected, atol=tol) + + @pytest.mark.parametrize("C_DTYPE", [np.complex128, np.complex64]) + def test_pauliz_hadamard(self, C_DTYPE, tol=TOL_STOCHASTIC): + """Test that a tensor product involving PauliZ and PauliY and hadamard works correctly""" + num_wires = 3 + dev_gpumpi = qml.device( + "lightning.gpu", 
wires=num_wires, mpi=True, shots=1000, c_dtype=C_DTYPE + ) + + theta = 0.432 + phi = 0.123 + varphi = -0.543 + + @qml.qnode(dev_gpumpi) + def circuit(): + qml.RX(theta, wires=[0]) + qml.RX(phi, wires=[1]) + qml.RX(varphi, wires=[2]) + qml.CNOT(wires=[0, 1]) + qml.CNOT(wires=[1, 2]) + return qml.var(qml.PauliZ(wires=[0]) @ qml.Hadamard(wires=[1]) @ qml.PauliY(wires=[2])) + + res = circuit() + + expected = ( + 3 + + np.cos(2 * phi) * np.cos(varphi) ** 2 + - np.cos(2 * theta) * np.sin(varphi) ** 2 + - 2 * np.cos(theta) * np.sin(phi) * np.sin(2 * varphi) + ) / 4 + assert np.allclose(res, expected, atol=tol) + + +def circuit_ansatz(params, wires): + """Circuit ansatz containing all the parametrized gates""" + # pylint: disable=undefined-variable + qml.StatePrep( + unitary_group.rvs(2**numQubits, random_state=0)[0], + wires=wires, + ) + qml.RX(params[0], wires=wires[0]) + qml.RY(params[1], wires=wires[1]) + qml.adjoint(qml.RX(params[2], wires=wires[2])) + qml.RZ(params[0], wires=wires[3]) + qml.CRX(params[3], wires=[wires[3], wires[0]]) + qml.PhaseShift(params[4], wires=wires[2]) + qml.CRY(params[5], wires=[wires[2], wires[1]]) + qml.adjoint(qml.CRZ(params[5], wires=[wires[0], wires[3]])) + qml.adjoint(qml.PhaseShift(params[6], wires=wires[0])) + qml.Rot(params[6], params[7], params[8], wires=wires[0]) + qml.adjoint(qml.Rot(params[8], params[8], params[9], wires=wires[1])) + qml.MultiRZ(params[11], wires=[wires[0], wires[1]]) + qml.CPhase(params[12], wires=[wires[3], wires[2]]) + qml.IsingXX(params[13], wires=[wires[1], wires[0]]) + qml.IsingYY(params[14], wires=[wires[3], wires[2]]) + qml.IsingZZ(params[15], wires=[wires[2], wires[1]]) + qml.SingleExcitation(params[24], wires=[wires[2], wires[0]]) + qml.DoubleExcitation(params[25], wires=[wires[2], wires[0], wires[1], wires[3]]) + + +@pytest.mark.parametrize( + "returns", + [ + (qml.PauliX(0),), + (qml.PauliY(0),), + (qml.PauliZ(0),), + (qml.PauliX(1),), + (qml.PauliY(1),), + (qml.PauliZ(1),), + (qml.PauliX(2),), + 
(qml.PauliY(2),), + (qml.PauliZ(2),), + (qml.PauliX(3),), + (qml.PauliY(3),), + (qml.PauliZ(3),), + (qml.PauliX(0), qml.PauliY(1)), + ( + qml.PauliZ(0), + qml.PauliX(1), + qml.PauliY(2), + ), + ( + qml.PauliY(0), + qml.PauliZ(1), + qml.PauliY(3), + ), + (qml.PauliZ(0) @ qml.PauliY(3),), + (qml.Hadamard(2),), + (qml.Hadamard(3) @ qml.PauliZ(2),), + (qml.PauliX(0) @ qml.PauliY(3),), + (qml.PauliY(0) @ qml.PauliY(2) @ qml.PauliY(3),), + (qml.PauliZ(0) @ qml.PauliZ(1) @ qml.PauliZ(2),), + (0.5 * qml.PauliZ(0) @ qml.PauliZ(2),), + ], +) +def test_integration(returns): + """Integration tests that compare to default.qubit for a large circuit containing parametrized + operations""" + num_wires = numQubits + dev_default = qml.device("lightning.qubit", wires=range(num_wires)) + dev_gpu = qml.device("lightning.gpu", wires=num_wires, mpi=True, c_dtype=np.complex128) + + def circuit(params): + circuit_ansatz(params, wires=range(num_wires)) + return qml.math.hstack([qml.expval(r) for r in returns]) + + n_params = 30 + np.random.seed(1337) + params = np.random.rand(n_params) + + qnode_gpu = qml.QNode(circuit, dev_gpu, diff_method="parameter-shift") + qnode_default = qml.QNode(circuit, dev_default, diff_method="parameter-shift") + + def convert_to_array_gpu(params): + return np.array(qnode_gpu(params)) + + def convert_to_array_default(params): + return np.array(qnode_default(params)) + + j_gpu = qml.jacobian(convert_to_array_gpu)(params) + j_default = qml.jacobian(convert_to_array_default)(params) + + assert np.allclose(j_gpu, j_default, atol=1e-7) + + +custom_wires = ["alice", 3.14, -1, 0, "bob", "l", "m", "n"] + + +@pytest.mark.parametrize( + "returns", + [ + qml.PauliZ(custom_wires[0]), + qml.PauliX(custom_wires[2]), + qml.PauliZ(custom_wires[0]) @ qml.PauliY(custom_wires[3]), + qml.Hadamard(custom_wires[2]), + qml.Hadamard(custom_wires[3]) @ qml.PauliZ(custom_wires[2]), + qml.PauliX(custom_wires[0]) @ qml.PauliY(custom_wires[3]), + qml.PauliY(custom_wires[0]) @ 
qml.PauliY(custom_wires[2]) @ qml.PauliY(custom_wires[3]), + ], +) +def test_integration_custom_wires(returns): + """Integration tests that compare to default.qubit for a large circuit containing parametrized + operations and when using custom wire labels""" + dev_lightning = qml.device("lightning.qubit", wires=custom_wires) + dev_gpu = qml.device("lightning.gpu", wires=custom_wires, mpi=True, c_dtype=np.complex128) + + def circuit(params): + circuit_ansatz(params, wires=custom_wires) + return qml.expval(returns), qml.expval(qml.PauliY(custom_wires[1])) + + n_params = 30 + np.random.seed(1337) + params = np.random.rand(n_params) + + qnode_gpu = qml.QNode(circuit, dev_gpu, diff_method="parameter-shift") + qnode_lightning = qml.QNode(circuit, dev_lightning, diff_method="parameter-shift") + + def convert_to_array_gpu(params): + return np.array(qnode_gpu(params)) + + def convert_to_array_lightning(params): + return np.array(qnode_lightning(params)) + + j_gpu = qml.jacobian(convert_to_array_gpu)(params) + j_lightning = qml.jacobian(convert_to_array_lightning)(params) + + assert np.allclose(j_gpu, j_lightning, atol=1e-7) diff --git a/mpitests/test_device.py b/mpitests/test_device.py new file mode 100644 index 0000000000..d9761bf148 --- /dev/null +++ b/mpitests/test_device.py @@ -0,0 +1,54 @@ +# Copyright 2018-2023 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Unit tests for Lightning devices creation. 
+""" +# pylint: disable=protected-access,unused-variable,missing-function-docstring,c-extension-no-member + +import pytest +from conftest import device_name, LightningDevice as ld + +import pennylane as qml +from mpi4py import MPI + +if not ld._CPP_BINARY_AVAILABLE: + pytest.skip("No binary module found. Skipping.", allow_module_level=True) + + +def test_create_device(): + if MPI.COMM_WORLD.Get_size() > 2: + with pytest.raises( + ValueError, + match="Number of devices should be larger than or equal to the number of processes on each node.", + ): + dev = qml.device(device_name, mpi=True, wires=4) + else: + dev = qml.device(device_name, mpi=True, wires=4) + + +def test_unsupported_mpi_buf_size(): + with pytest.raises(TypeError, match="Unsupported mpi_buf_size value"): + dev = qml.device(device_name, mpi=True, wires=4, mpi_buf_size=-1) + with pytest.raises(TypeError, match="Unsupported mpi_buf_size value"): + dev = qml.device(device_name, mpi=True, wires=4, mpi_buf_size=3) + with pytest.warns( + RuntimeWarning, + match="The MPI buffer size is larger than the local state vector size", + ): + dev = qml.device(device_name, mpi=True, wires=4, mpi_buf_size=2**4) + with pytest.raises( + ValueError, + match="Number of processes should be smaller than the number of statevector elements", + ): + dev = qml.device(device_name, mpi=True, wires=1) diff --git a/mpitests/test_expval.py b/mpitests/test_expval.py new file mode 100644 index 0000000000..ad76da1aa5 --- /dev/null +++ b/mpitests/test_expval.py @@ -0,0 +1,332 @@ +# Copyright 2018-2023 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Unit tests for the expval method of Lightning devices. +""" +# pylint: disable=protected-access,too-few-public-methods,unused-import,missing-function-docstring,too-many-arguments,c-extension-no-member + +import pytest +from conftest import THETA, PHI, VARPHI, device_name + +import numpy as np +import pennylane as qml +from mpi4py import MPI + + +@pytest.mark.parametrize("theta, phi", list(zip(THETA, PHI))) +class TestExpval: + """Test expectation values""" + + def test_identity_expectation(self, theta, phi, tol): + """Test that identity expectation value (i.e. the trace) is 1""" + dev = qml.device(device_name, mpi=True, wires=3) + if device_name == "lightning.gpu" and dev.R_DTYPE == np.float32: + pytest.skip("Skipped FP32 tests for expval in lightning.gpu") + + O1 = qml.Identity(wires=[0]) + O2 = qml.Identity(wires=[1]) + + dev.apply( + [qml.RX(theta, wires=[0]), qml.RX(phi, wires=[1]), qml.CNOT(wires=[0, 1])], + rotations=[*O1.diagonalizing_gates(), *O2.diagonalizing_gates()], + ) + + res = np.array([dev.expval(O1), dev.expval(O2)]) + assert np.allclose(res, np.array([1, 1]), tol) + + def test_pauliz_expectation(self, theta, phi, tol): + """Test that PauliZ expectation value is correct""" + dev = qml.device(device_name, mpi=True, wires=3) + + if device_name == "lightning.gpu" and dev.R_DTYPE == np.float32: + pytest.skip("Skipped FP32 tests for expval in lightning.gpu") + + O1 = qml.PauliZ(wires=[0]) + O2 = qml.PauliZ(wires=[1]) + + dev.apply( + [qml.RX(theta, wires=[0]), qml.RX(phi, wires=[1]), qml.CNOT(wires=[0, 1])], + 
rotations=[*O1.diagonalizing_gates(), *O2.diagonalizing_gates()], + ) + + res = np.array([dev.expval(O1), dev.expval(O2)]) + assert np.allclose(res, np.array([np.cos(theta), np.cos(theta) * np.cos(phi)]), tol) + + def test_paulix_expectation(self, theta, phi, tol): + """Test that PauliX expectation value is correct""" + dev = qml.device(device_name, mpi=True, wires=3) + + if device_name == "lightning.gpu" and dev.R_DTYPE == np.float32: + pytest.skip("Skipped FP32 tests for expval in lightning.gpu") + + O1 = qml.PauliX(wires=[0]) + O2 = qml.PauliX(wires=[1]) + + dev.apply( + [qml.RY(theta, wires=[0]), qml.RY(phi, wires=[1]), qml.CNOT(wires=[0, 1])], + rotations=[*O1.diagonalizing_gates(), *O2.diagonalizing_gates()], + ) + + res = np.array([dev.expval(O1), dev.expval(O2)], dtype=dev.C_DTYPE) + assert np.allclose( + res, + np.array([np.sin(theta) * np.sin(phi), np.sin(phi)], dtype=dev.C_DTYPE), + tol * 10, + ) + + def test_pauliy_expectation(self, theta, phi, tol): + """Test that PauliY expectation value is correct""" + dev = qml.device(device_name, mpi=True, wires=3) + + if device_name == "lightning.gpu" and dev.R_DTYPE == np.float32: + pytest.skip("Skipped FP32 tests for expval in lightning.gpu") + + O1 = qml.PauliY(wires=[0]) + O2 = qml.PauliY(wires=[1]) + + dev.apply( + [qml.RX(theta, wires=[0]), qml.RX(phi, wires=[1]), qml.CNOT(wires=[0, 1])], + rotations=[*O1.diagonalizing_gates(), *O2.diagonalizing_gates()], + ) + + res = np.array([dev.expval(O1), dev.expval(O2)]) + assert np.allclose(res, np.array([0, -np.cos(theta) * np.sin(phi)]), tol) + + def test_hadamard_expectation(self, theta, phi, tol): + """Test that Hadamard expectation value is correct""" + dev = qml.device(device_name, mpi=True, wires=3) + + O1 = qml.Hadamard(wires=[0]) + O2 = qml.Hadamard(wires=[1]) + + dev.apply( + [qml.RY(theta, wires=[0]), qml.RY(phi, wires=[1]), qml.CNOT(wires=[0, 1])], + rotations=[*O1.diagonalizing_gates(), *O2.diagonalizing_gates()], + ) + + res = np.array([dev.expval(O1), 
dev.expval(O2)]) + expected = np.array( + [ + np.sin(theta) * np.sin(phi) + np.cos(theta), + np.cos(theta) * np.cos(phi) + np.sin(phi), + ] + ) / np.sqrt(2) + assert np.allclose(res, expected, tol) + + @pytest.mark.parametrize("n_wires", range(1, 8)) + def test_hermitian_expectation(self, n_wires, theta, phi, tol): + """Test that Hadamard expectation value is correct""" + n_qubits = 7 + dev_def = qml.device("default.qubit", wires=n_qubits) + dev = qml.device(device_name, mpi=True, wires=n_qubits) + if device_name == "lightning.gpu" and dev.R_DTYPE == np.float32: + pytest.skip("Skipped FP32 tests for expval in lightning.gpu") + comm = MPI.COMM_WORLD + + m = 2**n_wires + U = np.random.rand(m, m) + 1j * np.random.rand(m, m) + U = U + np.conj(U.T) + U = U.astype(dev.C_DTYPE) + comm.Bcast(U, root=0) + obs = qml.Hermitian(U, wires=range(n_wires)) + + init_state = np.random.rand(2**n_qubits) + 1j * np.random.rand(2**n_qubits) + init_state /= np.sqrt(np.dot(np.conj(init_state), init_state)) + init_state = init_state.astype(dev.C_DTYPE) + comm.Bcast(init_state, root=0) + + def circuit(): + qml.StatePrep(init_state, wires=range(n_qubits)) + qml.RY(theta, wires=[0]) + qml.RY(phi, wires=[1]) + qml.CNOT(wires=[0, 1]) + return qml.expval(obs) + + circ = qml.QNode(circuit, dev) + comm = MPI.COMM_WORLD + mpisize = comm.Get_size() + if n_wires > n_qubits - np.log2(mpisize): + with pytest.raises( + RuntimeError, + match="MPI backend does not support Hermitian with number of target wires larger than local wire number", + ): + circ() + else: + circ_def = qml.QNode(circuit, dev_def) + assert np.allclose(circ(), circ_def(), tol) + + +@pytest.mark.parametrize("diff_method", ("parameter-shift", "adjoint")) +class TestExpOperatorArithmetic: + """Test integration of lightning with SProd, Prod, and Sum.""" + + def test_sprod(self, diff_method): + """Test the `SProd` class with lightning qubit.""" + + dev = qml.device(device_name, mpi=True, wires=2) + + @qml.qnode(dev, 
diff_method=diff_method) + def circuit(x): + qml.RX(x, wires=0) + return qml.expval(qml.s_prod(0.5, qml.PauliZ(0))) + + x = qml.numpy.array(0.123, requires_grad=True) + res = circuit(x) + assert qml.math.allclose(res, 0.5 * np.cos(x)) + + g = qml.grad(circuit)(x) + expected_grad = -0.5 * np.sin(x) + assert qml.math.allclose(g, expected_grad) + + def test_prod(self, diff_method): + """Test the `Prod` class with lightning qubit.""" + + dev = qml.device(device_name, mpi=True, wires=2) + + @qml.qnode(dev, diff_method=diff_method) + def circuit(x): + qml.RX(x, wires=0) + qml.Hadamard(1) + qml.PauliZ(1) + return qml.expval(qml.prod(qml.PauliZ(0), qml.PauliX(1))) + + x = qml.numpy.array(0.123, requires_grad=True) + res = circuit(x) + assert qml.math.allclose(res, -np.cos(x)) + + g = qml.grad(circuit)(x) + expected_grad = np.sin(x) + assert qml.math.allclose(g, expected_grad) + + def test_sum(self, diff_method): + """Test the `Sum` class with Lightning.""" + + dev = qml.device(device_name, mpi=True, wires=2) + + @qml.qnode(dev, diff_method=diff_method) + def circuit(x, y): + qml.RX(x, wires=0) + qml.RY(y, wires=1) + return qml.expval(qml.sum(qml.PauliZ(0), qml.PauliX(1))) + + x = qml.numpy.array(-3.21, requires_grad=True) + y = qml.numpy.array(2.34, requires_grad=True) + res = circuit(x, y) + assert qml.math.allclose(res, np.cos(x) + np.sin(y)) + + g = qml.grad(circuit)(x, y) + expected = (-np.sin(x), np.cos(y)) + assert qml.math.allclose(g, expected) + + def test_integration(self, diff_method): + """Test a Combination of `Sum`, `SProd`, and `Prod`.""" + + obs = qml.sum( + qml.s_prod(2.3, qml.PauliZ(0)), + -0.5 * qml.prod(qml.PauliY(0), qml.PauliZ(1)), + ) + + dev = qml.device(device_name, mpi=True, wires=2) + + @qml.qnode(dev, diff_method=diff_method) + def circuit(x, y): + qml.RX(x, wires=0) + qml.RY(y, wires=1) + return qml.expval(obs) + + x = qml.numpy.array(0.654, requires_grad=True) + y = qml.numpy.array(-0.634, requires_grad=True) + + res = circuit(x, y) + expected 
= 2.3 * np.cos(x) + 0.5 * np.sin(x) * np.cos(y) + assert qml.math.allclose(res, expected) + + g = qml.grad(circuit)(x, y) + expected = ( + -2.3 * np.sin(x) + 0.5 * np.cos(y) * np.cos(x), + -0.5 * np.sin(x) * np.sin(y), + ) + assert qml.math.allclose(g, expected) + + +@pytest.mark.parametrize("theta,phi,varphi", list(zip(THETA, PHI, VARPHI))) +class TestTensorExpval: + """Test tensor expectation values""" + + def test_paulix_pauliy(self, theta, phi, varphi, tol): + """Test that a tensor product involving PauliX and PauliY works + correctly""" + dev = qml.device(device_name, mpi=True, wires=3) + obs = qml.PauliX(0) @ qml.PauliY(2) + + dev.apply( + [ + qml.RX(theta, wires=[0]), + qml.RX(phi, wires=[1]), + qml.RX(varphi, wires=[2]), + qml.CNOT(wires=[0, 1]), + qml.CNOT(wires=[1, 2]), + ], + rotations=obs.diagonalizing_gates(), + ) + res = dev.expval(obs) + + expected = np.sin(theta) * np.sin(phi) * np.sin(varphi) + + assert np.allclose(res, expected, atol=tol) + + def test_pauliz_identity(self, theta, phi, varphi, tol): + """Test that a tensor product involving PauliZ and Identity works + correctly""" + dev = qml.device(device_name, mpi=True, wires=3) + obs = qml.PauliZ(0) @ qml.Identity(1) @ qml.PauliZ(2) + + dev.apply( + [ + qml.RX(theta, wires=[0]), + qml.RX(phi, wires=[1]), + qml.RX(varphi, wires=[2]), + qml.CNOT(wires=[0, 1]), + qml.CNOT(wires=[1, 2]), + ], + rotations=obs.diagonalizing_gates(), + ) + + res = dev.expval(obs) + + expected = np.cos(varphi) * np.cos(phi) + + assert np.allclose(res, expected, tol) + + def test_pauliz_hadamard_pauliy(self, theta, phi, varphi, tol): + """Test that a tensor product involving PauliZ and PauliY and Hadamard + works correctly""" + dev = qml.device(device_name, mpi=True, wires=3) + obs = qml.PauliZ(0) @ qml.Hadamard(1) @ qml.PauliY(2) + + dev.apply( + [ + qml.RX(theta, wires=[0]), + qml.RX(phi, wires=[1]), + qml.RX(varphi, wires=[2]), + qml.CNOT(wires=[0, 1]), + qml.CNOT(wires=[1, 2]), + ], + 
rotations=obs.diagonalizing_gates(), + ) + + res = dev.expval(obs) + expected = -(np.cos(varphi) * np.sin(phi) + np.sin(varphi) * np.cos(theta)) / np.sqrt(2) + + assert np.allclose(res, expected, tol) diff --git a/mpitests/test_measurements_sparse.py b/mpitests/test_measurements_sparse.py new file mode 100644 index 0000000000..4ea2856289 --- /dev/null +++ b/mpitests/test_measurements_sparse.py @@ -0,0 +1,168 @@ +# Copyright 2018-2023 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Unit tests for Sparse Measurements Lightning devices. +""" +# pylint: disable=protected-access,too-few-public-methods,unused-import,missing-function-docstring,too-many-arguments + +import pytest +from conftest import device_name, LightningDevice as ld +from mpi4py import MPI + +import numpy as np +import pennylane as qml +from pennylane import qchem + +if not ld._CPP_BINARY_AVAILABLE: + pytest.skip("No binary module found. 
Skipping.", allow_module_level=True) + + +class TestSparseExpval: + """Tests for the expval function""" + + @pytest.fixture(params=[np.complex64, np.complex128]) + def dev(self, request): + return qml.device(device_name, mpi=True, wires=2, c_dtype=request.param) + + @pytest.mark.parametrize( + "cases", + [ + [ + qml.PauliX(0) @ qml.Identity(1), + 0.00000000000000000, + 1.000000000000000000, + ], + [ + qml.Identity(0) @ qml.PauliX(1), + -0.19866933079506122, + 0.960530638694763184, + ], + [ + qml.PauliY(0) @ qml.Identity(1), + -0.38941834230865050, + 0.848353326320648193, + ], + [ + qml.Identity(0) @ qml.PauliY(1), + 0.00000000000000000, + 1.000000119209289551, + ], + [ + qml.PauliZ(0) @ qml.Identity(1), + 0.92106099400288520, + 0.151646673679351807, + ], + [ + qml.Identity(0) @ qml.PauliZ(1), + 0.98006657784124170, + 0.039469480514526367, + ], + ], + ) + def test_sparse_Pauli_words(self, cases, tol, dev): + """Test expval of some simple sparse Hamiltonian""" + + @qml.qnode(dev, diff_method="parameter-shift") + def circuit_expval(): + qml.RX(0.4, wires=[0]) + qml.RY(-0.2, wires=[1]) + return qml.expval( + qml.SparseHamiltonian( + qml.Hamiltonian([1], [cases[0]]).sparse_matrix(), wires=[0, 1] + ) + ) + + assert np.allclose(circuit_expval(), cases[1], atol=tol, rtol=0) + + @qml.qnode(dev, diff_method="parameter-shift") + def circuit_var(): + qml.RX(0.4, wires=[0]) + qml.RY(-0.2, wires=[1]) + return qml.var( + qml.SparseHamiltonian( + qml.Hamiltonian([1], [cases[0]]).sparse_matrix(), wires=[0, 1] + ) + ) + + assert np.allclose(circuit_var(), cases[2], atol=tol, rtol=0) + + +class TestSparseExpvalQChem: + """Tests for the expval function with qchem workflow""" + + symbols = ["Li", "H"] + geometry = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 2.969280527]) + + H, qubits = qchem.molecular_hamiltonian( + symbols, + geometry, + ) + + active_electrons = 1 + + hf_state = qchem.hf_state(active_electrons, qubits) + + singles, doubles = qchem.excitations(active_electrons, qubits) + 
excitations = singles + doubles + + @pytest.fixture( + params=[np.complex64, np.complex128] if device_name != "lightning.gpu" else [np.complex128] + ) + @pytest.mark.parametrize( + "qubits, wires, H, hf_state, excitations", + [ + [qubits, range(qubits), H, hf_state, excitations], + [ + qubits, + np.random.permutation(np.arange(qubits)), + H, + hf_state, + excitations, + ], + ], + ) + def test_sparse_Pauli_words(self, qubits, wires, H, hf_state, excitations, tol, request): + """Test expval of some simple sparse Hamiltonian""" + + H_sparse = H.sparse_matrix(wires) + + dev = qml.device(device_name, mpi=True, wires=wires, c_dtype=request.param) + + @qml.qnode(dev, diff_method="parameter-shift") + def circuit(): + qml.BasisState(hf_state, wires=range(qubits)) + + for excitation in excitations: + if len(excitation) == 4: + qml.DoubleExcitation(1, wires=excitation) + elif len(excitation) == 2: + qml.SingleExcitation(1, wires=excitation) + + return qml.expval(qml.SparseHamiltonian(H_sparse, wires=wires)) + + dev_default = qml.device("default.qubit", wires=qubits) + + @qml.qnode(dev_default, diff_method="parameter-shift") + def circuit_default(): + qml.BasisState(hf_state, wires=range(qubits)) + + for excitation in excitations: + if len(excitation) == 4: + qml.DoubleExcitation(1, wires=excitation) + elif len(excitation) == 2: + qml.SingleExcitation(1, wires=excitation) + + return qml.expval(qml.SparseHamiltonian(H_sparse, wires=wires)) + + assert np.allclose(circuit(), circuit_default(), atol=tol, rtol=0) diff --git a/mpitests/test_probs.py b/mpitests/test_probs.py new file mode 100644 index 0000000000..f07a00ba6f --- /dev/null +++ b/mpitests/test_probs.py @@ -0,0 +1,312 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Unit tests for the :mod:`pennylane_lightning.LightningGPU` device (MPI). +""" +# pylint: disable=missing-function-docstring,unnecessary-comprehension,too-many-arguments,wrong-import-order,unused-variable,c-extension-no-member +from mpi4py import MPI +import pytest + +from conftest import ( + device_name, +) + +import numpy as np +import pennylane as qml + +numQubits = 8 + + +def create_random_init_state(numWires, R_DTYPE, seed_value=48): + np.random.seed(seed_value) + num_elements = 1 << numWires + init_state = np.random.rand(num_elements).astype(R_DTYPE) + 1j * np.random.rand( + num_elements + ).astype(R_DTYPE) + scale_sum = np.sqrt(np.sum(np.abs(init_state) ** 2)).astype(R_DTYPE) + init_state = init_state / scale_sum + return init_state + + +def apply_probs_nonparam(tol, operation, GateWires, Wires, C_DTYPE): + num_wires = numQubits + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + commSize = comm.Get_size() + + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=C_DTYPE) + dev_mpi = qml.device(device_name, wires=num_wires, mpi=True, c_dtype=C_DTYPE) + + state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + comm.Bcast(state_vector, root=0) + + def circuit(): + qml.StatePrep(state_vector, wires=range(num_wires)) + operation(wires=GateWires) + return qml.probs(wires=Wires) + + cpu_qnode = qml.QNode(circuit, dev_cpu) + probs_cpu = cpu_qnode() + + mpi_qnode = qml.QNode(circuit, dev_mpi) + local_probs = mpi_qnode() + + recv_counts = comm.gather(len(local_probs), root=0) + + comm.Barrier() + + if rank == 0: + probs_mpi = 
np.zeros(1 << len(Wires)).astype(dev_mpi.R_DTYPE) + displacements = [i for i in range(commSize)] + else: + probs_mpi = None + probs_cpu = None + comm.Barrier() + comm.Gatherv(local_probs, [probs_mpi, recv_counts], root=0) + + if rank == 0: + assert np.allclose(probs_mpi, probs_cpu, atol=tol, rtol=0) + comm.Barrier() + + +def apply_probs_param(tol, operation, par, GateWires, Wires, C_DTYPE): + num_wires = numQubits + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + commSize = comm.Get_size() + + dev_cpu = qml.device("lightning.qubit", wires=num_wires, c_dtype=C_DTYPE) + dev_mpi = qml.device(device_name, wires=num_wires, mpi=True, c_dtype=C_DTYPE) + + state_vector = create_random_init_state(num_wires, dev_mpi.R_DTYPE) + comm.Bcast(state_vector, root=0) + + def circuit(): + qml.StatePrep(state_vector, wires=range(num_wires)) + operation(*par, wires=GateWires) + return qml.probs(wires=Wires) + + cpu_qnode = qml.QNode(circuit, dev_cpu) + probs_cpu = cpu_qnode() + + mpi_qnode = qml.QNode(circuit, dev_mpi) + local_probs = mpi_qnode() + + recv_counts = comm.gather(len(local_probs), root=0) + + comm.Barrier() + + if rank == 0: + probs_mpi = np.zeros(1 << len(Wires)).astype(dev_mpi.R_DTYPE) + else: + probs_mpi = None + probs_cpu = None + comm.Barrier() + + comm.Gatherv(local_probs, [probs_mpi, recv_counts], root=0) + + if rank == 0: + assert np.allclose(probs_mpi, probs_cpu, atol=tol, rtol=0) + comm.Barrier() + + +class TestProbs: + """Tests for the probability method.""" + + @pytest.mark.parametrize( + "operation", [qml.PauliX, qml.PauliY, qml.PauliZ, qml.Hadamard, qml.S, qml.T] + ) + @pytest.mark.parametrize("GateWires", [[0], [numQubits - 1]]) + @pytest.mark.parametrize( + "Wires", + [ + [0], + [1], + [0, 1], + [0, 2], + [0, numQubits - 1], + [numQubits - 2, numQubits - 1], + range(numQubits), + ], + ) + @pytest.mark.parametrize("C_DTYPE", [np.complex128]) + def test_prob_single_wire_nonparam(self, tol, operation, GateWires, Wires, C_DTYPE): + apply_probs_nonparam(tol, 
operation, GateWires, Wires, C_DTYPE) + + @pytest.mark.parametrize("operation", [qml.CNOT, qml.SWAP, qml.CY, qml.CZ]) + @pytest.mark.parametrize( + "GateWires", [[0, 1], [numQubits - 2, numQubits - 1], [0, numQubits - 1]] + ) + @pytest.mark.parametrize( + "Wires", + [ + [0], + [1], + [0, 1], + [0, 2], + [0, numQubits - 1], + [numQubits - 2, numQubits - 1], + range(numQubits), + ], + ) + @pytest.mark.parametrize("C_DTYPE", [np.complex128]) + def test_prob_two_wire_nonparam(self, tol, operation, GateWires, Wires, C_DTYPE): + apply_probs_nonparam(tol, operation, GateWires, Wires, C_DTYPE) + + @pytest.mark.parametrize("operation", [qml.CSWAP, qml.Toffoli]) + @pytest.mark.parametrize( + "GateWires", + [ + [0, 1, 2], + [numQubits - 3, numQubits - 2, numQubits - 1], + [0, 1, numQubits - 1], + [0, numQubits - 2, numQubits - 1], + ], + ) + @pytest.mark.parametrize( + "Wires", + [ + [0], + [1], + [0, 1], + [0, 2], + [0, numQubits - 1], + [numQubits - 2, numQubits - 1], + range(numQubits), + ], + ) + @pytest.mark.parametrize("C_DTYPE", [np.complex128]) + def test_prob_three_wire_nonparam(self, tol, operation, GateWires, Wires, C_DTYPE): + apply_probs_nonparam(tol, operation, GateWires, Wires, C_DTYPE) + + @pytest.mark.parametrize("operation", [qml.PhaseShift, qml.RX, qml.RY, qml.RZ]) + @pytest.mark.parametrize("par", [[0.1], [0.2], [0.3]]) + @pytest.mark.parametrize("GateWires", [0, numQubits - 1]) + @pytest.mark.parametrize( + "Wires", + [ + [0], + [1], + [0, 1], + [0, 2], + [0, numQubits - 1], + [numQubits - 2, numQubits - 1], + range(numQubits), + ], + ) + @pytest.mark.parametrize("C_DTYPE", [np.complex128]) + def test_prob_single_wire_param(self, tol, operation, par, GateWires, Wires, C_DTYPE): + apply_probs_param(tol, operation, par, GateWires, Wires, C_DTYPE) + + @pytest.mark.parametrize("operation", [qml.Rot]) + @pytest.mark.parametrize("par", [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4]]) + @pytest.mark.parametrize("GateWires", [0, numQubits - 1]) + @pytest.mark.parametrize( + 
"Wires", + [ + [0], + [1], + [0, 1], + [0, 2], + [0, numQubits - 1], + [numQubits - 2, numQubits - 1], + range(numQubits), + ], + ) + @pytest.mark.parametrize("C_DTYPE", [np.complex128]) + def test_prob_single_wire_3param(self, tol, operation, par, GateWires, Wires, C_DTYPE): + apply_probs_param(tol, operation, par, GateWires, Wires, C_DTYPE) + + @pytest.mark.parametrize("operation", [qml.CRot]) + @pytest.mark.parametrize("par", [[0.1, 0.2, 0.3], [0.2, 0.3, 0.4]]) + @pytest.mark.parametrize( + "GateWires", [[0, numQubits - 1], [0, 1], [numQubits - 2, numQubits - 1]] + ) + @pytest.mark.parametrize( + "Wires", + [ + [0], + [1], + [0, 1], + [0, 2], + [0, numQubits - 1], + [numQubits - 2, numQubits - 1], + range(numQubits), + ], + ) + @pytest.mark.parametrize("C_DTYPE", [np.complex128]) + def test_prob_two_wire_3param(self, tol, operation, par, GateWires, Wires, C_DTYPE): + apply_probs_param(tol, operation, par, GateWires, Wires, C_DTYPE) + + @pytest.mark.parametrize( + "operation", + [ + qml.CRX, + qml.CRY, + qml.CRZ, + qml.ControlledPhaseShift, + qml.SingleExcitation, + qml.SingleExcitationMinus, + qml.SingleExcitationPlus, + qml.IsingXX, + qml.IsingYY, + qml.IsingZZ, + ], + ) + @pytest.mark.parametrize("par", [[0.1], [0.2], [0.3]]) + @pytest.mark.parametrize( + "GateWires", [[0, numQubits - 1], [0, 1], [numQubits - 2, numQubits - 1]] + ) + @pytest.mark.parametrize( + "Wires", + [ + [0], + [1], + [0, 1], + [0, 2], + [0, numQubits - 1], + [numQubits - 2, numQubits - 1], + range(numQubits), + ], + ) + @pytest.mark.parametrize("C_DTYPE", [np.complex128]) + def test_prob_two_wire_param(self, tol, operation, par, GateWires, Wires, C_DTYPE): + apply_probs_param(tol, operation, par, GateWires, Wires, C_DTYPE) + + @pytest.mark.parametrize( + "operation", + [qml.DoubleExcitation, qml.DoubleExcitationMinus, qml.DoubleExcitationPlus], + ) + @pytest.mark.parametrize("par", [[0.13], [0.2], [0.3]]) + @pytest.mark.parametrize( + "GateWires", + [ + [0, 1, numQubits - 2, numQubits - 
1], + [0, 1, 2, 3], + [numQubits - 4, numQubits - 3, numQubits - 2, numQubits - 1], + ], + ) + @pytest.mark.parametrize( + "Wires", + [ + [0], + [1], + [0, 1], + [0, 2], + [0, numQubits - 1], + [numQubits - 2, numQubits - 1], + range(numQubits), + ], + ) + @pytest.mark.parametrize("C_DTYPE", [np.complex128]) + def test_prob_four_wire_param(self, tol, operation, par, GateWires, Wires, C_DTYPE): + apply_probs_param(tol, operation, par, GateWires, Wires, C_DTYPE) diff --git a/pennylane_lightning/core/_serialize.py b/pennylane_lightning/core/_serialize.py index 12981faf92..4f7b3e624c 100644 --- a/pennylane_lightning/core/_serialize.py +++ b/pennylane_lightning/core/_serialize.py @@ -25,6 +25,8 @@ Identity, StatePrep, Rot, + Hamiltonian, + SparseHamiltonian, ) from pennylane.operation import Tensor from pennylane.tape import QuantumTape @@ -49,9 +51,10 @@ class QuantumScriptSerializer: """ - # pylint: disable=import-outside-toplevel, too-many-instance-attributes - def __init__(self, device_name, use_csingle: bool = False): + # pylint: disable=import-outside-toplevel, too-many-instance-attributes, c-extension-no-member + def __init__(self, device_name, use_csingle: bool = False, use_mpi: bool = False): self.use_csingle = use_csingle + self.device_name = device_name if device_name == "lightning.qubit": try: import pennylane_lightning.lightning_qubit_ops as lightning_ops @@ -75,6 +78,7 @@ def __init__(self, device_name, use_csingle: bool = False): ) from exception else: raise DeviceError(f'The device name "{device_name}" is not a valid option.') + self.statevector_c64 = lightning_ops.StateVectorC64 self.statevector_c128 = lightning_ops.StateVectorC128 self.named_obs_c64 = lightning_ops.observables.NamedObsC64 self.named_obs_c128 = lightning_ops.observables.NamedObsC128 @@ -84,6 +88,26 @@ def __init__(self, device_name, use_csingle: bool = False): self.tensor_prod_obs_c128 = lightning_ops.observables.TensorProdObsC128 self.hamiltonian_c64 = 
lightning_ops.observables.HamiltonianC64 self.hamiltonian_c128 = lightning_ops.observables.HamiltonianC128 + self.sparse_hamiltonian_c64 = lightning_ops.observables.SparseHamiltonianC64 + self.sparse_hamiltonian_c128 = lightning_ops.observables.SparseHamiltonianC128 + + self._use_mpi = use_mpi + + if self._use_mpi: + self.statevector_mpi_c64 = lightning_ops.StateVectorMPIC64 + self.statevector_mpi_c128 = lightning_ops.StateVectorMPIC128 + self.named_obs_mpi_c64 = lightning_ops.observablesMPI.NamedObsMPIC64 + self.named_obs_mpi_c128 = lightning_ops.observablesMPI.NamedObsMPIC128 + self.hermitian_obs_mpi_c64 = lightning_ops.observablesMPI.HermitianObsMPIC64 + self.hermitian_obs_mpi_c128 = lightning_ops.observablesMPI.HermitianObsMPIC128 + self.tensor_prod_obs_mpi_c64 = lightning_ops.observablesMPI.TensorProdObsMPIC64 + self.tensor_prod_obs_mpi_c128 = lightning_ops.observablesMPI.TensorProdObsMPIC128 + self.hamiltonian_mpi_c64 = lightning_ops.observablesMPI.HamiltonianMPIC64 + self.hamiltonian_mpi_c128 = lightning_ops.observablesMPI.HamiltonianMPIC128 + self.sparse_hamiltonian_mpi_c64 = lightning_ops.observablesMPI.SparseHamiltonianMPIC64 + self.sparse_hamiltonian_mpi_c128 = lightning_ops.observablesMPI.SparseHamiltonianMPIC128 + + self._mpi_manager = lightning_ops.MPIManager @property def ctype(self): @@ -95,26 +119,54 @@ def rtype(self): """Real type.""" return np.float32 if self.use_csingle else np.float64 + @property + def sv_type(self): + """State vector matching ``use_csingle`` precision (and MPI if it is supported).""" + if self._use_mpi: + return self.statevector_mpi_c64 if self.use_csingle else self.statevector_mpi_c128 + return self.statevector_c64 if self.use_csingle else self.statevector_c128 + @property def named_obs(self): """Named observable matching ``use_csingle`` precision.""" + if self._use_mpi: + return self.named_obs_mpi_c64 if self.use_csingle else self.named_obs_mpi_c128 return self.named_obs_c64 if self.use_csingle else self.named_obs_c128 
@property def hermitian_obs(self): """Hermitian observable matching ``use_csingle`` precision.""" + if self._use_mpi: + return self.hermitian_obs_mpi_c64 if self.use_csingle else self.hermitian_obs_mpi_c128 return self.hermitian_obs_c64 if self.use_csingle else self.hermitian_obs_c128 @property def tensor_obs(self): """Tensor product observable matching ``use_csingle`` precision.""" + if self._use_mpi: + return ( + self.tensor_prod_obs_mpi_c64 if self.use_csingle else self.tensor_prod_obs_mpi_c128 + ) return self.tensor_prod_obs_c64 if self.use_csingle else self.tensor_prod_obs_c128 @property def hamiltonian_obs(self): """Hamiltonian observable matching ``use_csingle`` precision.""" + if self._use_mpi: + return self.hamiltonian_mpi_c64 if self.use_csingle else self.hamiltonian_mpi_c128 return self.hamiltonian_c64 if self.use_csingle else self.hamiltonian_c128 + @property + def sparse_hamiltonian_obs(self): + """SparseHamiltonian observable matching ``use_csingle`` precision.""" + if self._use_mpi: + return ( + self.sparse_hamiltonian_mpi_c64 + if self.use_csingle + else self.sparse_hamiltonian_mpi_c128 + ) + return self.sparse_hamiltonian_c64 if self.use_csingle else self.sparse_hamiltonian_c128 + def _named_obs(self, observable, wires_map: dict): """Serializes a Named observable""" wires = [wires_map[w] for w in observable.wires] @@ -139,6 +191,37 @@ def _hamiltonian(self, observable, wires_map: dict): terms = [self._ob(t, wires_map) for t in observable.ops] return self.hamiltonian_obs(coeffs, terms) + def _sparse_hamiltonian(self, observable, wires_map: dict): + """Serialize an observable (Sparse Hamiltonian) + + Args: + observable (Observable): the input observable (Sparse Hamiltonian) + wire_map (dict): a dictionary mapping input wires to the device's backend wires + + Returns: + sparse_hamiltonian_obs (SparseHamiltonianC64 or SparseHamiltonianC128): A Sparse Hamiltonian observable object compatible with the C++ backend + """ + + if self._use_mpi: + Hmat = 
Hamiltonian([1.0], [Identity(0)]).sparse_matrix() + H_sparse = SparseHamiltonian(Hmat, wires=range(1)) + spm = H_sparse.sparse_matrix() + # Only root 0 needs the overall sparsematrix data + if self._mpi_manager().getRank() == 0: + spm = observable.sparse_matrix() + self._mpi_manager().Barrier() + else: + spm = observable.sparse_matrix() + data = np.array(spm.data).astype(self.ctype) + indices = np.array(spm.indices).astype(np.int64) + offsets = np.array(spm.indptr).astype(np.int64) + + wires = [] + wires_list = observable.wires.tolist() + wires.extend([wires_map[w] for w in wires_list]) + + return self.sparse_hamiltonian_obs(data, indices, offsets, wires) + def _pauli_word(self, observable, wires_map: dict): """Serialize a :class:`pennylane.pauli.PauliWord` into a Named or Tensor observable.""" if len(observable) == 1: @@ -166,6 +249,8 @@ def _ob(self, observable, wires_map): return self._tensor_ob(observable, wires_map) if observable.name == "Hamiltonian": return self._hamiltonian(observable, wires_map) + if observable.name == "SparseHamiltonian": + return self._sparse_hamiltonian(observable, wires_map) if isinstance(observable, (PauliX, PauliY, PauliZ, Identity, Hadamard)): return self._named_obs(observable, wires_map) if observable._pauli_rep is not None: @@ -223,7 +308,7 @@ def serialize_ops( name = single_op.name names.append(name) - if not hasattr(self.statevector_c128, name): + if not hasattr(self.sv_type, name): params.append([]) mats.append(matrix(single_op)) diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py index b31d7d35ba..d276e92d29 100644 --- a/pennylane_lightning/core/_version.py +++ b/pennylane_lightning/core/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.33.0-dev23" +__version__ = "0.33.0-dev24" diff --git a/pennylane_lightning/core/lightning_base.py b/pennylane_lightning/core/lightning_base.py index 33a3aa59e2..9587ce3dd2 100644 --- 
a/pennylane_lightning/core/lightning_base.py +++ b/pennylane_lightning/core/lightning_base.py @@ -255,14 +255,14 @@ def _get_basis_state_index(self, state, wires): return int(qml.math.dot(state, basis_states)) # pylint: disable=too-many-function-args, assignment-from-no-return - def _process_jacobian_tape(self, tape, starting_state, use_device_state): + def _process_jacobian_tape(self, tape, starting_state, use_device_state, use_mpi: bool = False): state_vector = self._init_process_jacobian_tape(tape, starting_state, use_device_state) obs_serialized = QuantumScriptSerializer( - self.short_name, self.use_csingle + self.short_name, self.use_csingle, use_mpi ).serialize_observables(tape, self.wire_map) ops_serialized, use_sp = QuantumScriptSerializer( - self.short_name, self.use_csingle + self.short_name, self.use_csingle, use_mpi ).serialize_ops(tape, self.wire_map) ops_serialized = self.create_ops_list(*ops_serialized) diff --git a/pennylane_lightning/core/src/bindings/Bindings.cpp b/pennylane_lightning/core/src/bindings/Bindings.cpp index 70a192b394..425a5ea096 100644 --- a/pennylane_lightning/core/src/bindings/Bindings.cpp +++ b/pennylane_lightning/core/src/bindings/Bindings.cpp @@ -16,6 +16,9 @@ * Export C++ functions to Python using Pybind. */ #include "Bindings.hpp" +#ifdef _ENABLE_PLGPU_MPI +#include "BindingsMPI.hpp" +#endif #include "pybind11/pybind11.h" // Defining the module name. 
@@ -55,6 +58,11 @@ PYBIND11_MODULE( registerBackendSpecificInfo(m); registerLightningClassBindings(m); + +#ifdef _ENABLE_PLGPU_MPI + registerBackendSpecificInfoMPI(m); + registerLightningClassBindingsMPI(m); +#endif } #endif \ No newline at end of file diff --git a/pennylane_lightning/core/src/bindings/Bindings.hpp b/pennylane_lightning/core/src/bindings/Bindings.hpp index 30c994a719..1368ee3a94 100644 --- a/pennylane_lightning/core/src/bindings/Bindings.hpp +++ b/pennylane_lightning/core/src/bindings/Bindings.hpp @@ -287,7 +287,8 @@ void registerInfo(py::module_ &m) { * @tparam StateVectorT * @param m Pybind module */ -template void registerObservables(py::module_ &m) { +template +void registerBackendAgnosticObservables(py::module_ &m) { using PrecisionT = typename StateVectorT::PrecisionT; // Statevector's precision. using ComplexT = @@ -627,7 +628,8 @@ template void lightningClassBindings(py::module_ &m) { /* Observables submodule */ py::module_ obs_submodule = m.def_submodule("observables", "Submodule for observables classes."); - registerObservables(obs_submodule); + registerBackendAgnosticObservables(obs_submodule); + registerBackendSpecificObservables(obs_submodule); //***********************************************************************// // Measurements diff --git a/pennylane_lightning/core/src/bindings/BindingsMPI.hpp b/pennylane_lightning/core/src/bindings/BindingsMPI.hpp new file mode 100644 index 0000000000..41276afe5d --- /dev/null +++ b/pennylane_lightning/core/src/bindings/BindingsMPI.hpp @@ -0,0 +1,483 @@ +// Copyright 2018-2023 Xanadu Quantum Technologies Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * @file Bindings.hpp + * Defines device-agnostic operations to export to Python and other utility + * functions interfacing with Pybind11. + */ + +#pragma once +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "CPUMemoryModel.hpp" // CPUMemoryModel, getMemoryModel, bestCPUMemoryModel, getAlignment +#include "JacobianData.hpp" +#include "Macros.hpp" // CPUArch +#include "Memory.hpp" // alignedAlloc +#include "Observables.hpp" +#include "Util.hpp" // for_each_enum + +#ifdef _ENABLE_PLGPU +#include "AdjointJacobianGPUMPI.hpp" +#include "JacobianDataMPI.hpp" +#include "LGPUBindingsMPI.hpp" +#include "MeasurementsGPUMPI.hpp" +#include "ObservablesGPUMPI.hpp" + +/// @cond DEV +namespace { +using namespace Pennylane::LightningGPU; +using namespace Pennylane::LightningGPU::Algorithms; +using namespace Pennylane::LightningGPU::Observables; +using namespace Pennylane::LightningGPU::Measures; +} // namespace + /// @endcond + +#else + +static_assert(false, "Backend not found."); + +#endif + +namespace py = pybind11; + +namespace Pennylane { +/** + * @brief Register observable classes. + * + * @tparam StateVectorT + * @param m Pybind module + */ +template void registerObservablesMPI(py::module_ &m) { + using PrecisionT = + typename StateVectorT::PrecisionT; // Statevector's precision. + using ComplexT = + typename StateVectorT::ComplexT; // Statevector's complex type. 
+ using ParamT = PrecisionT; // Parameter's data precision + + const std::string bitsize = + std::to_string(sizeof(std::complex) * 8); + + using np_arr_c = py::array_t, py::array::c_style>; + using np_arr_r = py::array_t; + using np_arr_sparse_ind = typename std::conditional< + std::is_same::value, + py::array_t, + py::array_t>::type; + + std::string class_name; + + class_name = "ObservableMPIC" + bitsize; + py::class_, + std::shared_ptr>>(m, class_name.c_str(), + py::module_local()); + + class_name = "NamedObsMPIC" + bitsize; + py::class_, + std::shared_ptr>, + Observable>(m, class_name.c_str(), + py::module_local()) + .def(py::init( + [](const std::string &name, const std::vector &wires) { + return NamedObsMPI(name, wires); + })) + .def("__repr__", &NamedObsMPI::getObsName) + .def("get_wires", &NamedObsMPI::getWires, + "Get wires of observables") + .def( + "__eq__", + [](const NamedObsMPI &self, + py::handle other) -> bool { + if (!py::isinstance>(other)) { + return false; + } + auto other_cast = other.cast>(); + return self == other_cast; + }, + "Compare two observables"); + + class_name = "HermitianObsMPIC" + bitsize; + py::class_, + std::shared_ptr>, + Observable>(m, class_name.c_str(), + py::module_local()) + .def(py::init( + [](const np_arr_c &matrix, const std::vector &wires) { + auto buffer = matrix.request(); + const auto *ptr = static_cast(buffer.ptr); + return HermitianObsMPI( + std::vector(ptr, ptr + buffer.size), wires); + })) + .def("__repr__", &HermitianObsMPI::getObsName) + .def("get_wires", &HermitianObsMPI::getWires, + "Get wires of observables") + .def( + "__eq__", + [](const HermitianObsMPI &self, + py::handle other) -> bool { + if (!py::isinstance>(other)) { + return false; + } + auto other_cast = other.cast>(); + return self == other_cast; + }, + "Compare two observables"); + + class_name = "TensorProdObsMPIC" + bitsize; + py::class_, + std::shared_ptr>, + Observable>(m, class_name.c_str(), + py::module_local()) + .def(py::init( + [](const 
std::vector>> + &obs) { return TensorProdObsMPI(obs); })) + .def("__repr__", &TensorProdObsMPI::getObsName) + .def("get_wires", &TensorProdObsMPI::getWires, + "Get wires of observables") + .def( + "__eq__", + [](const TensorProdObsMPI &self, + py::handle other) -> bool { + if (!py::isinstance>(other)) { + return false; + } + auto other_cast = other.cast>(); + return self == other_cast; + }, + "Compare two observables"); + + class_name = "HamiltonianMPIC" + bitsize; + using ObsPtr = std::shared_ptr>; + py::class_, + std::shared_ptr>, + Observable>(m, class_name.c_str(), + py::module_local()) + .def(py::init( + [](const np_arr_r &coeffs, const std::vector &obs) { + auto buffer = coeffs.request(); + const auto ptr = static_cast(buffer.ptr); + return HamiltonianMPI{ + std::vector(ptr, ptr + buffer.size), obs}; + })) + .def("__repr__", &HamiltonianMPI::getObsName) + .def("get_wires", &HamiltonianMPI::getWires, + "Get wires of observables") + .def( + "__eq__", + [](const HamiltonianMPI &self, + py::handle other) -> bool { + if (!py::isinstance>(other)) { + return false; + } + auto other_cast = other.cast>(); + return self == other_cast; + }, + "Compare two observables"); +#ifdef _ENABLE_PLGPU + class_name = "SparseHamiltonianMPIC" + bitsize; + using SpIDX = typename SparseHamiltonianMPI::IdxT; + py::class_, + std::shared_ptr>, + Observable>(m, class_name.c_str(), + py::module_local()) + .def(py::init([](const np_arr_c &data, const np_arr_sparse_ind &indices, + const np_arr_sparse_ind &offsets, + const std::vector &wires) { + const py::buffer_info buffer_data = data.request(); + const auto *data_ptr = static_cast(buffer_data.ptr); + + const py::buffer_info buffer_indices = indices.request(); + const auto *indices_ptr = static_cast(buffer_indices.ptr); + + const py::buffer_info buffer_offsets = offsets.request(); + const auto *offsets_ptr = static_cast(buffer_offsets.ptr); + + return SparseHamiltonianMPI{ + std::vector({data_ptr, data_ptr + data.size()}), + 
std::vector({indices_ptr, indices_ptr + indices.size()}), + std::vector({offsets_ptr, offsets_ptr + offsets.size()}), + wires}; + })) + .def("__repr__", &SparseHamiltonianMPI::getObsName) + .def("get_wires", &SparseHamiltonianMPI::getWires, + "Get wires of observables") + .def( + "__eq__", + [](const SparseHamiltonianMPI &self, + py::handle other) -> bool { + if (!py::isinstance>( + other)) { + return false; + } + auto other_cast = + other.cast>(); + return self == other_cast; + }, + "Compare two observables"); +#endif +} + +/** + * @brief Register agnostic measurements class functionalities. + * + * @tparam StateVectorT + * @tparam PyClass + * @param pyclass Pybind11's measurements class to bind methods. + */ +template +void registerBackendAgnosticMeasurementsMPI(PyClass &pyclass) { + using PrecisionT = + typename StateVectorT::PrecisionT; // Statevector's precision. + using ParamT = PrecisionT; // Parameter's data precision + + pyclass + .def("probs", + [](MeasurementsMPI &M, + const std::vector &wires) { + return py::array_t(py::cast(M.probs(wires))); + }) + .def("probs", + [](MeasurementsMPI &M) { + return py::array_t(py::cast(M.probs())); + }) + .def( + "expval", + [](MeasurementsMPI &M, + const std::shared_ptr> &ob) { + return M.expval(*ob); + }, + "Expected value of an observable object.") + .def( + "var", + [](MeasurementsMPI &M, + const std::shared_ptr> &ob) { + return M.var(*ob); + }, + "Variance of an observable object.") + .def("generate_samples", [](MeasurementsMPI &M, + size_t num_wires, size_t num_shots) { + auto &&result = M.generate_samples(num_shots); + const size_t ndim = 2; + const std::vector shape{num_shots, num_wires}; + constexpr auto sz = sizeof(size_t); + const std::vector strides{sz * num_wires, sz}; + // return 2-D NumPy array + return py::array(py::buffer_info( + result.data(), /* data as contiguous array */ + sz, /* size of one scalar */ + py::format_descriptor::format(), /* data type */ + ndim, /* number of dimensions */ + shape, /* 
shape of the matrix */ + strides /* strides for each axis */ + )); + }); +} + +/** + * @brief Register the adjoint Jacobian method. + */ +template +auto registerAdjointJacobianMPI( + AdjointJacobianMPI &adjoint_jacobian, const StateVectorT &sv, + const std::vector>> &observables, + const OpsData &operations, + const std::vector &trainableParams) + -> py::array_t { + using PrecisionT = typename StateVectorT::PrecisionT; + std::vector jac(observables.size() * trainableParams.size(), + PrecisionT{0.0}); + const JacobianDataMPI jd{operations.getTotalNumParams(), sv, + observables, operations, + trainableParams}; + adjoint_jacobian.adjointJacobian(std::span{jac}, jd, sv); + return py::array_t(py::cast(jac)); +} + +/** + * @brief Register agnostic algorithms. + * + * @tparam StateVectorT + * @param m Pybind module + */ +template +void registerBackendAgnosticAlgorithmsMPI(py::module_ &m) { + using PrecisionT = + typename StateVectorT::PrecisionT; // Statevector's precision + using ComplexT = + typename StateVectorT::ComplexT; // Statevector's complex type + using ParamT = PrecisionT; // Parameter's data precision + + using np_arr_c = py::array_t, py::array::c_style>; + + const std::string bitsize = + std::to_string(sizeof(std::complex) * 8); + + std::string class_name; + + //***********************************************************************// + // Operations + //***********************************************************************// + + class_name = "OpsStructMPIC" + bitsize; + py::class_>(m, class_name.c_str(), py::module_local()) + .def(py::init &, + const std::vector> &, + const std::vector> &, + const std::vector &, + const std::vector> &>()) + .def("__repr__", [](const OpsData &ops) { + using namespace Pennylane::Util; + std::ostringstream ops_stream; + for (size_t op = 0; op < ops.getSize(); op++) { + ops_stream << "{'name': " << ops.getOpsName()[op]; + ops_stream << ", 'params': " << ops.getOpsParams()[op]; + ops_stream << ", 'inv': " << 
ops.getOpsInverses()[op]; + ops_stream << "}"; + if (op < ops.getSize() - 1) { + ops_stream << ","; + } + } + return "Operations: [" + ops_stream.str() + "]"; + }); + + /** + * Create operation list. + */ + std::string function_name = "create_ops_listMPIC" + bitsize; + m.def( + function_name.c_str(), + [](const std::vector &ops_name, + const std::vector> &ops_params, + const std::vector> &ops_wires, + const std::vector &ops_inverses, + const std::vector &ops_matrices) { + std::vector> conv_matrices( + ops_matrices.size()); + for (size_t op = 0; op < ops_name.size(); op++) { + const auto m_buffer = ops_matrices[op].request(); + if (m_buffer.size) { + const auto m_ptr = + static_cast(m_buffer.ptr); + conv_matrices[op] = + std::vector{m_ptr, m_ptr + m_buffer.size}; + } + } + return OpsData{ops_name, ops_params, ops_wires, + ops_inverses, conv_matrices}; + }, + "Create a list of operations from data."); + + //***********************************************************************// + // Adjoint Jacobian + //***********************************************************************// + class_name = "AdjointJacobianMPIC" + bitsize; + py::class_>(m, class_name.c_str(), + py::module_local()) + .def(py::init<>()) + .def( + "batched", + [](AdjointJacobianMPI &adjoint_jacobian, + const StateVectorT &sv, + const std::vector>> + &observables, + const OpsData &operations, + const std::vector &trainableParams) { + using PrecisionT = typename StateVectorT::PrecisionT; + std::vector jac(observables.size() * + trainableParams.size(), + PrecisionT{0.0}); + const JacobianDataMPI jd{ + operations.getTotalNumParams(), sv, observables, operations, + trainableParams}; + adjoint_jacobian.adjointJacobian_serial(std::span{jac}, jd); + return py::array_t(py::cast(jac)); + }, + "Batch Adjoint Jacobian method.") + .def("__call__", ®isterAdjointJacobianMPI, + "Adjoint Jacobian method."); +} + +/** + * @brief Templated class to build lightning class bindings. 
+ * + * @tparam StateVectorT State vector type + * @param m Pybind11 module. + */ +template void lightningClassBindingsMPI(py::module_ &m) { + using PrecisionT = + typename StateVectorT::PrecisionT; // Statevector's precision. + // Enable module name to be based on size of complex datatype + const std::string bitsize = + std::to_string(sizeof(std::complex) * 8); + + //***********************************************************************// + // StateVector + //***********************************************************************// + std::string class_name = "StateVectorMPIC" + bitsize; + auto pyclass = + py::class_(m, class_name.c_str(), py::module_local()); + pyclass.def_property_readonly("size", &StateVectorT::getLength); + + registerBackendClassSpecificBindingsMPI(pyclass); + + //***********************************************************************// + // Observables + //***********************************************************************// + + py::module_ obs_submodule = + m.def_submodule("observablesMPI", "Submodule for observables classes."); + registerObservablesMPI(obs_submodule); + + //***********************************************************************// + // Measurements + //***********************************************************************// + + class_name = "MeasurementsMPIC" + bitsize; + auto pyclass_measurements = py::class_>( + m, class_name.c_str(), py::module_local()); + + pyclass_measurements.def(py::init()); + registerBackendAgnosticMeasurementsMPI(pyclass_measurements); + registerBackendSpecificMeasurementsMPI(pyclass_measurements); + + //***********************************************************************// + // Algorithms + //***********************************************************************// + + py::module_ alg_submodule = m.def_submodule( + "algorithmsMPI", "Submodule for the algorithms functionality."); + registerBackendAgnosticAlgorithmsMPI(alg_submodule); + registerBackendSpecificAlgorithmsMPI(alg_submodule); +} 
+ +template +void registerLightningClassBindingsMPI(py::module_ &m) { + if constexpr (!std::is_same_v) { + using StateVectorT = typename TypeList::Type; + lightningClassBindingsMPI(m); + registerLightningClassBindingsMPI(m); + } +} +} // namespace Pennylane diff --git a/pennylane_lightning/core/src/observables/Observables.hpp b/pennylane_lightning/core/src/observables/Observables.hpp index 42227f62e5..183a9fcd82 100644 --- a/pennylane_lightning/core/src/observables/Observables.hpp +++ b/pennylane_lightning/core/src/observables/Observables.hpp @@ -414,4 +414,112 @@ class HamiltonianBase : public Observable { } }; +/** + * @brief Sparse representation of SparseHamiltonian + * + * @tparam T Floating-point precision. + */ +template +class SparseHamiltonianBase : public Observable { + public: + using PrecisionT = typename StateVectorT::PrecisionT; + using ComplexT = typename StateVectorT::ComplexT; +#ifdef _ENABLE_PLGPU + using IdxT = + typename std::conditional::value, + int32_t, int64_t>::type; +#else + using IdxT = std::size_t; +#endif + + protected: + std::vector data_; + std::vector indices_; + std::vector offsets_; + std::vector wires_; + + private: + [[nodiscard]] bool + isEqual(const Observable &other) const override { + const auto &other_cast = + static_cast &>(other); + return data_ == other_cast.data_ && indices_ == other_cast.indices_ && + offsets_ == other_cast.offsets_ && (wires_ == other_cast.wires_); + } + + public: + /** + * @brief Create a SparseHamiltonianBase from data, indices and offsets in + * CSR format. 
+ * + * @param data Arguments to construct data + * @param indices Arguments to construct indices + * @param offsets Arguments to construct offsets + * @param wires Arguments to construct wires + */ + template > + SparseHamiltonianBase(T1 &&data, T2 &&indices, T3 &&offsets, T4 &&wires) + : data_{std::forward(data)}, indices_{std::forward(indices)}, + offsets_{std::forward(offsets)}, wires_{std::forward(wires)} { + PL_ASSERT(data_.size() == indices_.size()); + } + + /** + * @brief Convenient wrapper for the constructor as the constructor does not + * convert the std::shared_ptr with a derived class correctly. + * + * This function is useful as std::make_shared does not handle + * brace-enclosed initializer list correctly. + * + * @param data Argument to construct data + * @param indices Argument to construct indices + * @param offsets Argument to construct offsets + * @param wires Argument to construct wires + */ + static auto create(std::initializer_list data, + std::initializer_list indices, + std::initializer_list offsets, + std::initializer_list wires) + -> std::shared_ptr> { + // NOLINTBEGIN(*-move-const-arg) + return std::shared_ptr>( + new SparseHamiltonianBase{ + std::move(data), std::move(indices), std::move(offsets), + std::move(wires)}); + // NOLINTEND(*-move-const-arg) + } + + void applyInPlace([[maybe_unused]] StateVectorT &sv) const override { + PL_ABORT("For SparseHamiltonian Observables, the applyInPlace method " + "must be " + "defined at the backend level."); + } + + [[nodiscard]] auto getObsName() const -> std::string override { + using Pennylane::Util::operator<<; + std::ostringstream ss; + ss << "SparseHamiltonian: {\n'data' : \n"; + for (const auto &d : data_) { + ss << "{" << d.real() << ", " << d.imag() << "}, "; + } + ss << ",\n'indices' : \n"; + for (const auto &i : indices_) { + ss << i << ", "; + } + ss << ",\n'offsets' : \n"; + for (const auto &o : offsets_) { + ss << o << ", "; + } + ss << "\n}"; + return ss.str(); + } + /** + * @brief 
Get the wires the observable applies to. + */ + [[nodiscard]] auto getWires() const -> std::vector override { + return wires_; + }; +}; + } // namespace Pennylane::Observables \ No newline at end of file diff --git a/pennylane_lightning/core/src/observables/tests/Test_Observables.cpp b/pennylane_lightning/core/src/observables/tests/Test_Observables.cpp index 2c4e3b60da..d0bb9799b1 100644 --- a/pennylane_lightning/core/src/observables/tests/Test_Observables.cpp +++ b/pennylane_lightning/core/src/observables/tests/Test_Observables.cpp @@ -30,8 +30,8 @@ /// @cond DEV namespace { using namespace Pennylane::Observables; - using Pennylane::Util::createProductState; +using Pennylane::Util::createRandomStateVectorData; using Pennylane::Util::createZeroState; using Pennylane::Util::isApproxEqual; using Pennylane::Util::LightningException; @@ -464,3 +464,84 @@ TEST_CASE("Methods implemented in the HamiltonianBase class", testHamiltonianBase(); } } + +template void testSparseHamiltonianBase() { + if constexpr (!std::is_same_v) { + using StateVectorT = typename TypeList::Type; + using PrecisionT = typename StateVectorT::PrecisionT; + using ComplexT = typename StateVectorT::ComplexT; + + const std::size_t num_qubits = 3; + std::mt19937 re{1337}; + + auto sparseH = SparseHamiltonianBase::create( + {ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}}, + {7, 6, 5, 4, 3, 2, 1, 0}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {0, 1, 2}); + + DYNAMIC_SECTION("SparseHamiltonianBase - isEqual - " + << StateVectorToName::name) { + auto sparseH0 = SparseHamiltonianBase::create( + {ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}}, + {7, 6, 5, 4, 3, 2, 1, 0}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, + {0, 1, 2}); + auto sparseH1 = SparseHamiltonianBase::create( + {ComplexT{1.0, 0.0}, 
ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}}, + {7, 6, 5, 4, 3, 2, 1, 0}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, + {0, 1, 2}); + auto sparseH2 = SparseHamiltonianBase::create( + {ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}}, + {8, 6, 5, 4, 3, 2, 1, 0}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, + {0, 1, 2}); + + REQUIRE(*sparseH0 == *sparseH1); + REQUIRE(*sparseH0 != *sparseH2); + } + + DYNAMIC_SECTION("SparseHamiltonianBase - getWires - " + << StateVectorToName::name) { + REQUIRE(sparseH->getWires() == std::vector{0, 1, 2}); + } + + DYNAMIC_SECTION("SparseHamiltonianBase - getObsName - " + << StateVectorToName::name) { + REQUIRE(sparseH->getObsName() == + "SparseHamiltonian: {\n" + "'data' : \n" + "{1, 0}, {1, 0}, {1, 0}, {1, 0}, {1, 0}, {1, 0}, {1, 0}, " + "{1, 0}, ,\n" + "'indices' : \n" + "7, 6, 5, 4, 3, 2, 1, 0, ,\n" + "'offsets' : \n" + "0, 1, 2, 3, 4, 5, 6, 7, 8, \n" + "}"); + } + + DYNAMIC_SECTION("SparseHamiltonianBase - applyInPlace must fail - " + << StateVectorToName::name) { + auto init_state = + createRandomStateVectorData(re, num_qubits); + + StateVectorT state_vector(init_state.data(), init_state.size()); + + REQUIRE_THROWS_AS(sparseH->applyInPlace(state_vector), + LightningException); + } + + testSparseHamiltonianBase(); + } +} + +TEST_CASE("Methods implemented in the SparseHamiltonianBase class", + "[SparseHamiltonianBase]") { + if constexpr (BACKEND_FOUND) { + testSparseHamiltonianBase(); + } +} \ No newline at end of file diff --git a/pennylane_lightning/core/src/observables/tests/mpi/Test_ObservablesMPI.cpp b/pennylane_lightning/core/src/observables/tests/mpi/Test_ObservablesMPI.cpp index fdcfa8c2ea..201952efa5 100644 --- a/pennylane_lightning/core/src/observables/tests/mpi/Test_ObservablesMPI.cpp +++ 
b/pennylane_lightning/core/src/observables/tests/mpi/Test_ObservablesMPI.cpp @@ -524,3 +524,91 @@ TEST_CASE("Methods implemented in the HamiltonianBase class", testHamiltonianBase(); } } + +template void testSparseHamiltonianBase() { + if constexpr (!std::is_same_v) { + using StateVectorT = typename TypeList::Type; + using PrecisionT = typename StateVectorT::PrecisionT; + using ComplexT = typename StateVectorT::ComplexT; + + const std::size_t num_qubits = 3; + std::mt19937 re{1337}; + + MPIManager mpi_manager(MPI_COMM_WORLD); + + size_t mpi_buffersize = 1; + size_t nGlobalIndexBits = + std::bit_width(static_cast(mpi_manager.getSize())) - 1; + size_t nLocalIndexBits = num_qubits - nGlobalIndexBits; + size_t subSvLength = 1 << nLocalIndexBits; + + int nDevices = 0; + cudaGetDeviceCount(&nDevices); + int deviceId = mpi_manager.getRank() % nDevices; + cudaSetDevice(deviceId); + DevTag dt_local(deviceId, 0); + mpi_manager.Barrier(); + + std::vector expected_sv(subSvLength); + std::vector local_state(subSvLength); + + auto init_state = + createRandomStateVectorData(re, num_qubits); + + mpi_manager.Scatter(init_state.data(), local_state.data(), subSvLength, + 0); + mpi_manager.Barrier(); + + DYNAMIC_SECTION("applyInPlace must fail - " + << StateVectorMPIToName::name) { + auto sparseH = SparseHamiltonianBase::create( + {ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}}, + {7, 6, 5, 4, 3, 2, 1, 0}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, + {0, 1, 2}); + + StateVectorT sv_mpi(mpi_manager, dt_local, mpi_buffersize, + nGlobalIndexBits, nLocalIndexBits); + + sv_mpi.CopyHostDataToGpu(local_state, false); + + REQUIRE_THROWS_AS(sparseH->applyInPlace(sv_mpi), + LightningException); + } + + DYNAMIC_SECTION("SparseHamiltonianBase - isEqual - " + << StateVectorMPIToName::name) { + auto sparseH0 = SparseHamiltonianBase::create( + {ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 
0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}}, + {7, 6, 5, 4, 3, 2, 1, 0}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, + {0, 1, 2}); + auto sparseH1 = SparseHamiltonianBase::create( + {ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}}, + {7, 6, 5, 4, 3, 2, 1, 0}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, + {0, 1, 2}); + auto sparseH2 = SparseHamiltonianBase::create( + {ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}}, + {8, 6, 5, 4, 3, 2, 1, 0}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, + {0, 1, 2}); + + REQUIRE(*sparseH0 == *sparseH1); + REQUIRE(*sparseH0 != *sparseH2); + } + + testSparseHamiltonianBase(); + } +} + +TEST_CASE("Methods implemented in the SparseHamiltonianBase class", + "[SparseHamiltonianBase]") { + if constexpr (BACKEND_FOUND) { + testSparseHamiltonianBase(); + } +} diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/CMakeLists.txt b/pennylane_lightning/core/src/simulators/lightning_gpu/CMakeLists.txt index adec26a5af..5d5a336f98 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/CMakeLists.txt +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/CMakeLists.txt @@ -31,6 +31,10 @@ add_library(${PL_BACKEND} STATIC ${LGPU_FILES}) target_compile_options(lightning_compile_options INTERFACE "-D_ENABLE_PLGPU=1") +if(ENABLE_MPI) + target_compile_options(lightning_compile_options INTERFACE "-D_ENABLE_PLGPU_MPI=1") +endif() + ########################## ## Enforce C++ Standard ## ########################## diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindings.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindings.hpp index e713ea2eef..2ebf7d3f95 100644 --- 
a/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindings.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindings.hpp @@ -188,10 +188,13 @@ void registerBackendSpecificMeasurements(PyClass &pyclass) { using np_arr_c = py::array_t, py::array::c_style | py::array::forcecast>; - using sparse_index_type = std::size_t; - using np_arr_sparse_ind = - py::array_t; + using sparse_index_type = + typename std::conditional::value, int32_t, + int64_t>::type; + using np_arr_sparse_ind = typename std::conditional< + std::is_same::value, + py::array_t, + py::array_t>::type; pyclass .def("expval", @@ -205,10 +208,14 @@ void registerBackendSpecificMeasurements(PyClass &pyclass) { const np_arr_sparse_ind &entries, const np_arr_c &values) { return M.expval( static_cast(row_map.request().ptr), - static_cast(row_map.request().size), + static_cast( + row_map.request() + .size), // int64_t is required by cusparse static_cast(entries.request().ptr), static_cast(values.request().ptr), - static_cast(values.request().size)); + static_cast( + values.request() + .size)); // int64_t is required by cusparse }, "Expected value of a sparse Hamiltonian.") .def( @@ -249,14 +256,81 @@ void registerBackendSpecificMeasurements(PyClass &pyclass) { const np_arr_sparse_ind &entries, const np_arr_c &values) { return M.var( static_cast(row_map.request().ptr), - static_cast(row_map.request().size), + static_cast(row_map.request().size), static_cast(entries.request().ptr), static_cast(values.request().ptr), - static_cast(values.request().size)); + static_cast(values.request().size)); }, "Variance of a sparse Hamiltonian."); } +/** + * @brief Register backend specific observables. + * + * @tparam StateVectorT + * @param m Pybind module + */ +template +void registerBackendSpecificObservables(py::module_ &m) { + using PrecisionT = + typename StateVectorT::PrecisionT; // Statevector's precision. 
+ using ComplexT = + typename StateVectorT::ComplexT; // Statevector's complex type. + using ParamT = PrecisionT; // Parameter's data precision + + const std::string bitsize = + std::to_string(sizeof(std::complex) * 8); + + using np_arr_c = py::array_t, py::array::c_style>; + + std::string class_name; + + class_name = "SparseHamiltonianC" + bitsize; + using np_arr_sparse_ind = typename std::conditional< + std::is_same::value, + py::array_t, + py::array_t>::type; + using IdxT = typename SparseHamiltonian::IdxT; + py::class_, + std::shared_ptr>, + Observable>(m, class_name.c_str(), + py::module_local()) + .def(py::init([](const np_arr_c &data, const np_arr_sparse_ind &indices, + const np_arr_sparse_ind &offsets, + const std::vector &wires) { + const py::buffer_info buffer_data = data.request(); + const auto *data_ptr = static_cast(buffer_data.ptr); + + const py::buffer_info buffer_indices = indices.request(); + const auto *indices_ptr = + static_cast(buffer_indices.ptr); + + const py::buffer_info buffer_offsets = offsets.request(); + const auto *offsets_ptr = + static_cast(buffer_offsets.ptr); + + return SparseHamiltonian{ + std::vector({data_ptr, data_ptr + data.size()}), + std::vector({indices_ptr, indices_ptr + indices.size()}), + std::vector({offsets_ptr, offsets_ptr + offsets.size()}), + wires}; + })) + .def("__repr__", &SparseHamiltonian::getObsName) + .def("get_wires", &SparseHamiltonian::getWires, + "Get wires of observables") + .def( + "__eq__", + [](const SparseHamiltonian &self, + py::handle other) -> bool { + if (!py::isinstance>(other)) { + return false; + } + auto other_cast = other.cast>(); + return self == other_cast; + }, + "Compare two observables"); +} + /** * @brief Register backend specific adjoint Jacobian methods. 
* diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindingsMPI.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindingsMPI.hpp new file mode 100644 index 0000000000..1ca4670fe7 --- /dev/null +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindingsMPI.hpp @@ -0,0 +1,323 @@ +// Copyright 2022-2023 Xanadu Quantum Technologies Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include +#include +#include +#include + +#include "cuda.h" + +#include "BindingsBase.hpp" +#include "Constant.hpp" +#include "ConstantUtil.hpp" // lookup +#include "DevTag.hpp" +#include "DevicePool.hpp" +#include "Error.hpp" +#include "MPIManager.hpp" +#include "MeasurementsGPUMPI.hpp" +#include "ObservablesGPUMPI.hpp" +#include "StateVectorCudaMPI.hpp" +#include "TypeList.hpp" +#include "cuda_helpers.hpp" + +/// @cond DEV +namespace { +using namespace Pennylane; +using namespace Pennylane::Bindings; +using namespace Pennylane::LightningGPU::Algorithms; +using namespace Pennylane::LightningGPU::Measures; +using namespace Pennylane::LightningGPU::Observables; +using Pennylane::LightningGPU::StateVectorCudaMPI; +} // namespace +/// @endcond + +namespace py = pybind11; + +namespace Pennylane::LightningGPU { +using StateVectorMPIBackends = + Pennylane::Util::TypeList, + StateVectorCudaMPI, void>; + +/** + * @brief Get a gate kernel map for a statevector. 
+ */ + +template +void registerBackendClassSpecificBindingsMPI(PyClass &pyclass) { + using PrecisionT = + typename StateVectorT::PrecisionT; // Statevector's precision + using CFP_t = + typename StateVectorT::CFP_t; // Statevector's complex precision + using ParamT = PrecisionT; // Parameter's data precision + using np_arr_c = py::array_t, + py::array::c_style | py::array::forcecast>; + using np_arr_sparse_ind = typename std::conditional< + std::is_same::value, + py::array_t, + py::array_t>::type; + + registerGatesForStateVector(pyclass); + + pyclass + .def( + py::init([](MPIManager &mpi_manager, const DevTag devtag_local, + std::size_t mpi_buf_size, std::size_t num_global_qubits, + std::size_t num_local_qubits) { + return new StateVectorT(mpi_manager, devtag_local, mpi_buf_size, + num_global_qubits, num_local_qubits); + })) // qubits, device + .def(py::init( + [](const DevTag devtag_local, std::size_t mpi_buf_size, + std::size_t num_global_qubits, std::size_t num_local_qubits) { + return new StateVectorT(devtag_local, mpi_buf_size, + num_global_qubits, num_local_qubits); + })) // qubits, device + .def( + "setBasisState", + [](StateVectorT &sv, const size_t index, const bool use_async) { + const std::complex value(1, 0); + sv.setBasisState(value, index, use_async); + }, + "Create Basis State on GPU.") + .def( + "setStateVector", + [](StateVectorT &sv, const np_arr_sparse_ind &indices, + const np_arr_c &state, const bool use_async) { + using index_type = typename std::conditional< + std::is_same::value, int32_t, int64_t>::type; + + sv.template setStateVector( + static_cast(indices.request().size), + static_cast *>( + state.request().ptr), + static_cast(indices.request().ptr), + use_async); + }, + "Set State Vector on GPU with values and their corresponding " + "indices for the state vector on device") + .def( + "DeviceToDevice", + [](StateVectorT &sv, const StateVectorT &other, bool async) { + sv.updateData(other, async); + }, + "Synchronize data from another GPU 
device to current device.") + .def("DeviceToHost", + py::overload_cast *, size_t, bool>( + &StateVectorT::CopyGpuDataToHost, py::const_), + "Synchronize data from the GPU device to host.") + .def( + "DeviceToHost", + [](const StateVectorT &gpu_sv, np_arr_c &cpu_sv, bool) { + py::buffer_info numpyArrayInfo = cpu_sv.request(); + auto *data_ptr = + static_cast *>(numpyArrayInfo.ptr); + if (cpu_sv.size()) { + gpu_sv.CopyGpuDataToHost(data_ptr, cpu_sv.size()); + } + }, + "Synchronize data from the GPU device to host.") + .def("HostToDevice", + py::overload_cast *, size_t, bool>( + &StateVectorT::CopyHostDataToGpu), + "Synchronize data from the host device to GPU.") + .def("HostToDevice", + py::overload_cast> &, + bool>(&StateVectorT::CopyHostDataToGpu), + "Synchronize data from the host device to GPU.") + .def( + "HostToDevice", + [](StateVectorT &gpu_sv, const np_arr_c &cpu_sv, bool async) { + const py::buffer_info numpyArrayInfo = cpu_sv.request(); + const auto *data_ptr = + static_cast *>(numpyArrayInfo.ptr); + const auto length = + static_cast(numpyArrayInfo.shape[0]); + if (length) { + gpu_sv.CopyHostDataToGpu(data_ptr, length, async); + } + }, + "Synchronize data from the host device to GPU.") + .def("GetNumGPUs", &getGPUCount, "Get the number of available GPUs.") + .def("getCurrentGPU", &getGPUIdx, + "Get the GPU index for the statevector data.") + .def("numQubits", &StateVectorT::getNumQubits) + .def("dataLength", &StateVectorT::getLength) + .def("resetGPU", &StateVectorT::initSV) + .def( + "apply", + [](StateVectorT &sv, const std::string &str, + const std::vector &wires, bool inv, + [[maybe_unused]] const std::vector> ¶ms, + [[maybe_unused]] const np_arr_c &gate_matrix) { + const auto m_buffer = gate_matrix.request(); + std::vector matrix_cu; + if (m_buffer.size) { + const auto m_ptr = static_cast(m_buffer.ptr); + matrix_cu = + std::vector{m_ptr, m_ptr + m_buffer.size}; + } + sv.applyOperation(str, wires, inv, std::vector{}, + matrix_cu); + }, + "Apply 
operation via the gate matrix"); +} + +/** + * @brief Register backend specific measurements class functionalities. + * + * @tparam StateVectorT + * @tparam PyClass + * @param pyclass Pybind11's measurements class to bind methods. + */ +template +void registerBackendSpecificMeasurementsMPI(PyClass &pyclass) { + using PrecisionT = + typename StateVectorT::PrecisionT; // Statevector's precision + using ComplexT = + typename StateVectorT::ComplexT; // Statevector's complex type + using ParamT = PrecisionT; // Parameter's data precision + + using np_arr_c = py::array_t, + py::array::c_style | py::array::forcecast>; + using sparse_index_type = + typename std::conditional::value, int32_t, + int64_t>::type; + using np_arr_sparse_ind = typename std::conditional< + std::is_same::value, + py::array_t, + py::array_t>::type; + + pyclass + .def("expval", + static_cast::*)( + const std::string &, const std::vector &)>( + &MeasurementsMPI::expval), + "Expected value of an operation by name.") + .def( + "expval", + [](MeasurementsMPI &M, + const np_arr_sparse_ind &row_map, + const np_arr_sparse_ind &entries, const np_arr_c &values) { + return M.expval( + static_cast(row_map.request().ptr), + static_cast(row_map.request().size), + static_cast(entries.request().ptr), + static_cast(values.request().ptr), + static_cast(values.request().size)); + }, + "Expected value of a sparse Hamiltonian.") + .def( + "expval", + [](MeasurementsMPI &M, + const std::vector &pauli_words, + const std::vector> &target_wires, + const np_arr_c &coeffs) { + return M.expval(pauli_words, target_wires, + static_cast(coeffs.request().ptr)); + }, + "Expected value of Hamiltonian represented by Pauli words.") + .def( + "expval", + [](MeasurementsMPI &M, const np_arr_c &matrix, + const std::vector &wires) { + const std::size_t matrix_size = exp2(2 * wires.size()); + auto matrix_data = + static_cast(matrix.request().ptr); + std::vector matrix_v{matrix_data, + matrix_data + matrix_size}; + return M.expval(matrix_v, 
wires); + }, + "Expected value of a Hermitian observable.") + .def("var", + [](MeasurementsMPI &M, const std::string &operation, + const std::vector &wires) { + return M.var(operation, wires); + }) + .def("var", + static_cast::*)( + const std::string &, const std::vector &)>( + &MeasurementsMPI::var), + "Variance of an operation by name.") + .def( + "var", + [](MeasurementsMPI &M, + const np_arr_sparse_ind &row_map, + const np_arr_sparse_ind &entries, const np_arr_c &values) { + return M.var( + static_cast(row_map.request().ptr), + static_cast(row_map.request().size), + static_cast(entries.request().ptr), + static_cast(values.request().ptr), + static_cast(values.request().size)); + }, + "Variance of a sparse Hamiltonian."); +} + +/** + * @brief Register backend specific adjoint Jacobian methods. + * + * @tparam StateVectorT + * @param m Pybind module + */ +template +void registerBackendSpecificAlgorithmsMPI([[maybe_unused]] py::module_ &m) {} + +/** + * @brief Register bindings for backend-specific info. + * + * @param m Pybind11 module. 
+ */ +void registerBackendSpecificInfoMPI(py::module_ &m) { + using np_arr_c64 = py::array_t, + py::array::c_style | py::array::forcecast>; + using np_arr_c128 = py::array_t, + py::array::c_style | py::array::forcecast>; + py::class_(m, "MPIManager") + .def(py::init<>()) + .def(py::init()) + .def("Barrier", &MPIManager::Barrier) + .def("getRank", &MPIManager::getRank) + .def("getSize", &MPIManager::getSize) + .def("getSizeNode", &MPIManager::getSizeNode) + .def("getTime", &MPIManager::getTime) + .def("getVendor", &MPIManager::getVendor) + .def("getVersion", &MPIManager::getVersion) + .def( + "Scatter", + [](MPIManager &mpi_manager, np_arr_c64 &sendBuf, + np_arr_c64 &recvBuf, int root) { + auto send_ptr = + static_cast *>(sendBuf.request().ptr); + auto recv_ptr = + static_cast *>(recvBuf.request().ptr); + mpi_manager.template Scatter>( + send_ptr, recv_ptr, recvBuf.request().size, root); + }, + "MPI Scatter.") + .def( + "Scatter", + [](MPIManager &mpi_manager, np_arr_c128 &sendBuf, + np_arr_c128 &recvBuf, int root) { + auto send_ptr = + static_cast *>(sendBuf.request().ptr); + auto recv_ptr = + static_cast *>(recvBuf.request().ptr); + mpi_manager.template Scatter>( + send_ptr, recv_ptr, recvBuf.request().size, root); + }, + "MPI Scatter."); +} +} // namespace Pennylane::LightningGPU + /// @endcond \ No newline at end of file diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPU.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPU.hpp index af2ae38ec2..8ca6eacc69 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPU.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPU.hpp @@ -259,10 +259,10 @@ class Measurements final * @return auto Expectation value. 
*/ template - auto expval(const index_type *csrOffsets_ptr, - const index_type csrOffsets_size, const index_type *columns_ptr, + auto expval(const index_type *csrOffsets_ptr, const int64_t csrOffsets_size, + const index_type *columns_ptr, const std::complex *values_ptr, - const index_type numNNZ) -> PrecisionT { + const int64_t numNNZ) -> PrecisionT { const std::size_t nIndexBits = this->_statevector.getNumQubits(); const std::size_t length = std::size_t{1} << nIndexBits; @@ -580,10 +580,10 @@ class Measurements final * @return Floating point with the variance of the sparse Hamiltonian. */ template - PrecisionT - var(const index_type *csrOffsets_ptr, const index_type csrOffsets_size, - const index_type *columns_ptr, - const std::complex *values_ptr, const index_type numNNZ) { + PrecisionT var(const index_type *csrOffsets_ptr, + const int64_t csrOffsets_size, const index_type *columns_ptr, + const std::complex *values_ptr, + const int64_t numNNZ) { PL_ABORT_IF( (this->_statevector.getLength() != (size_t(csrOffsets_size) - 1)), "Statevector and Hamiltonian have incompatible sizes."); diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPUMPI.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPUMPI.hpp index f96e2bc217..ff101654df 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPUMPI.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/MeasurementsGPUMPI.hpp @@ -47,6 +47,7 @@ using namespace Pennylane; using namespace Pennylane::Measures; using namespace Pennylane::Observables; using namespace Pennylane::LightningGPU::Observables; +using namespace Pennylane::LightningGPU::MPI; namespace cuUtil = Pennylane::LightningGPU::Util; using Pennylane::LightningGPU::StateVectorCudaManaged; using namespace Pennylane::Util; @@ -366,10 +367,10 @@ class MeasurementsMPI final * @return auto Expectation value. 
*/ template - auto expval(const index_type *csrOffsets_ptr, - const index_type csrOffsets_size, const index_type *columns_ptr, + auto expval(const index_type *csrOffsets_ptr, const int64_t csrOffsets_size, + const index_type *columns_ptr, const std::complex *values_ptr, - const index_type numNNZ) -> PrecisionT { + const int64_t numNNZ) -> PrecisionT { if (mpi_manager_.getRank() == 0) { PL_ABORT_IF_NOT( static_cast(csrOffsets_size - 1) == @@ -657,10 +658,10 @@ class MeasurementsMPI final * @return Floating point with the variance of the sparse Hamiltonian. */ template - PrecisionT - var(const index_type *csrOffsets_ptr, const index_type csrOffsets_size, - const index_type *columns_ptr, - const std::complex *values_ptr, const index_type numNNZ) { + PrecisionT var(const index_type *csrOffsets_ptr, + const int64_t csrOffsets_size, const index_type *columns_ptr, + const std::complex *values_ptr, + const int64_t numNNZ) { if (mpi_manager_.getRank() == 0) { PL_ABORT_IF_NOT( static_cast(csrOffsets_size - 1) == diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Expval.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Expval.cpp index 82b4d41e93..36f1f1f128 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Expval.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/Test_StateVectorCudaManaged_Expval.cpp @@ -336,6 +336,8 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::Hamiltonian_expval_Sparse", "[StateVectorCudaManaged_Expval]", float, double) { using StateVectorT = StateVectorCudaManaged; using ComplexT = StateVectorT::ComplexT; + using IdxT = typename std::conditional::value, + int32_t, int64_t>::type; SECTION("Sparse expval") { std::vector init_state{{0.0, 0.0}, {0.0, 0.1}, {0.1, 0.1}, @@ -344,17 +346,18 @@ 
TEMPLATE_TEST_CASE("StateVectorCudaManaged::Hamiltonian_expval_Sparse", StateVectorT sv{init_state.data(), init_state.size()}; auto m = Measurements(sv); - std::vector index_ptr = {0, 2, 4, 6, 8, 10, 12, 14, 16}; - std::vector indices = {0, 3, 1, 2, 1, 2, 0, 3, - 4, 7, 5, 6, 5, 6, 4, 7}; + std::vector index_ptr = {0, 2, 4, 6, 8, 10, 12, 14, 16}; + std::vector indices = {0, 3, 1, 2, 1, 2, 0, 3, + 4, 7, 5, 6, 5, 6, 4, 7}; std::vector values = { {3.1415, 0.0}, {0.0, -3.1415}, {3.1415, 0.0}, {0.0, 3.1415}, {0.0, -3.1415}, {3.1415, 0.0}, {0.0, 3.1415}, {3.1415, 0.0}, {3.1415, 0.0}, {0.0, -3.1415}, {3.1415, 0.0}, {0.0, 3.1415}, {0.0, -3.1415}, {3.1415, 0.0}, {0.0, 3.1415}, {3.1415, 0.0}}; - auto result = m.expval(index_ptr.data(), index_ptr.size(), - indices.data(), values.data(), values.size()); + auto result = m.expval( + index_ptr.data(), static_cast(index_ptr.size()), + indices.data(), values.data(), static_cast(values.size())); auto expected = TestType(3.1415); CHECK(expected == Approx(result).epsilon(1e-7)); } @@ -372,22 +375,23 @@ TEMPLATE_TEST_CASE("StateVectorCudaManaged::Hamiltonian_expval_Sparse", // measurements. 
Measurements Measurer(sv); const size_t num_qubits = 3; - const size_t data_size = Pennylane::Util::exp2(num_qubits); + size_t data_size = Pennylane::Util::exp2(num_qubits); - std::vector row_map; - std::vector entries; + std::vector row_map; + std::vector entries; std::vector values; - write_CSR_vectors(row_map, entries, values, data_size); + write_CSR_vectors(row_map, entries, values, + static_cast(data_size)); - PrecisionT exp_values = - Measurer.expval(row_map.data(), row_map.size(), entries.data(), - values.data(), values.size()); + PrecisionT exp_values = Measurer.expval( + row_map.data(), static_cast(row_map.size()), + entries.data(), values.data(), static_cast(values.size())); PrecisionT exp_values_ref = 0.5930885; REQUIRE(exp_values == Approx(exp_values_ref).margin(1e-6)); - PrecisionT var_values = - Measurer.var(row_map.data(), row_map.size(), entries.data(), - values.data(), values.size()); + PrecisionT var_values = Measurer.var( + row_map.data(), static_cast(row_map.size()), + entries.data(), values.data(), static_cast(values.size())); PrecisionT var_values_ref = 2.4624654; REQUIRE(var_values == Approx(var_values_ref).margin(1e-6)); } diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/mpi/Test_StateVectorCudaMPI_Expval.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/mpi/Test_StateVectorCudaMPI_Expval.cpp index 47505e73fe..b6fdab8737 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/mpi/Test_StateVectorCudaMPI_Expval.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/measurements/tests/mpi/Test_StateVectorCudaMPI_Expval.cpp @@ -401,6 +401,8 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::Hamiltonian_expval_Sparse", "[StateVectorCudaMPI_Expval]", double) { using StateVectorT = StateVectorCudaMPI; using ComplexT = StateVectorT::ComplexT; + using IdxT = typename std::conditional::value, + int32_t, int64_t>::type; MPIManager 
mpi_manager(MPI_COMM_WORLD); REQUIRE(mpi_manager.getSize() == 2); @@ -431,17 +433,18 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::Hamiltonian_expval_Sparse", sv.CopyHostDataToGpu(local_init_sv.data(), local_init_sv.size(), false); auto m = MeasurementsMPI(sv); - std::vector index_ptr = {0, 2, 4, 6, 8, 10, 12, 14, 16}; - std::vector indices = {0, 3, 1, 2, 1, 2, 0, 3, - 4, 7, 5, 6, 5, 6, 4, 7}; + std::vector index_ptr = {0, 2, 4, 6, 8, 10, 12, 14, 16}; + std::vector indices = {0, 3, 1, 2, 1, 2, 0, 3, + 4, 7, 5, 6, 5, 6, 4, 7}; std::vector values = { {3.1415, 0.0}, {0.0, -3.1415}, {3.1415, 0.0}, {0.0, 3.1415}, {0.0, -3.1415}, {3.1415, 0.0}, {0.0, 3.1415}, {3.1415, 0.0}, {3.1415, 0.0}, {0.0, -3.1415}, {3.1415, 0.0}, {0.0, 3.1415}, {0.0, -3.1415}, {3.1415, 0.0}, {0.0, 3.1415}, {3.1415, 0.0}}; - auto result = m.expval(index_ptr.data(), index_ptr.size(), - indices.data(), values.data(), values.size()); + auto result = m.expval( + index_ptr.data(), static_cast(index_ptr.size()), + indices.data(), values.data(), static_cast(values.size())); auto expected = TestType(3.1415); CHECK(expected == Approx(result).epsilon(1e-7)); } @@ -464,24 +467,25 @@ TEMPLATE_TEST_CASE("StateVectorCudaMPI::Hamiltonian_expval_Sparse", // This object attaches to the statevector allowing several // measurements. 
MeasurementsMPI Measurer(sv); - const size_t data_size = Pennylane::Util::exp2(num_qubits); + size_t data_size = Pennylane::Util::exp2(num_qubits); - std::vector row_map; - std::vector entries; + std::vector row_map; + std::vector entries; std::vector values; - write_CSR_vectors(row_map, entries, values, data_size); + write_CSR_vectors(row_map, entries, values, + static_cast(data_size)); - PrecisionT exp_values = - Measurer.expval(row_map.data(), row_map.size(), entries.data(), - values.data(), values.size()); + PrecisionT exp_values = Measurer.expval( + row_map.data(), static_cast(row_map.size()), + entries.data(), values.data(), static_cast(values.size())); PrecisionT exp_values_ref = 0.5930885; REQUIRE(exp_values == Approx(exp_values_ref).margin(1e-6)); mpi_manager.Barrier(); - PrecisionT var_values = - Measurer.var(row_map.data(), row_map.size(), entries.data(), - values.data(), values.size()); + PrecisionT var_values = Measurer.var( + row_map.data(), static_cast(row_map.size()), + entries.data(), values.data(), static_cast(values.size())); PrecisionT var_values_ref = 2.4624654; REQUIRE(var_values == Approx(var_values_ref).margin(1e-6)); } diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPU.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPU.cpp index b186caa39e..c4f2ca82d4 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPU.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPU.cpp @@ -28,3 +28,6 @@ template class Observables::TensorProdObs>; template class Observables::Hamiltonian>; template class Observables::Hamiltonian>; + +template class Observables::SparseHamiltonian>; +template class Observables::SparseHamiltonian>; diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPU.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPU.hpp 
index c92e7dac5d..6d0d0a94e7 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPU.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPU.hpp @@ -209,4 +209,87 @@ class Hamiltonian final : public HamiltonianBase { } }; +/** + * @brief Sparse representation of Hamiltonian + * + */ +template +class SparseHamiltonian final : public SparseHamiltonianBase { + private: + using BaseType = SparseHamiltonianBase; + + public: + using PrecisionT = typename StateVectorT::PrecisionT; + using ComplexT = typename StateVectorT::ComplexT; + // cuSparse required index type + using IdxT = typename BaseType::IdxT; + + /** + * @brief Create a SparseHamiltonian from data, indices and offsets in CSR + * format. + * + * @param data Arguments to construct data + * @param indices Arguments to construct indices + * @param offsets Arguments to construct offsets + * @param wires Arguments to construct wires + */ + template + explicit SparseHamiltonian(T1 &&data, T2 &&indices, T3 &&offsets, + T4 &&wires) + : BaseType{data, indices, offsets, wires} {} + + /** + * @brief Convenient wrapper for the constructor as the constructor does not + * convert the std::shared_ptr with a derived class correctly. + * + * This function is useful as std::make_shared does not handle + * brace-enclosed initializer list correctly. + * + * @param data Argument to construct data + * @param indices Argument to construct indices + * @param offsets Argument to construct ofsets + * @param wires Argument to construct wires + */ + static auto create(std::initializer_list data, + std::initializer_list indices, + std::initializer_list offsets, + std::initializer_list wires) + -> std::shared_ptr> { + return std::shared_ptr>( + new SparseHamiltonian{ + std::move(data), std::move(indices), std::move(offsets), + std::move(wires)}); + } + + /** + * @brief Updates the statevector SV:->SV', where SV' = a*H*SV, and where H + * is a sparse Hamiltonian. 
+ * + */ + void applyInPlace(StateVectorT &sv) const override { + PL_ABORT_IF_NOT(this->wires_.size() == sv.getNumQubits(), + "SparseH wire count does not match state-vector size"); + using CFP_t = typename StateVectorT::CFP_t; + + const std::size_t nIndexBits = sv.getNumQubits(); + const std::size_t length = std::size_t{1} << nIndexBits; + + auto device_id = sv.getDataBuffer().getDevTag().getDeviceID(); + auto stream_id = sv.getDataBuffer().getDevTag().getStreamID(); + + cusparseHandle_t handle = sv.getCusparseHandle(); + + std::unique_ptr> d_sv_prime = + std::make_unique>(length, device_id, stream_id, + true); + + SparseMV_cuSparse( + this->offsets_.data(), static_cast(this->offsets_.size()), + this->indices_.data(), this->data_.data(), + static_cast(this->data_.size()), sv.getData(), + d_sv_prime->getData(), device_id, stream_id, handle); + sv.updateData(std::move(d_sv_prime)); + } +}; + } // namespace Pennylane::LightningGPU::Observables diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPUMPI.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPUMPI.cpp index 9b1f776e0f..ae9ac9100a 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPUMPI.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPUMPI.cpp @@ -28,3 +28,6 @@ template class Observables::TensorProdObsMPI>; template class Observables::HamiltonianMPI>; template class Observables::HamiltonianMPI>; + +template class Observables::SparseHamiltonianMPI>; +template class Observables::SparseHamiltonianMPI>; diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPUMPI.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPUMPI.hpp index d15df18207..94f5e45739 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPUMPI.hpp +++ 
b/pennylane_lightning/core/src/simulators/lightning_gpu/observables/ObservablesGPUMPI.hpp @@ -21,6 +21,7 @@ #include "Constant.hpp" #include "ConstantUtil.hpp" // lookup #include "LinearAlg.hpp" +#include "MPILinearAlg.hpp" #include "Observables.hpp" #include "StateVectorCudaMPI.hpp" #include "Util.hpp" @@ -213,4 +214,97 @@ class HamiltonianMPI final : public HamiltonianBase { } }; +/** + * @brief Sparse representation of Hamiltonian + * + */ +template +class SparseHamiltonianMPI final : public SparseHamiltonianBase { + public: + using PrecisionT = typename StateVectorT::PrecisionT; + using ComplexT = typename StateVectorT::ComplexT; + // cuSparse required index type + using IdxT = + typename std::conditional::value, + int32_t, int64_t>::type; + + private: + using BaseType = SparseHamiltonianBase; + + public: + /** + * @brief Create a SparseHamiltonianMPI from data, indices and offsets in + * CSR format. + * + * @param data Arguments to construct data + * @param indices Arguments to construct indices + * @param offsets Arguments to construct offsets + * @param wires Arguments to construct wires + */ + template + explicit SparseHamiltonianMPI(T1 &&data, T2 &&indices, T3 &&offsets, + T4 &&wires) + : BaseType{data, indices, offsets, wires} {} + + /** + * @brief Convenient wrapper for the constructor as the constructor does not + * convert the std::shared_ptr with a derived class correctly. + * + * This function is useful as std::make_shared does not handle + * brace-enclosed initializer list correctly. 
+ * + * @param data Argument to construct data + * @param indices Argument to construct indices + * @param offsets Argument to construct ofsets + * @param wires Argument to construct wires + */ + static auto create(std::initializer_list data, + std::initializer_list indices, + std::initializer_list offsets, + std::initializer_list wires) + -> std::shared_ptr> { + return std::shared_ptr>( + new SparseHamiltonianMPI{ + std::move(data), std::move(indices), std::move(offsets), + std::move(wires)}); + } + + /** + * @brief Updates the statevector SV:->SV', where SV' = a*H*SV, and where H + * is a sparse Hamiltonian. + * + */ + void applyInPlace(StateVectorT &sv) const override { + auto mpi_manager = sv.getMPIManager(); + if (mpi_manager.getRank() == 0) { + PL_ABORT_IF_NOT( + this->wires_.size() == sv.getTotalNumQubits(), + "SparseH wire count does not match state-vector size"); + } + using CFP_t = typename StateVectorT::CFP_t; + + auto device_id = sv.getDataBuffer().getDevTag().getDeviceID(); + auto stream_id = sv.getDataBuffer().getDevTag().getStreamID(); + + const size_t length_local = size_t{1} << sv.getNumLocalQubits(); + + std::unique_ptr> d_sv_prime = + std::make_unique>(length_local, device_id, + stream_id, true); + d_sv_prime->zeroInit(); + PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize()); + mpi_manager.Barrier(); + + cuUtil::SparseMV_cuSparseMPI( + mpi_manager, length_local, this->offsets_.data(), + static_cast(this->offsets_.size()), this->indices_.data(), + this->data_.data(), const_cast(sv.getData()), + d_sv_prime->getData(), device_id, stream_id, + sv.getCusparseHandle()); + + sv.CopyGpuDataToGpuIn(d_sv_prime->getData(), d_sv_prime->getLength()); + mpi_manager.Barrier(); + } +}; + } // namespace Pennylane::LightningGPU::Observables diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/observables/tests/Test_ObservablesGPU.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/observables/tests/Test_ObservablesGPU.cpp index 0d8bd7d388..398f664ffc 
100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/observables/tests/Test_ObservablesGPU.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/observables/tests/Test_ObservablesGPU.cpp @@ -154,6 +154,20 @@ TEMPLATE_PRODUCT_TEST_CASE("Hamiltonian", "[Observables]", } } +TEMPLATE_PRODUCT_TEST_CASE("SparseHamiltonian", "[Observables]", + (StateVectorCudaManaged), (float, double)) { + using StateVectorT = TestType; + using SparseHamiltonianT = SparseHamiltonian; + + SECTION("Copy constructibility") { + REQUIRE(std::is_copy_constructible_v); + } + + SECTION("Move constructibility") { + REQUIRE(std::is_move_constructible_v); + } +} + TEMPLATE_PRODUCT_TEST_CASE("Observables::HermitianHasher", "[Observables]", (StateVectorCudaManaged), (float, double)) { using StateVectorT = TestType; @@ -257,3 +271,31 @@ TEMPLATE_PRODUCT_TEST_CASE("Hamiltonian::ApplyInPlace", "[Observables]", } } } + +TEMPLATE_PRODUCT_TEST_CASE("SparseHamiltonian::ApplyInPlace", "[Observables]", + (StateVectorCudaManaged), (float, double)) { + using StateVectorT = TestType; + using PrecisionT = typename StateVectorT::PrecisionT; + using ComplexT = typename StateVectorT::ComplexT; + + const std::size_t num_qubits = 3; + std::mt19937 re{1337}; + + auto sparseH = SparseHamiltonian::create( + {ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}}, + {7, 6, 5, 4, 3, 2, 1, 0}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {0, 1, 2}); + + auto init_state = createRandomStateVectorData(re, num_qubits); + + StateVectorT state_vector(init_state.data(), init_state.size()); + + sparseH->applyInPlace(state_vector); + + std::reverse(init_state.begin(), init_state.end()); + + REQUIRE(isApproxEqual(state_vector.getDataVector().data(), + state_vector.getDataVector().size(), + init_state.data(), init_state.size())); +} \ No newline at end of file diff --git 
a/pennylane_lightning/core/src/simulators/lightning_gpu/observables/tests/mpi/Test_ObservablesGPUMPI.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/observables/tests/mpi/Test_ObservablesGPUMPI.cpp index 6abde861e6..bc4d0e517f 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/observables/tests/mpi/Test_ObservablesGPUMPI.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/observables/tests/mpi/Test_ObservablesGPUMPI.cpp @@ -289,4 +289,58 @@ TEMPLATE_PRODUCT_TEST_CASE("Observables::HermitianHasherMPI", "[Observables]", CHECK(ham_1->getObsName() == res1.str()); CHECK(ham_2->getObsName() == res2.str()); } +} + +TEMPLATE_PRODUCT_TEST_CASE("SparseHamiltonian::ApplyInPlace", "[Observables]", + (StateVectorCudaMPI), (float, double)) { + using StateVectorT = TestType; + using PrecisionT = typename StateVectorT::PrecisionT; + using ComplexT = typename StateVectorT::ComplexT; + MPIManager mpi_manager(MPI_COMM_WORLD); + + const std::size_t num_qubits = 3; + std::mt19937 re{1337}; + + auto sparseH = SparseHamiltonianMPI::create( + {ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}, + ComplexT{1.0, 0.0}, ComplexT{1.0, 0.0}}, + {7, 6, 5, 4, 3, 2, 1, 0}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {0, 1, 2}); + + size_t mpi_buffersize = 1; + size_t nGlobalIndexBits = + std::bit_width(static_cast(mpi_manager.getSize())) - 1; + size_t nLocalIndexBits = num_qubits - nGlobalIndexBits; + size_t subSvLength = 1 << nLocalIndexBits; + + mpi_manager.Barrier(); + std::vector expected_sv(subSvLength); + std::vector local_state(subSvLength); + + auto init_state = createRandomStateVectorData(re, num_qubits); + + mpi_manager.Scatter(init_state.data(), local_state.data(), subSvLength, 0); + mpi_manager.Barrier(); + + int nDevices = 0; + cudaGetDeviceCount(&nDevices); + int deviceId = mpi_manager.getRank() % nDevices; + cudaSetDevice(deviceId); + DevTag dt_local(deviceId, 0); + mpi_manager.Barrier(); 
+ + StateVectorT sv_mpi(mpi_manager, dt_local, mpi_buffersize, nGlobalIndexBits, + nLocalIndexBits); + + sv_mpi.CopyHostDataToGpu(local_state, false); + + sparseH->applyInPlace(sv_mpi); + + std::reverse(init_state.begin(), init_state.end()); + mpi_manager.Scatter(init_state.data(), expected_sv.data(), subSvLength, 0); + mpi_manager.Barrier(); + + REQUIRE(isApproxEqual(sv_mpi.getDataVector().data(), + sv_mpi.getDataVector().size(), expected_sv.data(), + expected_sv.size())); } \ No newline at end of file diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/utils/LinearAlg.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/utils/LinearAlg.hpp index f70d4ea9f2..13a7ec9a90 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/utils/LinearAlg.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/utils/LinearAlg.hpp @@ -307,20 +307,19 @@ inline SharedCusparseHandle make_shared_cusparse_handle() { * @param handle cuSparse handle. */ template -inline void SparseMV_cuSparse(const index_type *csrOffsets_ptr, - const index_type csrOffsets_size, - const index_type *columns_ptr, - const std::complex *values_ptr, - const index_type numNNZ, CFP_t *X, CFP_t *Y, - DevTypeID device_id, cudaStream_t stream_id, - cusparseHandle_t handle) { - const int64_t num_rows = static_cast( +inline void +SparseMV_cuSparse(const index_type *csrOffsets_ptr, + const int64_t csrOffsets_size, const index_type *columns_ptr, + const std::complex *values_ptr, + const int64_t numNNZ, CFP_t *X, CFP_t *Y, DevTypeID device_id, + cudaStream_t stream_id, cusparseHandle_t handle) { + const int64_t num_rows = csrOffsets_size - - 1); // int64_t is required for num_rows by cusparseCreateCsr - const int64_t num_cols = static_cast( - num_rows); // int64_t is required for num_cols by cusparseCreateCsr - const int64_t nnz = static_cast( - numNNZ); // int64_t is required for nnz by cusparseCreateCsr + 1; // int64_t is required for num_rows by cusparseCreateCsr + const 
int64_t num_cols = + num_rows; // int64_t is required for num_cols by cusparseCreateCsr + const int64_t nnz = + numNNZ; // int64_t is required for nnz by cusparseCreateCsr const CFP_t alpha = {1.0, 0.0}; const CFP_t beta = {0.0, 0.0}; @@ -338,13 +337,15 @@ inline void SparseMV_cuSparse(const index_type *csrOffsets_ptr, d_values.CopyHostDataToGpu(values_ptr, d_values.getLength(), false); cudaDataType_t data_type; - cusparseIndexType_t compute_type = CUSPARSE_INDEX_64I; + cusparseIndexType_t compute_type; if constexpr (std::is_same_v || std::is_same_v) { data_type = CUDA_C_64F; + compute_type = CUSPARSE_INDEX_64I; } else { data_type = CUDA_C_32F; + compute_type = CUSPARSE_INDEX_32I; } // CUSPARSE APIs @@ -394,8 +395,7 @@ inline void SparseMV_cuSparse(const index_type *csrOffsets_ptr, /* cusparseSpMVAlg_t */ CUSPARSE_SPMV_ALG_DEFAULT, /* size_t* */ &bufferSize)); - DataBuffer dBuffer{bufferSize, device_id, stream_id, - true}; + DataBuffer dBuffer{bufferSize, device_id, stream_id, true}; // execute SpMV PL_CUSPARSE_IS_SUCCESS(cusparseSpMV( @@ -439,19 +439,19 @@ inline void SparseMV_cuSparse(const index_type *csrOffsets_ptr, */ template inline void SparseMV_cuSparse(const index_type *csrOffsets_ptr, - const index_type csrOffsets_size, + const int64_t csrOffsets_size, const index_type *columns_ptr, const std::complex *values_ptr, - const index_type numNNZ, const CFP_t *X, CFP_t *Y, + const int64_t numNNZ, const CFP_t *X, CFP_t *Y, DevTypeID device_id, cudaStream_t stream_id, cusparseHandle_t handle) { - const int64_t num_rows = static_cast( + const int64_t num_rows = csrOffsets_size - - 1); // int64_t is required for num_rows by cusparseCreateCsr - const int64_t num_cols = static_cast( - num_rows); // int64_t is required for num_cols by cusparseCreateCsr - const int64_t nnz = static_cast( - numNNZ); // int64_t is required for nnz by cusparseCreateCsr + 1; // int64_t is required for num_rows by cusparseCreateCsr + const int64_t num_cols = + num_rows; // int64_t is 
required for num_cols by cusparseCreateCsr + const int64_t nnz = + numNNZ; // int64_t is required for nnz by cusparseCreateCsr const CFP_t alpha = {1.0, 0.0}; const CFP_t beta = {0.0, 0.0}; diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/utils/MPILinearAlg.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/utils/MPILinearAlg.hpp index 4b2e905b8e..cd2afd426b 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/utils/MPILinearAlg.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/utils/MPILinearAlg.hpp @@ -45,15 +45,15 @@ namespace Pennylane::LightningGPU::Util { template inline void SparseMV_cuSparseMPI( MPIManager &mpi_manager, const size_t &length_local, - const index_type *csrOffsets_ptr, const index_type csrOffsets_size, + const index_type *csrOffsets_ptr, const int64_t csrOffsets_size, const index_type *columns_ptr, const std::complex *values_ptr, CFP_t *X, CFP_t *Y, DevTypeID device_id, cudaStream_t stream_id, cusparseHandle_t handle) { std::vector>> csrmatrix_blocks; if (mpi_manager.getRank() == 0) { csrmatrix_blocks = splitCSRMatrix( - mpi_manager, csrOffsets_size - 1, csrOffsets_ptr, columns_ptr, - values_ptr); + mpi_manager, static_cast(csrOffsets_size - 1), + csrOffsets_ptr, columns_ptr, values_ptr); } mpi_manager.Barrier(); @@ -79,11 +79,11 @@ inline void SparseMV_cuSparseMPI( color = 1; SparseMV_cuSparse( localCSRMatrix.getCsrOffsets().data(), - localCSRMatrix.getCsrOffsets().size(), + static_cast(localCSRMatrix.getCsrOffsets().size()), localCSRMatrix.getColumns().data(), localCSRMatrix.getValues().data(), - localCSRMatrix.getValues().size(), X, d_res_per_block.getData(), - device_id, stream_id, handle); + static_cast(localCSRMatrix.getValues().size()), X, + d_res_per_block.getData(), device_id, stream_id, handle); } PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize()); diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/utils/tests/Test_LinearAlgebra.cpp 
b/pennylane_lightning/core/src/simulators/lightning_gpu/utils/tests/Test_LinearAlgebra.cpp index cd1dd3937a..a2b35d0742 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/utils/tests/Test_LinearAlgebra.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/utils/tests/Test_LinearAlgebra.cpp @@ -40,6 +40,8 @@ TEMPLATE_TEST_CASE("Linear Algebra::SparseMV", "[Linear Algebra]", float, using StateVectorT = StateVectorCudaManaged; using ComplexT = StateVectorT::ComplexT; using CFP_t = StateVectorT::CFP_t; + using IdxT = typename std::conditional::value, + int32_t, int64_t>::type; std::size_t num_qubits = 3; std::size_t data_size = exp2(num_qubits); @@ -52,9 +54,9 @@ TEMPLATE_TEST_CASE("Linear Algebra::SparseMV", "[Linear Algebra]", float, {0.2, -0.1}, {-0.1, 0.2}, {0.2, 0.1}, {0.1, 0.2}, {0.7, -0.2}, {-0.1, 0.6}, {0.6, 0.1}, {0.2, 0.7}}; - std::vector indptr = {0, 2, 4, 6, 8, 10, 12, 14, 16}; - std::vector indices = {0, 3, 1, 2, 1, 2, 0, 3, - 4, 7, 5, 6, 5, 6, 4, 7}; + std::vector indptr = {0, 2, 4, 6, 8, 10, 12, 14, 16}; + std::vector indices = {0, 3, 1, 2, 1, 2, 0, 3, + 4, 7, 5, 6, 5, 6, 4, 7}; std::vector values = { {1.0, 0.0}, {0.0, -1.0}, {1.0, 0.0}, {0.0, 1.0}, {0.0, -1.0}, {1.0, 0.0}, {0.0, 1.0}, {1.0, 0.0}, @@ -69,10 +71,10 @@ TEMPLATE_TEST_CASE("Linear Algebra::SparseMV", "[Linear Algebra]", float, SECTION("Testing sparse matrix vector product:") { std::vector result(data_size); - cuUtil::SparseMV_cuSparse( - indptr.data(), indptr.size(), indices.data(), values.data(), - values.size(), sv_x.getData(), sv_y.getData(), - sv_x.getDataBuffer().getDevTag().getDeviceID(), + cuUtil::SparseMV_cuSparse( + indptr.data(), static_cast(indptr.size()), indices.data(), + values.data(), static_cast(values.size()), sv_x.getData(), + sv_y.getData(), sv_x.getDataBuffer().getDevTag().getDeviceID(), sv_x.getDataBuffer().getDevTag().getStreamID(), sv_x.getCusparseHandle()); diff --git 
a/pennylane_lightning/core/src/simulators/lightning_gpu/utils/tests/mpi/Test_LinearAlgebraMPI.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/utils/tests/mpi/Test_LinearAlgebraMPI.cpp index 64df9d77e8..371bc66f75 100644 --- a/pennylane_lightning/core/src/simulators/lightning_gpu/utils/tests/mpi/Test_LinearAlgebraMPI.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_gpu/utils/tests/mpi/Test_LinearAlgebraMPI.cpp @@ -40,6 +40,8 @@ TEMPLATE_TEST_CASE("Linear Algebra::SparseMV", "[Linear Algebra]", float, using StateVectorT = StateVectorCudaMPI; using ComplexT = StateVectorT::ComplexT; using CFP_t = StateVectorT::CFP_t; + using IdxT = typename std::conditional::value, + int32_t, int64_t>::type; MPIManager mpi_manager(MPI_COMM_WORLD); REQUIRE(mpi_manager.getSize() == 2); @@ -54,9 +56,9 @@ TEMPLATE_TEST_CASE("Linear Algebra::SparseMV", "[Linear Algebra]", float, {0.1, 0.2}, {0.7, -0.2}, {-0.1, 0.6}, {0.6, 0.1}, {0.2, 0.7}}; - std::vector indptr = {0, 2, 4, 6, 8, 10, 12, 14, 16}; - std::vector indices = {0, 3, 1, 2, 1, 2, 0, 3, - 4, 7, 5, 6, 5, 6, 4, 7}; + std::vector indptr = {0, 2, 4, 6, 8, 10, 12, 14, 16}; + std::vector indices = {0, 3, 1, 2, 1, 2, 0, 3, + 4, 7, 5, 6, 5, 6, 4, 7}; std::vector values = { {1.0, 0.0}, {0.0, -1.0}, {1.0, 0.0}, {0.0, 1.0}, {0.0, -1.0}, {1.0, 0.0}, {0.0, 1.0}, {1.0, 0.0}, @@ -95,9 +97,10 @@ TEMPLATE_TEST_CASE("Linear Algebra::SparseMV", "[Linear Algebra]", float, nGlobalIndexBits, nLocalIndexBits); sv_x.CopyHostDataToGpu(local_state, false); - cuUtil::SparseMV_cuSparseMPI( - mpi_manager, sv_x.getLength(), indptr.data(), indptr.size(), - indices.data(), values.data(), sv_x.getData(), sv_y.getData(), + cuUtil::SparseMV_cuSparseMPI( + mpi_manager, sv_x.getLength(), indptr.data(), + static_cast(indptr.size()), indices.data(), values.data(), + sv_x.getData(), sv_y.getData(), sv_x.getDataBuffer().getDevTag().getDeviceID(), sv_x.getDataBuffer().getDevTag().getStreamID(), sv_x.getCusparseHandle()); diff --git 
a/pennylane_lightning/core/src/simulators/lightning_kokkos/StateVectorKokkos.hpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/StateVectorKokkos.hpp index a3f0951b24..02064e811f 100644 --- a/pennylane_lightning/core/src/simulators/lightning_kokkos/StateVectorKokkos.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/StateVectorKokkos.hpp @@ -17,6 +17,7 @@ */ #pragma once +#include #include #include #include @@ -170,7 +171,7 @@ class StateVectorKokkos final * * @param num_qubits Number of qubits */ - StateVectorKokkos(ComplexT *hostdata_, size_t length, + StateVectorKokkos(ComplexT *hostdata_, std::size_t length, const Kokkos::InitializationSettings &kokkos_args = {}) : StateVectorKokkos(log2(length), kokkos_args) { PL_ABORT_IF_NOT(isPerfectPowerOf2(length), @@ -178,12 +179,20 @@ class StateVectorKokkos final HostToDevice(hostdata_, length); } + StateVectorKokkos(std::complex *hostdata_, std::size_t length, + const Kokkos::InitializationSettings &kokkos_args = {}) + : StateVectorKokkos(log2(length), kokkos_args) { + PL_ABORT_IF_NOT(isPerfectPowerOf2(length), + "The size of provided data must be a power of 2."); + HostToDevice(reinterpret_cast(hostdata_), length); + } + /** * @brief Create a new state vector from data on the host. * * @param num_qubits Number of qubits */ - StateVectorKokkos(const ComplexT *hostdata_, size_t length, + StateVectorKokkos(const ComplexT *hostdata_, std::size_t length, const Kokkos::InitializationSettings &kokkos_args = {}) : StateVectorKokkos(log2(length), kokkos_args) { PL_ABORT_IF_NOT(isPerfectPowerOf2(length), @@ -692,7 +701,7 @@ class StateVectorKokkos final * @param new_data data pointer to new data. * @param new_size size of underlying data storage. 
*/ - void updateData(ComplexT *new_data, size_t new_size) { + void updateData(ComplexT *new_data, std::size_t new_size) { updateData(KokkosVector(new_data, new_size)); } @@ -744,7 +753,7 @@ class StateVectorKokkos final * @brief Copy data from the host space to the device space. * */ - inline void HostToDevice(ComplexT *sv, size_t length) { + inline void HostToDevice(ComplexT *sv, std::size_t length) { Kokkos::deep_copy(*data_, UnmanagedComplexHostView(sv, length)); } @@ -752,7 +761,7 @@ class StateVectorKokkos final * @brief Copy data from the device space to the host space. * */ - inline void DeviceToHost(ComplexT *sv, size_t length) const { + inline void DeviceToHost(ComplexT *sv, std::size_t length) const { Kokkos::deep_copy(UnmanagedComplexHostView(sv, length), *data_); } diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/bindings/LKokkosBindings.hpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/bindings/LKokkosBindings.hpp index bd9d89d72f..6432864c4e 100644 --- a/pennylane_lightning/core/src/simulators/lightning_kokkos/bindings/LKokkosBindings.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/bindings/LKokkosBindings.hpp @@ -24,6 +24,7 @@ #include "ConstantUtil.hpp" // lookup #include "GateOperation.hpp" #include "MeasurementsKokkos.hpp" +#include "ObservablesKokkos.hpp" #include "StateVectorKokkos.hpp" #include "TypeList.hpp" #include "Util.hpp" // exp2 @@ -33,6 +34,7 @@ namespace { using namespace Pennylane::Bindings; using namespace Pennylane::LightningKokkos::Algorithms; using namespace Pennylane::LightningKokkos::Measures; +using namespace Pennylane::LightningKokkos::Observables; using Kokkos::InitializationSettings; using Pennylane::LightningKokkos::StateVectorKokkos; using Pennylane::Util::exp2; @@ -214,6 +216,58 @@ void registerBackendSpecificMeasurements(PyClass &pyclass) { "Variance of a sparse Hamiltonian."); } +/** + * @brief Register observable classes. 
+ * + * @tparam StateVectorT + * @param m Pybind module + */ +template +void registerBackendSpecificObservables(py::module_ &m) { + using PrecisionT = + typename StateVectorT::PrecisionT; // Statevector's precision. + using ParamT = PrecisionT; // Parameter's data precision + + const std::string bitsize = + std::to_string(sizeof(std::complex) * 8); + + using np_arr_c = py::array_t, py::array::c_style>; + + std::string class_name; + + class_name = "SparseHamiltonianC" + bitsize; + py::class_, + std::shared_ptr>, + Observable>(m, class_name.c_str(), + py::module_local()) + .def(py::init([](const np_arr_c &data, + const std::vector &indices, + const std::vector &indptr, + const std::vector &wires) { + using ComplexT = typename StateVectorT::ComplexT; + const py::buffer_info buffer_data = data.request(); + const auto *data_ptr = static_cast(buffer_data.ptr); + + return SparseHamiltonian{ + std::vector({data_ptr, data_ptr + data.size()}), + indices, indptr, wires}; + })) + .def("__repr__", &SparseHamiltonian::getObsName) + .def("get_wires", &SparseHamiltonian::getWires, + "Get wires of observables") + .def( + "__eq__", + [](const SparseHamiltonian &self, + py::handle other) -> bool { + if (!py::isinstance>(other)) { + return false; + } + auto other_cast = other.cast>(); + return self == other_cast; + }, + "Compare two observables"); +} + /** * @brief Register backend specific adjoint Jacobian methods. 
* diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/observables/ObservablesKokkos.cpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/observables/ObservablesKokkos.cpp index d90f3e6019..66192b934a 100644 --- a/pennylane_lightning/core/src/simulators/lightning_kokkos/observables/ObservablesKokkos.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/observables/ObservablesKokkos.cpp @@ -28,3 +28,6 @@ template class Observables::TensorProdObs>; template class Observables::Hamiltonian>; template class Observables::Hamiltonian>; + +template class Observables::SparseHamiltonian>; +template class Observables::SparseHamiltonian>; diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/observables/ObservablesKokkos.hpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/observables/ObservablesKokkos.hpp index a0371df7f2..c3fae6b3ea 100644 --- a/pennylane_lightning/core/src/simulators/lightning_kokkos/observables/ObservablesKokkos.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/observables/ObservablesKokkos.hpp @@ -31,6 +31,7 @@ namespace { using namespace Pennylane::Util; using namespace Pennylane::Observables; using Pennylane::LightningKokkos::StateVectorKokkos; +using Pennylane::LightningKokkos::Util::SparseMV_Kokkos; } // namespace /// @endcond @@ -199,6 +200,76 @@ class Hamiltonian final : public HamiltonianBase { } }; +/** + * @brief Sparse representation of Hamiltonian + * + */ +template +class SparseHamiltonian final : public SparseHamiltonianBase { + private: + using BaseType = SparseHamiltonianBase; + + public: + using PrecisionT = typename StateVectorT::PrecisionT; + using ComplexT = typename StateVectorT::ComplexT; + using IdxT = typename BaseType::IdxT; + + /** + * @brief Create a SparseHamiltonian from data, indices and offsets in CSR + * format. 
+ * + * @param data Arguments to construct data + * @param indices Arguments to construct indices + * @param offsets Arguments to construct offsets + * @param wires Arguments to construct wires + */ + template + explicit SparseHamiltonian(T1 &&data, T2 &&indices, T3 &&offsets, + T4 &&wires) + : BaseType{data, indices, offsets, wires} {} + + /** + * @brief Convenient wrapper for the constructor as the constructor does not + * convert the std::shared_ptr with a derived class correctly. + * + * This function is useful as std::make_shared does not handle + * brace-enclosed initializer list correctly. + * + * @param data Argument to construct data + * @param indices Argument to construct indices + * @param offsets Argument to construct ofsets + * @param wires Argument to construct wires + */ + static auto create(std::initializer_list data, + std::initializer_list indices, + std::initializer_list offsets, + std::initializer_list wires) + -> std::shared_ptr> { + return std::shared_ptr>( + new SparseHamiltonian{ + std::move(data), std::move(indices), std::move(offsets), + std::move(wires)}); + } + + /** + * @brief Updates the statevector SV:->SV', where SV' = a*H*SV, and where H + * is a sparse Hamiltonian. 
+ * + */ + void applyInPlace(StateVectorT &sv) const override { + PL_ABORT_IF_NOT(this->wires_.size() == sv.getNumQubits(), + "SparseH wire count does not match state-vector size"); + StateVectorT d_sv_prime(sv.getNumQubits()); + + SparseMV_Kokkos( + sv.getView(), d_sv_prime.getView(), this->offsets_.data(), + this->offsets_.size(), this->indices_.data(), this->data_.data(), + this->data_.size()); + + sv.updateData(d_sv_prime); + } +}; + /// @cond DEV namespace detail { using Pennylane::LightningKokkos::Util::axpy_Kokkos; diff --git a/pennylane_lightning/core/src/simulators/lightning_kokkos/observables/tests/Test_ObservablesKokkos.cpp b/pennylane_lightning/core/src/simulators/lightning_kokkos/observables/tests/Test_ObservablesKokkos.cpp index aa616d2e33..5d402b1e2d 100644 --- a/pennylane_lightning/core/src/simulators/lightning_kokkos/observables/tests/Test_ObservablesKokkos.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_kokkos/observables/tests/Test_ObservablesKokkos.cpp @@ -153,6 +153,20 @@ TEMPLATE_PRODUCT_TEST_CASE("Hamiltonian", "[Observables]", (StateVectorKokkos), } } +TEMPLATE_PRODUCT_TEST_CASE("SparseHamiltonian", "[Observables]", + (StateVectorKokkos), (float, double)) { + using StateVectorT = TestType; + using SparseHamiltonianT = SparseHamiltonian; + + SECTION("Copy constructibility") { + REQUIRE(std::is_copy_constructible_v); + } + + SECTION("Move constructibility") { + REQUIRE(std::is_move_constructible_v); + } +} + TEMPLATE_PRODUCT_TEST_CASE("Hamiltonian::ApplyInPlace", "[Observables]", (StateVectorKokkos), (float, double)) { using StateVectorT = TestType; diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/bindings/LQubitBindings.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/bindings/LQubitBindings.hpp index 190a9c6525..2f83f2f39d 100644 --- a/pennylane_lightning/core/src/simulators/lightning_qubit/bindings/LQubitBindings.hpp +++ 
b/pennylane_lightning/core/src/simulators/lightning_qubit/bindings/LQubitBindings.hpp @@ -25,15 +25,17 @@ #include "DynamicDispatcher.hpp" #include "GateOperation.hpp" #include "MeasurementsLQubit.hpp" +#include "ObservablesLQubit.hpp" #include "StateVectorLQubitRaw.hpp" #include "TypeList.hpp" #include "VectorJacobianProduct.hpp" /// @cond DEV namespace { -using namespace Pennylane::LightningQubit::Measures; -using namespace Pennylane::LightningQubit::Algorithms; using namespace Pennylane::Bindings; +using namespace Pennylane::LightningQubit::Algorithms; +using namespace Pennylane::LightningQubit::Measures; +using namespace Pennylane::LightningQubit::Observables; using Pennylane::LightningQubit::StateVectorLQubitRaw; } // namespace /// @endcond @@ -180,6 +182,58 @@ void registerBackendSpecificMeasurements(PyClass &pyclass) { }); } +/** + * @brief Register backend specific observables. + * + * @tparam StateVectorT + * @param m Pybind module + */ +template +void registerBackendSpecificObservables([[maybe_unused]] py::module_ &m) { + using PrecisionT = + typename StateVectorT::PrecisionT; // Statevector's precision. 
+ using ParamT = PrecisionT; // Parameter's data precision + + const std::string bitsize = + std::to_string(sizeof(std::complex) * 8); + + using np_arr_c = py::array_t, py::array::c_style>; + + std::string class_name; + + class_name = "SparseHamiltonianC" + bitsize; + py::class_, + std::shared_ptr>, + Observable>(m, class_name.c_str(), + py::module_local()) + .def(py::init([](const np_arr_c &data, + const std::vector &indices, + const std::vector &indptr, + const std::vector &wires) { + using ComplexT = typename StateVectorT::ComplexT; + const py::buffer_info buffer_data = data.request(); + const auto *data_ptr = static_cast(buffer_data.ptr); + + return SparseHamiltonian{ + std::vector({data_ptr, data_ptr + data.size()}), + indices, indptr, wires}; + })) + .def("__repr__", &SparseHamiltonian::getObsName) + .def("get_wires", &SparseHamiltonian::getWires, + "Get wires of observables") + .def( + "__eq__", + [](const SparseHamiltonian &self, + py::handle other) -> bool { + if (!py::isinstance>(other)) { + return false; + } + auto other_cast = other.cast>(); + return self == other_cast; + }, + "Compare two observables"); +} + /** * @brief Register Vector Jacobian Product. 
*/ diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/observables/ObservablesLQubit.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/observables/ObservablesLQubit.cpp index 0a1fb54c48..e45e2c1572 100644 --- a/pennylane_lightning/core/src/simulators/lightning_qubit/observables/ObservablesLQubit.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_qubit/observables/ObservablesLQubit.cpp @@ -41,3 +41,9 @@ template class Observables::Hamiltonian>; template class Observables::Hamiltonian>; template class Observables::Hamiltonian>; + +template class Observables::SparseHamiltonian>; +template class Observables::SparseHamiltonian>; + +template class Observables::SparseHamiltonian>; +template class Observables::SparseHamiltonian>; diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/observables/ObservablesLQubit.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/observables/ObservablesLQubit.hpp index 3433a3fcc3..b659969037 100644 --- a/pennylane_lightning/core/src/simulators/lightning_qubit/observables/ObservablesLQubit.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_qubit/observables/ObservablesLQubit.hpp @@ -27,6 +27,7 @@ #include "LinearAlgebra.hpp" // scaleAndAdd #include "Macros.hpp" // use_openmp #include "Observables.hpp" +#include "SparseLinAlg.hpp" #include "StateVectorLQubitManaged.hpp" #include "StateVectorLQubitRaw.hpp" #include "Util.hpp" @@ -359,4 +360,74 @@ class Hamiltonian final : public HamiltonianBase { } }; +/** + * @brief Sparse representation of Hamiltonian + * + */ +template +class SparseHamiltonian final : public SparseHamiltonianBase { + private: + using BaseType = SparseHamiltonianBase; + + public: + using PrecisionT = typename StateVectorT::PrecisionT; + using ComplexT = typename StateVectorT::ComplexT; + using IdxT = typename BaseType::IdxT; + + /** + * @brief Create a SparseHamiltonian from data, indices and offsets in CSR + * format. 
+ * + * @param data Arguments to construct data + * @param indices Arguments to construct indices + * @param offsets Arguments to construct offsets + * @param wires Arguments to construct wires + */ + template + explicit SparseHamiltonian(T1 &&data, T2 &&indices, T3 &&offsets, + T4 &&wires) + : BaseType{data, indices, offsets, wires} {} + + /** + * @brief Convenient wrapper for the constructor as the constructor does not + * convert the std::shared_ptr with a derived class correctly. + * + * This function is useful as std::make_shared does not handle + * brace-enclosed initializer list correctly. + * + * @param data Argument to construct data + * @param indices Argument to construct indices + * @param offsets Argument to construct offsets + * @param wires Argument to construct wires + */ + static auto create(std::initializer_list data, + std::initializer_list indices, + std::initializer_list offsets, + std::initializer_list wires) + -> std::shared_ptr> { + // NOLINTBEGIN(*-move-const-arg) + return std::shared_ptr>( + new SparseHamiltonian{ + std::move(data), std::move(indices), std::move(offsets), + std::move(wires)}); + // NOLINTEND(*-move-const-arg) + } + + /** + * @brief Updates the statevector SV:->SV', where SV' = a*H*SV, and where H + * is a sparse Hamiltonian. 
+ * + */ + void applyInPlace(StateVectorT &sv) const override { + PL_ABORT_IF_NOT(this->wires_.size() == sv.getNumQubits(), + "SparseH wire count does not match state-vector size"); + auto operator_vector = Util::apply_Sparse_Matrix( + sv.getData(), sv.getLength(), this->offsets_.data(), + this->offsets_.size(), this->indices_.data(), this->data_.data(), + this->data_.size()); + + sv.updateData(operator_vector); + } +}; + } // namespace Pennylane::LightningQubit::Observables \ No newline at end of file diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/observables/tests/Test_ObservablesLQubit.cpp b/pennylane_lightning/core/src/simulators/lightning_qubit/observables/tests/Test_ObservablesLQubit.cpp index 624248da20..4ef59b04c5 100644 --- a/pennylane_lightning/core/src/simulators/lightning_qubit/observables/tests/Test_ObservablesLQubit.cpp +++ b/pennylane_lightning/core/src/simulators/lightning_qubit/observables/tests/Test_ObservablesLQubit.cpp @@ -159,6 +159,21 @@ TEMPLATE_PRODUCT_TEST_CASE("Hamiltonian", "[Observables]", } } +TEMPLATE_PRODUCT_TEST_CASE("SparseHamiltonian", "[Observables]", + (StateVectorLQubitManaged, StateVectorLQubitRaw), + (float, double)) { + using StateVectorT = TestType; + using SparseHamiltonianT = SparseHamiltonian; + + SECTION("Copy constructibility") { + REQUIRE(std::is_copy_constructible_v); + } + + SECTION("Move constructibility") { + REQUIRE(std::is_move_constructible_v); + } +} + TEMPLATE_PRODUCT_TEST_CASE("Hamiltonian::ApplyInPlace", "[Observables]", (StateVectorLQubitManaged, StateVectorLQubitRaw), (float, double)) { diff --git a/pennylane_lightning/core/src/utils/TestHelpers.hpp b/pennylane_lightning/core/src/utils/TestHelpers.hpp index 605e584bcb..af1a46618a 100644 --- a/pennylane_lightning/core/src/utils/TestHelpers.hpp +++ b/pennylane_lightning/core/src/utils/TestHelpers.hpp @@ -429,7 +429,8 @@ void write_CSR_vectors(std::vector &row_map, const ComplexT SC_ONE = 1.0; row_map.resize(numRows + 1); - for (IndexT 
rowIdx = 1; rowIdx < (IndexT)row_map.size(); ++rowIdx) { + for (IndexT rowIdx = 1; rowIdx < static_cast(row_map.size()); + ++rowIdx) { row_map[rowIdx] = row_map[rowIdx - 1] + 3; }; const IndexT numNNZ = row_map[numRows]; @@ -437,6 +438,7 @@ void write_CSR_vectors(std::vector &row_map, entries.resize(numNNZ); values.resize(numNNZ); for (IndexT rowIdx = 0; rowIdx < numRows; ++rowIdx) { + size_t idx = row_map[rowIdx]; if (rowIdx == 0) { entries[0] = rowIdx; entries[1] = rowIdx + 1; @@ -446,21 +448,21 @@ void write_CSR_vectors(std::vector &row_map, values[1] = -SC_ONE; values[2] = -SC_ONE; } else if (rowIdx == numRows - 1) { - entries[row_map[rowIdx]] = 0; - entries[row_map[rowIdx] + 1] = rowIdx - 1; - entries[row_map[rowIdx] + 2] = rowIdx; + entries[idx] = 0; + entries[idx + 1] = rowIdx - 1; + entries[idx + 2] = rowIdx; - values[row_map[rowIdx]] = -SC_ONE; - values[row_map[rowIdx] + 1] = -SC_ONE; - values[row_map[rowIdx] + 2] = SC_ONE; + values[idx] = -SC_ONE; + values[idx + 1] = -SC_ONE; + values[idx + 2] = SC_ONE; } else { - entries[row_map[rowIdx]] = rowIdx - 1; - entries[row_map[rowIdx] + 1] = rowIdx; - entries[row_map[rowIdx] + 2] = rowIdx + 1; + entries[idx] = rowIdx - 1; + entries[idx + 1] = rowIdx; + entries[idx + 2] = rowIdx + 1; - values[row_map[rowIdx]] = -SC_ONE; - values[row_map[rowIdx] + 1] = SC_ONE; - values[row_map[rowIdx] + 2] = -SC_ONE; + values[idx] = -SC_ONE; + values[idx + 1] = SC_ONE; + values[idx + 2] = -SC_ONE; } } }; diff --git a/pennylane_lightning/lightning_gpu/lightning_gpu.py b/pennylane_lightning/lightning_gpu/lightning_gpu.py index e5b85eea2f..98de0e9512 100644 --- a/pennylane_lightning/lightning_gpu/lightning_gpu.py +++ b/pennylane_lightning/lightning_gpu/lightning_gpu.py @@ -40,6 +40,21 @@ DevPool, ) + try: + # pylint: disable=no-name-in-module + from pennylane_lightning.lightning_gpu_ops import ( + StateVectorMPIC128, + StateVectorMPIC64, + MeasurementsMPIC128, + MeasurementsMPIC64, + MPIManager, + DevTag, + ) + + MPI_SUPPORT = True + 
except ImportError: + MPI_SUPPORT = False + from ctypes.util import find_library from importlib import util as imp_util @@ -91,11 +106,29 @@ create_ops_listC128, ) - def _gpu_dtype(dtype): + if MPI_SUPPORT: + from pennylane_lightning.lightning_gpu_ops.algorithmsMPI import ( + AdjointJacobianMPIC64, + create_ops_listMPIC64, + AdjointJacobianMPIC128, + create_ops_listMPIC128, + ) + + def _gpu_dtype(dtype, mpi=False): if dtype not in [np.complex128, np.complex64]: # pragma: no cover raise ValueError(f"Data type is not supported for state-vector computation: {dtype}") + if mpi: + return StateVectorMPIC128 if dtype == np.complex128 else StateVectorMPIC64 return StateVectorC128 if dtype == np.complex128 else StateVectorC64 + def _adj_dtype(use_csingle, mpi=False): + if mpi: + return AdjointJacobianMPIC64 if use_csingle else AdjointJacobianMPIC128 + return AdjointJacobianC64 if use_csingle else AdjointJacobianC128 + + def _mebibytesToBytes(mebibytes): + return mebibytes * 1024 * 1024 + allowed_operations = { "Identity", "BasisState", @@ -170,7 +203,7 @@ def _gpu_dtype(dtype): "SProd", } - class LightningGPU(LightningBase): + class LightningGPU(LightningBase): # pylint: disable=too-many-instance-attributes """PennyLane-Lightning-GPU device. 
Args: wires (int): the number of wires to initialize the device with @@ -194,11 +227,13 @@ def __init__( self, wires, *, + mpi: bool = False, + mpi_buf_size: int = 0, sync=False, c_dtype=np.complex128, shots=None, batch_obs: Union[bool, int] = False, - ): # pylint: disable=unused-argument + ): # pylint: disable=too-many-arguments if c_dtype is np.complex64: self.use_csingle = True elif c_dtype is np.complex128: @@ -209,14 +244,72 @@ def __init__( super().__init__(wires, shots=shots, c_dtype=c_dtype) self._dp = DevPool() - self._sync = sync - self._batch_obs = batch_obs - self._num_local_wires = self.num_wires - self._gpu_state = _gpu_dtype(c_dtype)(self._num_local_wires) + if not mpi: + self._mpi = False + self._num_local_wires = self.num_wires + self._gpu_state = _gpu_dtype(c_dtype)(self._num_local_wires) + else: + self._mpi = True + self._mpi_init_helper(self.num_wires) + + if mpi_buf_size < 0: + raise TypeError(f"Unsupported mpi_buf_size value: {mpi_buf_size}") + if mpi_buf_size: + if mpi_buf_size & (mpi_buf_size - 1): + raise TypeError( + f"Unsupported mpi_buf_size value: {mpi_buf_size}. mpi_buf_size should be power of 2." + ) + # Memory size in bytes + sv_memsize = np.dtype(c_dtype).itemsize * (1 << self._num_local_wires) + if _mebibytesToBytes(mpi_buf_size) > sv_memsize: + w_msg = "The MPI buffer size is larger than the local state vector size." 
+ warn( + w_msg, + RuntimeWarning, + ) + + self._gpu_state = _gpu_dtype(c_dtype, mpi)( + self._mpi_manager, + self._devtag, + mpi_buf_size, + self._num_global_wires, + self._num_local_wires, + ) + + self._sync = sync + self._batch_obs = batch_obs self._create_basis_state(0) + def _mpi_init_helper(self, num_wires): + if not MPI_SUPPORT: + raise ImportError("MPI related APIs are not found.") + # initialize MPIManager and config check in the MPIManager ctor + self._mpi_manager = MPIManager() + # check if number of GPUs per node is larger than + # number of processes per node + numDevices = self._dp.getTotalDevices() + numProcsNode = self._mpi_manager.getSizeNode() + if numDevices < numProcsNode: + raise ValueError( + "Number of devices should be larger than or equal to the number of processes on each node." + ) + # check if the process number is larger than number of statevector elements + if self._mpi_manager.getSize() > (1 << (num_wires - 1)): + raise ValueError( + "Number of processes should be smaller than the number of statevector elements." 
+ ) + # set the number of global and local wires + commSize = self._mpi_manager.getSize() + self._num_global_wires = commSize.bit_length() - 1 + self._num_local_wires = num_wires - self._num_global_wires + # set GPU device + rank = self._mpi_manager.getRank() + deviceid = rank % numProcsNode + self._dp.setDeviceID(deviceid) + self._devtag = DevTag(deviceid) + @staticmethod def _asarray(arr, dtype=None): arr = np.asarray(arr) # arr is not copied @@ -266,11 +359,19 @@ def state(self): @property def create_ops_list(self): """Returns create_ops_list function of the matching precision.""" + if self._mpi: + return create_ops_listMPIC64 if self.use_csingle else create_ops_listMPIC128 return create_ops_listC64 if self.use_csingle else create_ops_listC128 @property def measurements(self): """Returns Measurements constructor of the matching precision.""" + if self._mpi: + return ( + MeasurementsMPIC64(self._gpu_state) + if self.use_csingle + else MeasurementsMPIC128(self._gpu_state) + ) return ( MeasurementsC64(self._gpu_state) if self.use_csingle @@ -345,6 +446,11 @@ def _apply_state_vector(self, state, device_wires, use_async=False): if self.num_wires == self._num_local_wires: self.syncH2D(self._reshape(state, output_shape)) return + local_state = np.zeros(1 << self._num_local_wires, dtype=self.C_DTYPE) + self._mpi_manager.Scatter(state, local_state, 0) + # Initialize the entire device state with the input state + self.syncH2D(self._reshape(local_state, output_shape)) + return # generate basis states on subset of qubits via the cartesian product basis_states = np.array(list(product([0, 1], repeat=len(device_wires)))) @@ -550,7 +656,9 @@ def adjoint_jacobian(self, tape, starting_state=None, use_device_state=False): # Check adjoint diff support self._check_adjdiff_supported_operations(tape.operations) - processed_data = self._process_jacobian_tape(tape, starting_state, use_device_state) + processed_data = self._process_jacobian_tape( + tape, starting_state, use_device_state, 
self._mpi + ) if not processed_data: # training_params is empty return np.array([], dtype=self.state.dtype) @@ -565,7 +673,7 @@ def adjoint_jacobian(self, tape, starting_state=None, use_device_state=False): - Evenly distribute the observables over all available GPUs (`batch_obs=True`): This will evenly split the data into ceil(num_obs/num_gpus) chunks, and allocate enough space on each GPU up-front before running through them concurrently. This relies on C++ threads to handle the orchestration. - Allocate at most `n` observables per GPU (`batch_obs=n`): Providing an integer value restricts each available GPU to at most `n` copies of the statevector, and hence `n` given observables for a given batch. This will iterate over the data in chnuks of size `n*num_gpus`. """ - adjoint_jacobian = AdjointJacobianC64() if self.use_csingle else AdjointJacobianC128() + adjoint_jacobian = _adj_dtype(self.use_csingle, self._mpi)() if self._batch_obs: adjoint_jacobian = adjoint_jacobian.batched @@ -661,7 +769,19 @@ def expval(self, observable, shot_range=None, bin_size=None): return np.squeeze(np.mean(samples, axis=0)) if observable.name in ["SparseHamiltonian"]: - CSR_SparseHamiltonian = observable.sparse_matrix().tocsr() + if self._mpi: + # Identity for CSR_SparseHamiltonian to pass to processes with rank != 0 to reduce + # host(cpu) memory requirements + obs = qml.Identity(0) + Hmat = qml.Hamiltonian([1.0], [obs]).sparse_matrix() + H_sparse = qml.SparseHamiltonian(Hmat, wires=range(1)) + CSR_SparseHamiltonian = H_sparse.sparse_matrix().tocsr() + # CSR_SparseHamiltonian for rank == 0 + if self._mpi_manager.getRank() == 0: + CSR_SparseHamiltonian = observable.sparse_matrix().tocsr() + else: + CSR_SparseHamiltonian = observable.sparse_matrix().tocsr() + return self.measurements.expval( CSR_SparseHamiltonian.indptr, CSR_SparseHamiltonian.indices, @@ -671,6 +791,10 @@ def expval(self, observable, shot_range=None, bin_size=None): # use specialized functors to compute expval(Hermitian) 
if observable.name == "Hermitian": observable_wires = self.map_wires(observable.wires) + if self._mpi and len(observable_wires) > self._num_local_wires: + raise RuntimeError( + "MPI backend does not support Hermitian with number of target wires larger than local wire number." + ) matrix = observable.matrix() return self.measurements.expval(matrix, observable_wires) @@ -679,9 +803,9 @@ def expval(self, observable, shot_range=None, bin_size=None): or (observable.arithmetic_depth > 0) or isinstance(observable.name, List) ): - ob_serialized = QuantumScriptSerializer(self.short_name, self.use_csingle)._ob( - observable, self.wire_map - ) + ob_serialized = QuantumScriptSerializer( + self.short_name, self.use_csingle, self._mpi + )._ob(observable, self.wire_map) return self.measurements.expval(ob_serialized) # translate to wire labels used by device @@ -694,8 +818,10 @@ def probability_lightning(self, wires=None): observable_wires = self.map_wires(wires) # Device returns as col-major orderings, so perform transpose on data for bit-index shuffle for now. 
local_prob = self.measurements.probs(observable_wires) - num_local_wires = len(local_prob).bit_length() - 1 if len(local_prob) > 0 else 0 - return local_prob.reshape([2] * num_local_wires).transpose().reshape(-1) + if len(local_prob) > 0: + num_local_wires = len(local_prob).bit_length() - 1 if len(local_prob) > 0 else 0 + return local_prob.reshape([2] * num_local_wires).transpose().reshape(-1) + return local_prob # pylint: disable=missing-function-docstring def var(self, observable, shot_range=None, bin_size=None): @@ -718,9 +844,9 @@ def var(self, observable, shot_range=None, bin_size=None): or (observable.arithmetic_depth > 0) or isinstance(observable.name, List) ): - ob_serialized = QuantumScriptSerializer(self.short_name, self.use_csingle)._ob( - observable, self.wire_map - ) + ob_serialized = QuantumScriptSerializer( + self.short_name, self.use_csingle, self._mpi + )._ob(observable, self.wire_map) return self.measurements.var(ob_serialized) # translate to wire labels used by device diff --git a/requirements-dev.txt b/requirements-dev.txt index b377d8acd1..642a74ad27 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -6,5 +6,7 @@ pybind11 pytest pytest-cov pytest-mock +pre-commit>=2.19.0 black==23.7.0 -clang-format==14 \ No newline at end of file +clang-format==14 +pylint \ No newline at end of file diff --git a/tests/test_adjoint_jacobian.py b/tests/test_adjoint_jacobian.py index c8d65dd093..41a9784dc0 100644 --- a/tests/test_adjoint_jacobian.py +++ b/tests/test_adjoint_jacobian.py @@ -25,6 +25,7 @@ from pennylane import QNode, qnode from pennylane import qchem + I, X, Y, Z = ( np.eye(2), qml.PauliX.compute_matrix(), @@ -892,7 +893,10 @@ def circuit_ansatz(params, wires): qml.RX(params[29], wires=wires[1]) -@pytest.mark.skipif(not ld._CPP_BINARY_AVAILABLE, reason="Lightning binary required") +@pytest.mark.skipif( + device_name != "lightning.gpu" or not ld._CPP_BINARY_AVAILABLE, + reason="Lightning binary required", +) def test_tape_qchem(tol): 
"""Tests the circuit Ansatz with a QChem Hamiltonian produces correct results""" @@ -944,6 +948,60 @@ def circuit(params): assert np.allclose(qml.grad(circuit_ld)(params), qml.grad(circuit_dq)(params), tol) +custom_wires = ["alice", 3.14, -1, 0] + + +@pytest.mark.skipif(not ld._CPP_BINARY_AVAILABLE, reason="Lightning binary required") +@pytest.mark.parametrize( + "returns", + [ + qml.SparseHamiltonian( + qml.Hamiltonian( + [0.1], + [qml.PauliX(wires=custom_wires[0]) @ qml.PauliZ(wires=custom_wires[1])], + ).sparse_matrix(custom_wires), + wires=custom_wires, + ), + qml.SparseHamiltonian( + qml.Hamiltonian( + [2.0], + [qml.PauliX(wires=custom_wires[2]) @ qml.PauliZ(wires=custom_wires[0])], + ).sparse_matrix(custom_wires), + wires=custom_wires, + ), + qml.SparseHamiltonian( + qml.Hamiltonian( + [1.1], + [qml.PauliX(wires=custom_wires[0]) @ qml.PauliZ(wires=custom_wires[2])], + ).sparse_matrix(custom_wires), + wires=custom_wires, + ), + ], +) +def test_adjoint_SparseHamiltonian(returns): + """Integration tests that compare to default.qubit for a large circuit containing parametrized + operations and when using custom wire labels""" + + dev = qml.device(device_name, wires=custom_wires) + dev_default = qml.device("default.qubit", wires=custom_wires) + + def circuit(params): + circuit_ansatz(params, wires=custom_wires) + return qml.expval(returns) + + n_params = 30 + np.random.seed(1337) + params = np.random.rand(n_params) + + qnode = qml.QNode(circuit, dev, diff_method="adjoint") + qnode_default = qml.QNode(circuit, dev_default, diff_method="parameter-shift") + + j_device = qml.jacobian(qnode)(params) + j_default = qml.jacobian(qnode_default)(params) + + assert np.allclose(j_device, j_default) + + @pytest.mark.parametrize( "returns", [ @@ -1024,9 +1082,6 @@ def casted_to_array_batched(params): assert np.allclose(j_def, j_lightning_batched) -custom_wires = ["alice", 3.14, -1, 0] - - @pytest.mark.parametrize( "returns", [ @@ -1218,7 +1273,7 @@ def 
create_xyz_file(tmp_path_factory): [False, True, 1, 2, 3, 4], ) def test_integration_H2_Hamiltonian(create_xyz_file, batches): - skipp_condn = pytest.importorskip("openfermionpyscf") + _ = pytest.importorskip("openfermionpyscf") n_electrons = 2 np.random.seed(1337) @@ -1232,9 +1287,10 @@ def test_integration_H2_Hamiltonian(create_xyz_file, batches): active_electrons=n_electrons, name="h2", outpath=str(str_path.parent), + load_data=True, ) hf_state = qml.qchem.hf_state(n_electrons, qubits) - singles, doubles = qml.qchem.excitations(n_electrons, qubits) + _, doubles = qml.qchem.excitations(n_electrons, qubits) # Choose different batching supports here dev = qml.device(device_name, wires=qubits, batch_obs=batches) diff --git a/tests/test_device.py b/tests/test_device.py index a45f269fb1..4039394276 100644 --- a/tests/test_device.py +++ b/tests/test_device.py @@ -48,3 +48,18 @@ def test_create_device_with_unsupported_dtype(): def test_create_device_with_unsupported_kokkos_args(): with pytest.raises(TypeError, match="Argument kokkos_args must be of type"): dev = qml.device(device_name, wires=1, kokkos_args=np.complex256) + + +@pytest.mark.skipif( + device_name != "lightning.gpu" or not ld._CPP_BINARY_AVAILABLE, + reason="Only lightning.gpu has a kwarg mpi_buf_size.", +) +def test_create_device_with_unsupported_mpi_buf_size(): + try: + from mpi4py import MPI + + with pytest.raises(ImportError, match="MPI related APIs are not found"): + dev = qml.device(device_name, wires=1) + dev._mpi_init_helper(1) + except: + pass diff --git a/tests/test_serialize.py b/tests/test_serialize.py index ab6df3c26e..360ac0e71b 100644 --- a/tests/test_serialize.py +++ b/tests/test_serialize.py @@ -34,6 +34,8 @@ TensorProdObsC128, HamiltonianC64, HamiltonianC128, + SparseHamiltonianC64, + SparseHamiltonianC128, ) elif device_name == "lightning.gpu": from pennylane_lightning.lightning_gpu_ops.observables import ( @@ -45,6 +47,8 @@ TensorProdObsC128, HamiltonianC64, HamiltonianC128, + 
SparseHamiltonianC64, + SparseHamiltonianC128, ) else: from pennylane_lightning.lightning_qubit_ops.observables import ( @@ -56,6 +60,8 @@ TensorProdObsC128, HamiltonianC64, HamiltonianC128, + SparseHamiltonianC64, + SparseHamiltonianC128, ) @@ -92,6 +98,10 @@ def test_wrong_device_name(): (qml.Projector([0], wires=0), HermitianObsC128), (qml.Hamiltonian([1], [qml.PauliZ(0)]), HamiltonianC128), (qml.sum(qml.Hadamard(0), qml.PauliX(1)), HermitianObsC128), + ( + qml.SparseHamiltonian(qml.Hamiltonian([1], [qml.PauliZ(0)]).sparse_matrix(), wires=[0]), + SparseHamiltonianC128, + ), ], ) def test_obs_returns_expected_type(obs, obs_type):