Add mpi tests to LGPU (#519)

* Initial commit mpi tests * Remove label guards * Fix PL_DEVICE * Install lightning_qubit. * Fix ENABLE_MPI * print cuquantum * export cu_sdk * revert define * Debug cpp tests. * Debug cpp tests. * Fix cmake options. * Compile with mpicxx * Specify backend. * Specify backend. * Remove obsolete line. * Specify cov backend * Merge test/cov & try simplifying python * if-no-files-found: error and fix python tests. * Fix mpi find * Install real lightning. * Revert python tests. * Hardcode backend values in python tests * Install lightning_qubit with gpu in python tests * Remove explicit mpich mentions. * Parametrize mpilib name. * Add openmpi tests. * Build only openmpi python tests. * Add timeouts * test/test_apply.py * Revert pull triggers. * Clean gpu-mpi test workflows. * Revert to 804ed24. * Revert back. * Update tests_linux_x86_mpi.yml [ci skip] * Add jobs dep. * Remove module unload * Simplify mpi-gpu tests. * trigger CI * unset CFLAGS. * set CFLAGS * Revert triggers. * Fix pull_request: [skip ci] * trigger CI * Rename test_gpu_cu11.yml -> tests_gpu_cu11.yml [skip ci]
PennyLaneAI · Oct 18, 2023 · 9d172dc · 9d172dc
1 parent 3e80ce6
commit 9d172dc
Show file tree

Hide file tree

Showing 2 changed files with 268 additions and 2 deletions.
diff --git a/.github/workflows/test_gpu_cu11.yml → .github/workflows/tests_gpu_cu11.yml b/.github/workflows/test_gpu_cu11.yml → .github/workflows/tests_gpu_cu11.yml
@@ -1,4 +1,4 @@
-name: Testing::Linux::x86_64 (Lightning-GPU)
+name: Testing::Linux::x86_64::LGPU
 on:
   pull_request:
   push:
@@ -144,7 +144,7 @@ jobs:
           if-no-files-found: error
 
   pythontestswithLGPU:
-    needs: [builddeps]
+    needs: [builddeps, cpptestswithLGPU_cu11]
     strategy:
       matrix:
         os: [ubuntu-22.04]

diff --git a/.github/workflows/tests_linux_x86_mpi.yml b/.github/workflows/tests_linux_x86_mpi.yml
@@ -0,0 +1,266 @@
+name: Tests::Linux::x86_64::LGPU::MPI
+# Triggers: calls from other workflows (with explicit versions), releases,
+# pushes and pull requests.
+on:
+  workflow_call:
+    inputs:
+      lightning-version:
+        type: string
+        required: true
+        description: The version of lightning to use. Valid values are either 'stable' (most recent git-tag) or 'latest' (most recent commit from master)
+      pennylane-version:
+        type: string
+        required: true
+        description: The version of PennyLane to use. Valid values are either 'stable' (most recent git-tag) or 'latest' (most recent commit from master)
+  release:
+  # NOTE(review): the input descriptions above refer to 'master'; confirm that
+  # 'main' is the branch this push filter is meant to match.
+  push:
+    branches:
+      - main
+  pull_request:
+
+# Environment shared by every job. Numeric-looking values are quoted so they
+# are unambiguously strings, like OMP_NUM_THREADS.
+env:
+  COVERAGE_FLAGS: "--cov=pennylane_lightning --cov-report=term-missing --cov-report=xml:./coverage.xml --no-flaky-report -p no:warnings --tb=native"
+  GCC_VERSION: "11"
+  OMP_NUM_THREADS: "2"
+  # Value handed to CMAKE_CUDA_ARCHITECTURES when building Lightning-GPU.
+  CI_CUDA_ARCH: "86"
+
+# One concurrency group per ref and input combination; a newer run cancels
+# the run that is still in progress.
+concurrency:
+  group: gpu-test-mpi-${{ github.ref }}-${{ inputs.lightning-version }}-${{ inputs.pennylane-version }}
+  cancel-in-progress: true
+
+jobs:
+  cpp_tests:
+    # Runs only for pull requests labelled 'ci:use-multi-gpu-runner', or when
+    # both workflow_call inputs are non-empty.
+    if: contains(github.event.pull_request.labels.*.name, 'ci:use-multi-gpu-runner') || (inputs.lightning-version != '' && inputs.pennylane-version != '')
+    # Requires a self-hosted runner carrying all of the labels below.
+    runs-on:
+      - self-hosted
+      - linux
+      - x64
+      - ubuntu-22.04
+      - multi-gpu
+    strategy:
+      # Matrix entries (one per MPI implementation) run one at a time.
+      max-parallel: 1
+      matrix:
+        mpilib: ["mpich", "openmpi"]
+    timeout-minutes: 30
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-tags: true
+
+      - name: Switch to stable build of Lightning-GPU
+        if: inputs.lightning-version == 'stable'
+        run: |
+          # Refresh all tags from the remote (`--tags` is an option; a bare
+          # `tags` argument would be interpreted as a remote name), then check
+          # out the highest tag in version order.
+          git fetch --tags --force
+          git checkout $(git tag | sort -V | tail -1)
+
+      - uses: actions/setup-python@v4
+        id: setup_python
+        name: Install Python
+        with:
+          python-version: '3.9'
+
+      # Since the self-hosted runner can be re-used, it is best to set up all package
+      # installations in a virtual environment that gets cleaned at the end of each workflow run.
+      - name: Setup Python virtual environment
+        id: setup_venv
+        env:
+          VENV_NAME: ${{ github.workspace }}/venv_${{ steps.setup_python.outputs.python-version }}_${{ github.sha }}
+        run: |
+          # Clear any pre-existing venvs
+          rm -rf venv_*
+
+          # Create new venv for this workflow_run
+          python --version
+          python -m venv ${{ env.VENV_NAME }}
+
+          # Add the venv to PATH for subsequent steps
+          echo ${{ env.VENV_NAME }}/bin >> $GITHUB_PATH
+
+          # Adding venv name as an output for subsequent steps to reference if needed
+          echo "venv_name=${{ env.VENV_NAME }}" >> $GITHUB_OUTPUT
+
+      # Publishes the interpreter and pip locations as step outputs
+      # (`python` and `pip`) for later steps to reference.
+      - name: Display Python-Path
+        id: python_path
+        run: |
+          py_path=$(which python)
+          echo "Python Interpreter Path => $py_path"
+          echo "python=$py_path" >> $GITHUB_OUTPUT
+
+          # Look up pip itself; the `pip` output must not echo the python path.
+          pip_path=$(which pip)
+          echo "PIP Path => $pip_path"
+          echo "pip=$pip_path" >> $GITHUB_OUTPUT
+
+      - name: Install required packages
+        run: |
+          python -m pip install ninja cmake custatevec-cu11
+
+      - name: Validate GPU version and installed compiler
+        run: |
+          source /etc/profile.d/modules.sh && module use /opt/modules && module load cuda/11.8
+          which -a nvcc
+          nvcc --version
+
+      - name: Validate Multi-GPU packages
+        run: |
+          source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }}
+          echo 'Checking for ${{ matrix.mpilib }}'
+          which -a mpirun
+          mpirun --version
+          which -a mpicxx
+          mpicxx --version
+          module unload ${{ matrix.mpilib }}
+
+      - name: Install Latest PennyLane
+        if: inputs.pennylane-version == 'latest'
+        run: python -m pip install git+https://github.com/PennyLaneAI/pennylane.git@master
+
+      # Configures and builds the C++ tests with MPI and coverage enabled, runs
+      # the serial and MPI test runners, then collects lcov coverage data.
+      - name: Build and run unit tests
+        run: |
+          source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }}
+          # Point the build at the cuQuantum libraries inside site-packages.
+          export CUQUANTUM_SDK=$(python -c "import site; print( f'{site.getsitepackages()[0]}/cuquantum/lib')")
+          # The CUDA architecture comes from the workflow-level CI_CUDA_ARCH so
+          # that this job and the Python job build for the same target.
+          cmake . -BBuild \
+            -DPL_BACKEND=lightning_gpu \
+            -DENABLE_MPI=ON \
+            -DCMAKE_BUILD_TYPE=Debug \
+            -DENABLE_COVERAGE=ON \
+            -DBUILD_TESTS=ON \
+            -DCMAKE_CXX_COMPILER=mpicxx \
+            -DCMAKE_CUDA_COMPILER="/usr/local/cuda/bin/nvcc" \
+            -DCMAKE_CUDA_ARCHITECTURES="${{ env.CI_CUDA_ARCH }}" \
+            -DPython_EXECUTABLE:FILE="${{ steps.python_path.outputs.python }}" \
+            -G Ninja
+          cmake --build ./Build
+          cd ./Build
+          mkdir -p ./tests/results
+          # Serial runners first, then the MPI runners on two ranks.
+          for file in *runner ; do ./"$file" --order lex --reporter junit --out "./tests/results/report_$file.xml"; done;
+          for file in *runner_mpi ; do /opt/mpi/${{ matrix.mpilib }}/bin/mpirun -np 2 ./"$file" --order lex --reporter junit --out "./tests/results/report_$file.xml"; done;
+          # Capture coverage, drop system headers, and rename for the upload step.
+          lcov --directory . -b ../pennylane_lightning/src --capture --output-file coverage.info
+          lcov --remove coverage.info '/usr/*' --output-file coverage.info
+          mv coverage.info coverage-${{ github.job }}-lightning_gpu.info
+
+      # Uploads the JUnit reports even when an earlier step failed.
+      # NOTE(review): both matrix entries (mpich, openmpi) upload under the same
+      # artifact name with identical report file names; confirm that the second
+      # upload does not replace the first one's files.
+      - name: Upload test results
+        uses: actions/upload-artifact@v3
+        if: always()
+        with:
+          if-no-files-found: error
+          name: ubuntu-tests-reports
+          path: ./Build/tests/results/
+
+      # Uploads the lcov file produced by the build step. There is no `if:` here,
+      # so this step is skipped when an earlier step fails.
+      # NOTE(review): the artifact name and file name do not include the matrix
+      # value, so both MPI variants upload to the same place; confirm intent.
+      - name: Upload code coverage results
+        uses: actions/upload-artifact@v3
+        with:
+          if-no-files-found: error
+          name: ubuntu-codecov-results-cpp
+          path: ./Build/coverage-${{ github.job }}-lightning_gpu.info
+
+      - name: Cleanup
+        if: always()
+        run: |
+          rm -rf ${{ steps.setup_venv.outputs.venv_name }}
+          rm -rf *
+          rm -rf .git
+          rm -rf .gitignore
+          rm -rf .github
+          pip cache purge
+
+
+  python_tests:
+    # Same gate as cpp_tests: a pull request labelled 'ci:use-multi-gpu-runner',
+    # or both workflow_call inputs non-empty.
+    if: contains(github.event.pull_request.labels.*.name, 'ci:use-multi-gpu-runner') || (inputs.lightning-version != '' && inputs.pennylane-version != '')
+    runs-on:
+      - self-hosted
+      - linux
+      - x64
+      - ubuntu-22.04
+      - multi-gpu
+    # Starts only after the cpp_tests job has finished.
+    needs: ["cpp_tests"]
+    strategy:
+      # Matrix entries (one per MPI implementation) run one at a time.
+      max-parallel: 1
+      matrix:
+        mpilib: ["mpich", "openmpi"]
+    timeout-minutes: 30
+
+    steps:
+      - name: Checkout pennyLane-lightning-gpu
+        uses: actions/checkout@v3
+        with:
+          fetch-tags: true
+
+      - name: Switch to stable build of Lightning-GPU
+        if: inputs.lightning-version == 'stable'
+        run: |
+          # Refresh all tags from the remote (`--tags` is an option; a bare
+          # `tags` argument would be interpreted as a remote name), then check
+          # out the highest tag in version order.
+          git fetch --tags --force
+          git checkout $(git tag | sort -V | tail -1)
+
+      - uses: actions/setup-python@v4
+        id: setup_python
+        name: Install Python
+        with:
+          python-version: '3.9'
+
+      # Since the self-hosted runner can be re-used, it is best to set up all package
+      # installations in a virtual environment that gets cleaned at the end of each workflow run.
+      - name: Setup Python virtual environment
+        id: setup_venv
+        env:
+          VENV_NAME: ${{ github.workspace }}/venv_${{ steps.setup_python.outputs.python-version }}_${{ github.sha }}
+        run: |
+          # Clear any pre-existing venvs
+          rm -rf venv_*
+
+          # Create new venv for this workflow_run
+          python --version
+          python -m venv ${{ env.VENV_NAME }}
+
+          # Add the venv to PATH for subsequent steps. No `source .../activate`
+          # is needed here: activation would only last until this step ends.
+          echo ${{ env.VENV_NAME }}/bin >> $GITHUB_PATH
+
+          # Adding venv name as an output for subsequent steps to reference if needed
+          echo "venv_name=${{ env.VENV_NAME }}" >> $GITHUB_OUTPUT
+
+      # Publishes the interpreter and pip locations as step outputs
+      # (`python` and `pip`) for later steps to reference.
+      - name: Display Python-Path
+        id: python_path
+        run: |
+          py_path=$(which python)
+          echo "Python Interpreter Path => $py_path"
+          echo "python=$py_path" >> $GITHUB_OUTPUT
+
+          # Look up pip itself; the `pip` output must not echo the python path.
+          pip_path=$(which pip)
+          echo "PIP Path => $pip_path"
+          echo "pip=$pip_path" >> $GITHUB_OUTPUT
+
+      - name: Install Latest PennyLane
+        # We want to install the latest PL on non workflow_call events
+        if: inputs.pennylane-version == 'latest'  || inputs.pennylane-version == ''
+        run: python -m pip install git+https://github.com/PennyLaneAI/pennylane.git@master
+
+      - name: Install required packages
+        run: |
+          source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }}
+          python -m pip install pip~=22.0
+          python -m pip install ninja cmake custatevec-cu11 pytest pytest-mock flaky pytest-cov mpi4py
+          SKIP_COMPILATION=True PL_BACKEND=lightning_qubit python -m pip install -e . -vv
+
+      # Builds the Lightning-GPU extension in place with MPI enabled, then
+      # installs the package in editable mode.
+      - name: Build and install package
+        run: |
+          source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }}
+          # Resolve the cuQuantum library directory in the shell. A value under a
+          # step's `env:` key is passed as a literal string, so a `$(...)` placed
+          # there is never executed.
+          export CUQUANTUM_SDK=$(python -c "import site; print( f'{site.getsitepackages()[0]}/cuquantum/lib')")
+          PL_BACKEND=lightning_gpu python setup.py build_ext -i --define="CMAKE_CXX_COMPILER=mpicxx;ENABLE_MPI=ON;CMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc;CMAKE_CUDA_ARCHITECTURES=${{ env.CI_CUDA_ARCH }};Python_EXECUTABLE=${{ steps.python_path.outputs.python }}"
+          PL_BACKEND=lightning_gpu python -m pip install -e . --verbose
+
+      # Runs the Python test suite against the lightning.gpu device: first the
+      # whole ./tests/ directory in a single process, then one MPI test module
+      # launched on two ranks with the selected MPI implementation's mpirun.
+      - name: Run PennyLane-Lightning-GPU unit tests
+        run: |
+          source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }}
+          PL_DEVICE=lightning.gpu python -m pytest ./tests/
+          PL_DEVICE=lightning.gpu /opt/mpi/${{ matrix.mpilib }}/bin/mpirun -np 2 python -m pytest ./mpitests/test_adjoint_jacobian.py
+
+      - name: Cleanup
+        if: always()
+        run: |
+          rm -rf ${{ steps.setup_venv.outputs.venv_name }}
+          rm -rf *
+          rm -rf .git
+          rm -rf .gitignore
+          rm -rf .github
+          pip cache purge