-
Notifications
You must be signed in to change notification settings - Fork 39
338 lines (292 loc) · 12.4 KB
/
tests_linux_x86_mpi_gpu.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
# MPI-enabled Lightning-GPU test workflow for Linux x86_64.
#
# Can be invoked as a reusable workflow (`workflow_call`) with explicit
# Lightning / PennyLane versions, or triggered directly by releases, pushes to
# `main` and pull requests.
#
# NOTE(review): the job-level `if:` conditions test for the `schedule` and
# `workflow_dispatch` events, neither of which is declared under `on:` here.
# Confirm whether those triggers are intentionally absent.
name: Tests::Linux::x86_64::LGPU::MPI

on:
  workflow_call:
    inputs:
      lightning-version:
        type: string
        required: true
        description: >-
          The version of lightning to use. Valid values are 'stable' (most
          recent git-tag), 'release' (most recent release-candidate branch)
          or 'latest' (most recent commit from master)
      pennylane-version:
        type: string
        required: true
        description: >-
          The version of PennyLane to use. Valid values are 'stable' (most
          recent git-tag), 'release' (most recent release-candidate branch)
          or 'latest' (most recent commit from master)
  release:
  push:
    branches:
      - main
  pull_request:

# Workflow-wide environment. Values are quoted so they are unambiguously
# strings regardless of the YAML parser in use.
env:
  COVERAGE_FLAGS: "--cov=pennylane_lightning --cov-report=term-missing --cov-report=xml:./coverage.xml --no-flaky-report -p no:warnings --tb=native"
  GCC_VERSION: "11"
  OMP_NUM_THREADS: "2"
  # CUDA compute capability passed to CMake as CMAKE_CUDA_ARCHITECTURES.
  CI_CUDA_ARCH: "86"

# One run per ref / version combination; a newer run cancels the older one.
concurrency:
  group: gpu-test-mpi-${{ github.ref }}-${{ inputs.lightning-version }}-${{ inputs.pennylane-version }}
  cancel-in-progress: true
jobs:
  # Builds the C++ unit-test runners with MPI and coverage enabled, runs them
  # on the self-hosted multi-GPU runner and uploads JUnit + lcov results.
  # Runs only when the PR carries the `ci:use-multi-gpu-runner` label and the
  # event is neither `schedule` nor `workflow_dispatch`.
  cpp_tests:
    if: ${{ contains(github.event.pull_request.labels.*.name, 'ci:use-multi-gpu-runner') && !contains(fromJSON('["schedule", "workflow_dispatch"]'), github.event_name) }}
    runs-on:
      - self-hosted
      - linux
      - x64
      - ubuntu-22.04
      - multi-gpu
    strategy:
      # Matrix legs run one at a time.
      max-parallel: 1
      matrix:
        mpilib: ["mpich", "openmpi"]
        cuda_version_maj: ["12"]
        cuda_version_min: ["2"]
    timeout-minutes: 30
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-tags: true

      - name: Switch to stable build of Lightning-GPU
        if: inputs.lightning-version == 'stable'
        run: |
          git fetch --tags --force
          git checkout $(git tag | sort -V | tail -1)

      - name: Install Python
        id: setup_python
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'

      # Since the self-hosted runner can be re-used, it is best to set up all package
      # installations in a virtual environment that gets cleaned at the end of each workflow run.
      - name: Setup Python virtual environment
        id: setup_venv
        env:
          VENV_NAME: ${{ github.workspace }}/venv_${{ steps.setup_python.outputs.python-version }}_${{ github.sha }}
        run: |
          # Clear any pre-existing venvs
          rm -rf venv_*
          # Create new venv for this workflow_run
          python --version
          python -m venv ${{ env.VENV_NAME }}
          # Add the venv to PATH for subsequent steps
          echo ${{ env.VENV_NAME }}/bin >> $GITHUB_PATH
          # Adding venv name as an output for subsequent steps to reference if needed
          echo "venv_name=${{ env.VENV_NAME }}" >> $GITHUB_OUTPUT

      - name: Display Python-Path
        id: python_path
        run: |
          py_path=$(which python)
          echo "Python Interpreter Path => $py_path"
          echo "python=$py_path" >> $GITHUB_OUTPUT
          pip_path=$(which pip)
          echo "PIP Path => $pip_path"
          echo "pip=$pip_path" >> $GITHUB_OUTPUT

      - name: Install required packages
        run: |
          python -m pip install -r requirements-dev.txt
          python -m pip install cmake custatevec-cu${{ matrix.cuda_version_maj }} scipy

      - name: Validate GPU version and installed compiler and modules
        run: |
          source /etc/profile.d/modules.sh && module use /opt/modules && module load cuda/${{ matrix.cuda_version_maj }}
          which -a nvcc
          nvcc --version
          ls -R /opt/modules

      - name: Validate Multi-GPU packages
        run: |
          source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }}/cuda-${{ matrix.cuda_version_maj }}.${{ matrix.cuda_version_min }}
          echo 'Checking for ${{ matrix.mpilib }}'
          which -a mpirun
          mpirun --version
          which -a mpicxx
          mpicxx --version
          module unload ${{ matrix.mpilib }}/cuda-${{ matrix.cuda_version_maj }}.${{ matrix.cuda_version_min }}

      # Serial runners (`*runner`) are executed directly; MPI runners
      # (`*runner_mpi`) are launched with two ranks.
      - name: Build and run unit tests
        run: |
          source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }}/cuda-${{ matrix.cuda_version_maj }}.${{ matrix.cuda_version_min }}
          export CUQUANTUM_SDK=$(python -c "import site; print( f'{site.getsitepackages()[0]}/cuquantum/lib')")
          cmake . -BBuild \
            -DPL_BACKEND=lightning_gpu \
            -DENABLE_PYTHON=OFF \
            -DENABLE_MPI=ON \
            -DCMAKE_BUILD_TYPE=Debug \
            -DENABLE_COVERAGE=ON \
            -DBUILD_TESTS=ON \
            -DCMAKE_CXX_COMPILER=mpicxx \
            -DCMAKE_CUDA_COMPILER=$(which nvcc) \
            -DCMAKE_CUDA_ARCHITECTURES="${{ env.CI_CUDA_ARCH }}" \
            -DPython_EXECUTABLE:FILE="${{ steps.python_path.outputs.python }}" \
            -G Ninja
          cmake --build ./Build
          cd ./Build
          mkdir -p ./tests/results
          for file in *runner ; do ./$file --order lex --reporter junit --out ./tests/results/report_$file.xml; done;
          for file in *runner_mpi ; do mpirun -np 2 ./$file --order lex --reporter junit --out ./tests/results/report_$file.xml; done;
          lcov --directory . -b ../pennylane_lightning/src --capture --output-file coverage.info
          lcov --remove coverage.info '/usr/*' --output-file coverage.info
          mv coverage.info coverage-${{ github.job }}-lightning_gpu_${{ matrix.mpilib }}.info

      # NOTE(review): the v3 artifact actions are deprecated. Moving to v4
      # needs a unique artifact name per matrix leg plus a matching change in
      # the download steps, so it must be done across jobs in one go.
      - name: Upload test results
        uses: actions/upload-artifact@v3
        if: always()
        with:
          name: ubuntu-tests-reports
          path: ./Build/tests/results/
          if-no-files-found: error

      - name: Upload code coverage results
        uses: actions/upload-artifact@v3
        with:
          name: ubuntu-codecov-results-cpp
          path: ./Build/coverage-${{ github.job }}-lightning_gpu_${{ matrix.mpilib }}.info
          if-no-files-found: error

      # Always wipe the venv and workspace so the re-used runner starts clean.
      - name: Cleanup
        if: always()
        run: |
          rm -rf ${{ steps.setup_venv.outputs.venv_name }}
          rm -rf * .git .gitignore .github
          pip cache purge

  # Installs Lightning-GPU with MPI support into a fresh venv and runs the
  # Python MPI test-suite under `coverage`. Runs when the PR carries the
  # `ci:use-multi-gpu-runner` label, or on `schedule` / `workflow_dispatch`.
  python_tests:
    if: ${{ contains(github.event.pull_request.labels.*.name, 'ci:use-multi-gpu-runner') || contains(fromJSON('["schedule", "workflow_dispatch"]'), github.event_name) }}
    runs-on:
      - self-hosted
      - linux
      - x64
      - ubuntu-22.04
      - multi-gpu
    strategy:
      # Matrix legs run one at a time.
      max-parallel: 1
      matrix:
        mpilib: ["mpich", "openmpi"]
        cuda_version_maj: ["12"]
        cuda_version_min: ["2"]
    timeout-minutes: 30
    steps:
      - name: Checkout pennyLane-lightning
        uses: actions/checkout@v4
        with:
          fetch-tags: true

      - name: Switch to release build of Lightning
        if: inputs.lightning-version == 'release'
        run: |
          git fetch --all
          git checkout $(git branch -a --list "origin/v*rc*" | tail -1)

      - name: Switch to stable build of Lightning
        if: inputs.lightning-version == 'stable'
        run: |
          git fetch --tags --force
          git checkout $(git tag | sort -V | tail -1)

      # Provides `steps.setup_python.outputs.python-version`, which the venv
      # name below depends on (same setup as in `cpp_tests`).
      - name: Install Python
        id: setup_python
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'

      # Since the self-hosted runner can be re-used, it is best to set up all package
      # installations in a virtual environment that gets cleaned at the end of each workflow run.
      - name: Setup Python virtual environment
        id: setup_venv
        env:
          VENV_NAME: ${{ github.workspace }}/venv_${{ steps.setup_python.outputs.python-version }}_${{ github.sha }}
        run: |
          # Clear any pre-existing venvs
          rm -rf venv_*
          # Create new venv for this workflow_run
          python --version
          python -m venv ${{ env.VENV_NAME }}
          # Add the venv to PATH for subsequent steps
          echo ${{ env.VENV_NAME }}/bin >> $GITHUB_PATH
          # Adding venv name as an output for subsequent steps to reference if needed
          echo "venv_name=${{ env.VENV_NAME }}" >> $GITHUB_OUTPUT

      - name: Display Python-Path
        id: python_path
        run: |
          py_path=$(which python)
          echo "Python Interpreter Path => $py_path"
          echo "python=$py_path" >> $GITHUB_OUTPUT
          pip_path=$(which pip)
          echo "PIP Path => $pip_path"
          echo "pip=$pip_path" >> $GITHUB_OUTPUT

      - name: Install required packages
        run: |
          source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }}/cuda-${{ matrix.cuda_version_maj }}.${{ matrix.cuda_version_min }}
          python -m pip install -r requirements-dev.txt
          python -m pip install custatevec-cu${{ matrix.cuda_version_maj }} mpi4py openfermionpyscf
          PL_BACKEND=lightning_qubit python -m pip install -e . -vv

      - name: Checkout PennyLane for release build
        if: inputs.pennylane-version == 'release'
        uses: actions/checkout@v4
        with:
          path: pennylane
          repository: PennyLaneAI/pennylane

      - name: Switch to release build of PennyLane
        if: inputs.pennylane-version == 'release'
        run: |
          cd pennylane
          git fetch --all
          git checkout $(git branch -a --list "origin/v*rc*" | tail -1)
          python -m pip uninstall -y pennylane && python -m pip install . -vv --no-deps

      # CUQUANTUM_SDK is exported inside the script: values under `env:` are
      # passed verbatim, so a `$(...)` there would never be expanded by a shell.
      - name: Build and install package
        run: |
          source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }}/cuda-${{ matrix.cuda_version_maj }}.${{ matrix.cuda_version_min }}
          export CUQUANTUM_SDK=$(python -c "import site; print( f'{site.getsitepackages()[0]}/cuquantum/lib')")
          CMAKE_ARGS="-DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DENABLE_MPI=ON -DCMAKE_CUDA_COMPILER=$(which nvcc) -DCMAKE_CUDA_ARCHITECTURES=${{ env.CI_CUDA_ARCH }} -DPython_EXECUTABLE=${{ steps.python_path.outputs.python }}" \
          PL_BACKEND=lightning_gpu python -m pip install -e . --verbose

      # There are issues running py-cov with MPI. A solution is to use coverage as reported
      # [here](https://github.com/pytest-dev/pytest-cov/issues/237#issuecomment-544824228)
      - name: Run unit tests for MPI-enabled lightning.gpu device
        run: |
          source /etc/profile.d/modules.sh && module use /opt/modules/ && module load ${{ matrix.mpilib }}/cuda-${{ matrix.cuda_version_maj }}.${{ matrix.cuda_version_min }}
          PL_DEVICE=lightning.gpu mpirun -np 2 \
            coverage run --rcfile=.coveragerc --source=pennylane_lightning -p -m mpi4py -m pytest ./mpitests --tb=native
          coverage combine
          coverage xml -o coverage-${{ github.job }}-lightning_gpu_${{ matrix.mpilib }}_cu${{ matrix.cuda_version_maj }}-main.xml

      # NOTE(review): the v3 artifact actions are deprecated. Moving to v4
      # needs a unique artifact name per matrix leg plus a matching change in
      # the download steps, so it must be done across jobs in one go.
      - name: Upload code coverage results
        uses: actions/upload-artifact@v3
        with:
          name: ubuntu-codecov-results-python
          path: coverage-${{ github.job }}-lightning_gpu_${{ matrix.mpilib }}_cu${{ matrix.cuda_version_maj }}-*.xml
          if-no-files-found: error

      # Always wipe the venv and workspace so the re-used runner starts clean.
      - name: Cleanup
        if: always()
        run: |
          rm -rf ${{ steps.setup_venv.outputs.venv_name }}
          rm -rf * .git .gitignore .github
          pip cache purge

  # Downloads the C++ coverage artifact produced by `cpp_tests` and uploads
  # it to Codecov.
  upload-to-codecov-linux-cpp:
    needs: ["cpp_tests"]
    name: Upload coverage data to codecov
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Download coverage reports
        uses: actions/download-artifact@v3
        with:
          name: ubuntu-codecov-results-cpp

      - name: Upload to Codecov
        uses: codecov/codecov-action@v4
        with:
          fail_ci_if_error: true
          verbose: true
          token: ${{ secrets.CODECOV_TOKEN }}

      # No venv is created in this job, so only the workspace and pip cache
      # are cleared.
      - name: Cleanup
        if: always()
        run: |
          rm -rf * .git .gitignore .github
          pip cache purge

  # Downloads the Python coverage artifact produced by `python_tests` and
  # uploads it to Codecov.
  upload-to-codecov-linux-python:
    needs: ["python_tests"]
    name: Upload coverage data to codecov
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Download coverage reports
        uses: actions/download-artifact@v3
        with:
          name: ubuntu-codecov-results-python

      - name: Upload to Codecov
        uses: codecov/codecov-action@v4
        with:
          fail_ci_if_error: true
          verbose: true
          token: ${{ secrets.CODECOV_TOKEN }}

      # No venv is created in this job, so only the workspace and pip cache
      # are cleared.
      - name: Cleanup
        if: always()
        run: |
          rm -rf * .git .gitignore .github
          pip cache purge