From 50278da57451fccccc58b9ec3098ce51ef922594 Mon Sep 17 00:00:00 2001 From: "Peter St. John" Date: Wed, 30 Oct 2024 16:11:10 -0700 Subject: [PATCH 1/2] manually pin pypi versions --- Dockerfile | 108 +++++++---------- requirements-docker.txt | 263 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 307 insertions(+), 64 deletions(-) create mode 100644 requirements-docker.txt diff --git a/Dockerfile b/Dockerfile index 2a74224dd..17ea91a25 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,9 +3,7 @@ ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.02-py3 FROM ${BASE_IMAGE} AS bionemo2-base # Install NeMo dependencies. -WORKDIR /build - -ARG MAX_JOBS=4 +ARG MAX_JOBS=-1 ENV MAX_JOBS=${MAX_JOBS} # See NeMo readme for the latest tested versions of these libraries @@ -14,7 +12,8 @@ RUN git clone https://github.com/NVIDIA/apex.git && \ cd apex && \ git checkout ${APEX_COMMIT} && \ pip install . -v --no-build-isolation --disable-pip-version-check --no-cache-dir \ - --config-settings "--build-option=--cpp_ext --cuda_ext --fast_layer_norm --distributed_adam --deprecated_fused_adam --group_norm" + --config-settings "--build-option=--cpp_ext --cuda_ext --fast_layer_norm --distributed_adam --deprecated_fused_adam --group_norm" && \ + cd .. && rm -rf apex # Transformer Engine pre-1.7.0. 1.7 standardizes the meaning of bits in the attention mask to match ARG TE_COMMIT=7d576ed25266a17a7b651f2c12e8498f67e0baea @@ -23,54 +22,39 @@ RUN git clone https://github.com/NVIDIA/TransformerEngine.git && \ git fetch origin ${TE_COMMIT} && \ git checkout FETCH_HEAD && \ git submodule init && git submodule update && \ - NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi pip install . + NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi pip install . && \ + cd .. && rm -rf TransformerEngine + +# Install core apt packages and addressing Security Scan Vulnerabilities +RUN --mount=type=cache,id=apt-cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,id=apt-lib,target=/var/lib/apt,sharing=locked \ + < Date: Mon, 4 Nov 2024 13:30:01 -0800 Subject: [PATCH 2/2] add a script to update the docker pins --- ci/docker/Dockerfile.pip_deps | 11 ++++ ci/docker/update_docker_pypi_deps.sh | 13 +++++ requirements-docker.txt | 85 ++++++++++++++++------------ 3 files changed, 73 insertions(+), 36 deletions(-) create mode 100644 ci/docker/Dockerfile.pip_deps create mode 100755 ci/docker/update_docker_pypi_deps.sh diff --git a/ci/docker/Dockerfile.pip_deps b/ci/docker/Dockerfile.pip_deps new file mode 100644 index 000000000..57543f1df --- /dev/null +++ b/ci/docker/Dockerfile.pip_deps @@ -0,0 +1,11 @@ +# Base image with apex and transformer engine, but without NeMo or Megatron-LM. +ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.02-py3 +FROM ${BASE_IMAGE} AS bionemo2-base + +# Use UV to install python packages from the workspace. This just installs packages into the system's python +# environment, and does not use the current uv.lock file. +COPY --from=ghcr.io/astral-sh/uv:0.4.25 /uv /usr/local/bin/uv +ENV UV_LINK_MODE=copy \ + UV_COMPILE_BYTECODE=1 \ + UV_PYTHON_DOWNLOADS=never \ + UV_SYSTEM_PYTHON=true diff --git a/ci/docker/update_docker_pypi_deps.sh b/ci/docker/update_docker_pypi_deps.sh new file mode 100755 index 000000000..5ffff571c --- /dev/null +++ b/ci/docker/update_docker_pypi_deps.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +REPO_ROOT=$(git rev-parse --show-toplevel) +docker build $REPO_ROOT -t bionemo-deps -f $REPO_ROOT/ci/docker/Dockerfile.pip_deps + +# Run the container to update the dependencies +docker run --rm -it -v $REPO_ROOT:/workspace -v $HOME/.cache:/root/.cache bionemo-deps /bin/bash -c " + set -eo pipefail + uv pip freeze > /pre-install-packages.txt + uv pip install --no-build-isolation -r /workspace/requirements-docker.txt + uv pip freeze > /post-install-packages.txt + grep -vxFf /pre-install-packages.txt /post-install-packages.txt > /workspace/requirements-docker.txt +" diff --git a/requirements-docker.txt b/requirements-docker.txt index bd712cb5e..1bd5cd8c1 100644 --- a/requirements-docker.txt +++ b/requirements-docker.txt @@ -10,7 +10,8 @@ aniso8601==9.0.1 anndata==0.10.9 antlr4-python3-runtime==4.9.3 anyio==4.6.2.post1 -array_api_compat==1.9.1 +appdirs==1.4.4 +array-api-compat==1.9.1 arrow==1.3.0 asciitree==0.3.3 asgiref==3.8.1 @@ -26,6 +27,7 @@ botocore==1.34.151 braceexpand==0.1.7 causal-conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@96456720c00393a5c32872d8352d7a7ec31fb3db cdifflib==1.2.6 +cellxgene-census==1.16.2 certifi==2024.8.30 cfgv==3.3.1 clip==0.2.0 @@ -35,34 +37,34 @@ cryptography==42.0.8 cytoolz==1.0.0 datasets==2.19.1 decord==0.6.0 -Deprecated==1.2.14 +deprecated==1.2.14 diffusers==0.31.0 dill==0.3.8 -Distance==0.1.3 +distance==0.1.3 distlib==0.3.4 dnspython==2.7.0 docker==7.1.0 docker-pycreds==0.4.0 docopt==0.6.2 -docstring_parser==0.16 +docstring-parser==0.16 docutils==0.16 editdistance==0.8.1 einops-exts==0.0.4 -email_validator==2.2.0 +email-validator==2.2.0 fabric==3.2.2 faiss-cpu==1.8.0.post1 fasteners==0.19 fasttext==0.9.3 fiddle==0.3.0 -Flask==3.0.3 -Flask-RESTful==0.3.10 +flask==3.0.3 +flask-restful==0.3.10 fqdn==1.5.1 ftfy==6.3.1 future==1.0.0 g2p-en==2.1.0 gdown==5.2.0 gitdb==4.0.11 -GitPython==3.1.43 +gitpython==3.1.43 google-api-core==2.22.0 googleapis-common-protos==1.65.0 graphviz==0.20.3 @@ -86,30 +88,31 @@ isodate==0.6.1 isoduration==20.11.0 isort==5.13.2 itsdangerous==2.2.0 -Janome==0.5.0 +janome==0.5.0 jieba==0.42.1 -Jinja2==3.1.4 +jinja2==3.1.4 jiwer==3.0.4 jmespath==1.0.1 jsonpointer==3.0.0 jupyter-events==0.10.0 jupyter-lsp==2.2.5 -jupyter_server==2.14.2 -jupyter_server_terminals==0.5.3 +jupyter-server==2.14.2 +jupyter-server-terminals==0.5.3 jupyterlab==4.3.0 -jupyterlab_server==2.27.3 +jupyterlab-server==2.27.3 kaldi-python-io==1.2.2 kaldiio==2.18.0 kornia==0.7.3 -kornia_rs==0.1.7 +kornia-rs==0.1.7 latexcodec==3.0.0 -Levenshtein==0.26.1 +levenshtein==0.26.1 lhotse==1.27.0 libcst==1.4.0 librosa==0.10.2.post1 lightning==2.4.0 lightning-utilities==0.11.8 lilcom==1.8.0 +llvmlite==0.43.0 loguru==0.7.2 lxml==5.3.0 mamba-ssm @ git+https://github.com/state-spaces/mamba.git@28f623d6542987733283d8e7ea43743afd2072f3 @@ -121,17 +124,18 @@ multiprocess==0.70.16 mypy-extensions==1.0.0 natsort==8.4.0 nbval==0.11.0 -nemo_text_processing==1.1.0 +nemo-text-processing==1.1.0 nerfacc==0.5.3 ngcsdk==3.50.0 nltk==3.9.1 nodeenv==0.13.4 -notebook_shim==0.2.4 +notebook-shim==0.2.4 +numba==0.60.0 numcodecs==0.13.1 +numpy==1.26.4 omegaconf==2.3.0 -onnx==1.17.0 open-clip-torch==2.24.0 -OpenCC==1.1.6 +opencc==1.1.6 opentelemetry-api==1.24.0 opentelemetry-exporter-otlp-proto-common==1.24.0 opentelemetry-exporter-otlp-proto-grpc==1.24.0 @@ -145,11 +149,11 @@ opentelemetry-test-utils==0.45b0 opentelemetry-util-http==0.45b0 overrides==7.7.0 packaging==24.1 -pandas==2.2.3 pangu==4.0.6.1 parameterized==0.9.0 paramiko==3.5.0 pathspec==0.12.1 +patsy==0.5.6 pesq==0.0.4 pfzy==0.3.4 plac==1.4.3 @@ -161,24 +165,24 @@ progress==1.6 propcache==0.2.0 proto-plus==1.25.0 protobuf==3.20.3 -pyannote.core==5.0.0 -pyannote.database==5.1.0 -pyannote.metrics==3.2.1 -pyarrow==18.0.0 +pyannote-core==5.0.0 +pyannote-database==5.1.0 +pyannote-metrics==3.2.1 pyarrow-hotfix==0.6 pybtex==0.24.0 pybtex-docutils==1.0.3 pydantic==2.9.2 -pydantic_core==2.23.4 +pydantic-core==2.23.4 pydub==0.25.1 pyloudnorm==0.1.1 -PyMCubes==0.1.6 -PyNaCl==1.5.0 +pymcubes==0.1.6 +pynacl==1.5.0 pynini==2.1.6.post1 +pynndescent==0.5.13 pypinyin==0.53.0 pypinyin-dict==0.8.0 pyre-extensions==0.0.31 -PySocks==1.7.1 +pysocks==1.7.1 pystoi==0.4.1 pytest-cov==4.1.0 pytest-dependency==0.5.1 @@ -187,34 +191,37 @@ pytest-runner==6.0.1 pytest-timeout==2.2.0 python-json-logger==2.0.7 pytorch-lightning==2.4.0 -RapidFuzz==3.10.1 +rapidfuzz==3.10.1 requests-mock==1.11.0 requests-toolbelt==1.0.0 resampy==0.4.3 rfc3339-validator==0.1.4 rfc3986-validator==0.1.1 -rich==13.8.1 rouge-score==0.1.2 rsa==4.7.2 -ruamel.yaml==0.18.6 -ruamel.yaml.clib==0.2.12 +ruamel-yaml==0.18.6 +ruamel-yaml-clib==0.2.12 s3fs==2023.12.2 s3transfer==0.10.3 sacrebleu==2.4.3 sacremoses==0.1.1 safetensors==0.4.5 +scanpy==1.9.8 +seaborn==0.13.2 semver==2.13.0 sentence-transformers==3.2.1 sentencepiece==0.2.0 sentry-sdk==2.17.0 +session-info==1.0.0 setproctitle==1.3.3 shellingham==1.5.4 shortuuid==1.0.13 smmap==5.0.1 sniffio==1.3.1 snowballstemmer==2.2.0 +somacore==1.0.17 sox==1.5.0 -Sphinx==5.3.0 +sphinx==5.3.0 sphinxcontrib-applehelp==2.0.0 sphinxcontrib-bibtex==2.6.3 sphinxcontrib-devhelp==2.0.0 @@ -222,6 +229,8 @@ sphinxcontrib-htmlhelp==2.1.0 sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==2.0.0 sphinxcontrib-serializinghtml==2.0.0 +statsmodels==0.14.4 +stdlib-list==0.11.0 taming-transformers==0.0.1 tensorstore==0.1.45 termcolor==2.5.0 @@ -230,12 +239,14 @@ text-unidecode==1.3 textdistance==4.6.3 texterrors==0.5.1 tiktoken==0.7.0 +tiledb==0.32.5 +tiledbsoma==1.14.5 timm==1.0.11 tokenizers==0.20.0 torch-cluster==1.6.3 +torch-geometric==2.5.0 torch-scatter==2.1.2 torch-sparse==0.6.18 -torch_geometric==2.5.0 torchdiffeq==0.2.4 torchmetrics==1.5.1 torchsde==0.2.6 @@ -249,15 +260,17 @@ typer==0.12.5 types-python-dateutil==2.9.0.20241003 typing-inspect==0.9.0 tzdata==2024.2 +umap-learn==0.5.7 uri-template==1.3.0 validators==0.34.0 +virtualenv==20.4.7 wandb==0.18.5 webcolors==24.8.0 webdataset==0.2.96 websocket-client==1.8.0 -Werkzeug==3.0.6 +werkzeug==3.0.6 wget==3.2 wrapt==1.16.0 xxhash==3.5.0 -yarl==1.17.0 +yarl==1.17.1 zarr==2.18.3