Update itwinai user installation and remove itwinai installer #286

Merged · 4 commits · Jan 10, 2025
56 changes: 42 additions & 14 deletions README.md
@@ -86,26 +86,51 @@ environment for TensorFlow:

### Install itwinai for users

Install itwinai and its dependencies using the
following command, and follow the instructions:
Install itwinai and its dependencies.

```bash
# First, load the required environment modules, if on an HPC

# Second, create a python virtual environment and activate it
$ python -m venv ENV_NAME
$ source ENV_NAME/bin/activate
```
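Which modules to load depends on your HPC system. As a rough illustration only, the Vega setup script touched by this PR loads Python, CMake and CUDA modules before the virtual environment is created, along these lines:

```bash
# Example module loads taken from the Vega script in this PR; adapt to your system
ml Python/3.11.5-GCCcore-13.2.0
ml CMake/3.24.3-GCCcore-11.3.0
ml CUDA/12.3
```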

Install itwinai with support for PyTorch using:

# Install itwinai inside the environment
(ENV_NAME) $ export ML_FRAMEWORK="pytorch" # or "tensorflow"
(ENV_NAME) $ curl -fsSL https://github.com/interTwin-eu/itwinai/raw/main/env-files/itwinai-installer.sh | bash
```bash
pip install itwinai[torch]
```

The `ML_FRAMEWORK` environment variable controls whether you are installing
itwinai for PyTorch or TensorFlow.
or with TensorFlow support using:

```bash
pip install itwinai[tf]

# Alternatively, if you have access to GPUs
pip install itwinai[tf-cuda]
```
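In either case you can verify the core installation with itwinai's sanity check; the `--tensorflow` form is the one used in the Dockerfiles of this PR, while the `--torch` flag shown for the PyTorch case is an assumption:

```bash
# Verify a TensorFlow installation (command used in the Dockerfiles of this PR)
itwinai sanity-check --tensorflow

# Verify a PyTorch installation (assumed analogous flag)
itwinai sanity-check --torch
```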

If you want to use the Prov4ML logger, you need to install it explicitly, since it is only
available on GitHub:

```bash
# For systems with Nvidia GPUs
pip install "prov4ml[nvidia]@git+https://github.com/matbun/ProvML@new-main"

# For macOS
pip install "prov4ml[apple]@git+https://github.com/matbun/ProvML@new-main"
```
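As a minimal, unofficial check that the GitHub install worked (assuming the package is importable as `prov4ml`), you can try importing it:

```bash
python -c "import prov4ml; print('prov4ml is importable')"
```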

If you also want to install Horovod and Microsoft DeepSpeed for distributed ML with PyTorch,
install them *after* itwinai. You can use this command:

```bash
curl -fsSL https://github.com/interTwin-eu/itwinai/raw/main/env-files/torch/install-horovod-deepspeed-cuda.sh | bash
```
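After the script completes, both libraries provide their own diagnostics that you can run to confirm the builds (standard Horovod and DeepSpeed commands, independent of itwinai):

```bash
# Confirm that Horovod was built with PyTorch (and NCCL/MPI) support
horovodrun --check-build

# Print DeepSpeed's compatibility report for the current environment
ds_report
```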

> [!WARNING]
> itwinai depends on Horovod, which requires `CMake>=3.13` and
> Horovod requires `CMake>=3.13` and
> [other packages](https://horovod.readthedocs.io/en/latest/install_include.html#requirements).
> Make sure to have them installed in your environment before proceeding.

@@ -163,11 +188,9 @@ commands they will use the version from your venv.

##### Installation of packages

We provide some _extras_ that can be activated depending on which platform you are
We provide some *extras* that can be activated depending on which platform you are
using.

- `macos`, `amd` or `nvidia` depending on which platform you use. Changes the version
of `prov4ML`.
- `dev` for development purposes. Includes libraries for testing, TensorBoard, etc.
- `torch` for installation with PyTorch.

@@ -180,18 +203,23 @@ directory, as you can very easily reach your disk quota otherwise. An example of
complete command for installing as a developer on HPC with CUDA thus becomes:

```bash
pip install -e ".[torch,dev,nvidia,tf]" \
pip install -e ".[torch,dev,tf]" \
--no-cache-dir \
--extra-index-url https://download.pytorch.org/whl/cu121
```
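After an editable install against the CUDA wheel index, a quick sanity check (plain PyTorch, shown here only as a convenience) is to confirm that torch actually sees the GPUs on a compute node:

```bash
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
```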

If you wanted to install this locally on macOS (i.e. without CUDA) with PyTorch, you
If you wanted to install this locally on **macOS** (i.e. without CUDA) with PyTorch, you
would do the following instead:

```bash
pip install -e ".[torch,dev,macos,tf]"
pip install -e ".[torch,dev,tf]"
```
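On Apple silicon you can run the analogous check for the MPS backend (again just a convenience, not part of the official instructions):

```bash
python -c "import torch; print(torch.backends.mps.is_available())"
```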

If you want to use the [Prov4ML](https://github.com/HPCI-Lab/yProvML) logger, you need to install
it explicitly, since it is only available on GitHub. Please refer to the
[users installation](#install-itwinai-for-users)
section for details on how to install Prov4ML.

<!-- You can create the Python virtual environments using our predefined Makefile targets. -->

#### Horovod and DeepSpeed
50 changes: 0 additions & 50 deletions env-files/itwinai-installer.sh

This file was deleted.

2 changes: 1 addition & 1 deletion env-files/tensorflow/Dockerfile
@@ -41,7 +41,7 @@ ENV PYTHONPATH=""
# Install itwinai
COPY pyproject.toml pyproject.toml
COPY src src
RUN pip install --no-cache-dir .[prov4ml-nvidia] \
RUN pip install --no-cache-dir .[tf-cuda] \
&& itwinai sanity-check --tensorflow --optional-deps ray

# Additional pip deps
8 changes: 7 additions & 1 deletion env-files/tensorflow/generic_tf.sh
@@ -25,4 +25,10 @@ else
fi

source $ENV_NAME/bin/activate
pip install --no-cache-dir -e ".[dev,prov4ml-nvidia,tf]"

if [ -z "$NO_CUDA" ]; then
  TF_EXTRA="tf-cuda"
else
  TF_EXTRA="tf"
fi
pip install --no-cache-dir -e ".[$TF_EXTRA,dev]"
2 changes: 1 addition & 1 deletion env-files/torch/Dockerfile
@@ -35,7 +35,7 @@ RUN pip install --no-cache-dir --upgrade pip \
# Install itwinai with torch
COPY pyproject.toml pyproject.toml
COPY src src
RUN pip install --no-cache-dir .[torch,tf,prov4ml-nvidia,dev] --extra-index-url https://download.pytorch.org/whl/cu124
RUN pip install --no-cache-dir .[torch,tf,dev] --extra-index-url https://download.pytorch.org/whl/cu124

# Install DeepSpeed, Horovod and Ray
ENV HOROVOD_WITH_PYTORCH=1 \
11 changes: 9 additions & 2 deletions env-files/torch/createEnvVega.sh
@@ -1,6 +1,15 @@
#!/bin/bash
# -*- coding: utf-8 -*-

# --------------------------------------------------------------------------------------
# Part of the interTwin Project: https://www.intertwin.eu/
#
# Created by: Matteo Bunino
#
# Credit:
# - Matteo Bunino <[email protected]> - CERN
# --------------------------------------------------------------------------------------

if [ ! -f "env-files/torch/generic_torch.sh" ]; then
echo "ERROR: env-files/torch/generic_torch.sh not found!"
exit 1
@@ -13,10 +22,8 @@ ml Python/3.11.5-GCCcore-13.2.0
ml CMake/3.24.3-GCCcore-11.3.0
ml mpi4py
ml OpenMPI
#ml CUDA/11.7
ml CUDA/12.3
ml GCCcore/11.3.0
#ml NCCL/2.12.12-GCCcore-11.3.0-CUDA-11.7.0
ml NCCL
ml cuDNN/8.9.7.29-CUDA-12.3.0
ml UCX-CUDA/1.15.0-GCCcore-13.2.0-CUDA-12.3.0
2 changes: 1 addition & 1 deletion env-files/torch/generic_torch.sh
@@ -26,6 +26,6 @@ fi

# Activate the venv and then install itwinai as editable
source $ENV_NAME/bin/activate
pip install -e ".[torch,tf,dev,prov4ml-nvidia]" \
pip install -e ".[torch,tf,dev]" \
--no-cache-dir \
--extra-index-url https://download.pytorch.org/whl/cu121
@@ -1,5 +1,7 @@
#!/bin/bash

# Installation for JSC

# Job configuration
#SBATCH --job-name=setup_venv
#SBATCH --account=intertwin
2 changes: 1 addition & 1 deletion env-files/torch/jupyter/Dockerfile
@@ -216,7 +216,7 @@ RUN pip install --no-cache-dir --upgrade pip \
WORKDIR "$HOME/itwinai"
COPY --chown=${NB_UID} pyproject.toml pyproject.toml
COPY --chown=${NB_UID} src src
RUN pip install --no-cache-dir .[torch,prov4ml-nvidia,dev] --extra-index-url https://download.pytorch.org/whl/cu124
RUN pip install --no-cache-dir .[torch,dev] --extra-index-url https://download.pytorch.org/whl/cu124

# Apex: https://github.com/NVIDIA/apex
# (needed for DeepSpeed *_FUSED optional build options)
2 changes: 1 addition & 1 deletion env-files/torch/slim.Dockerfile
@@ -56,7 +56,7 @@ RUN /usr/bin/python3.10 -m venv /opt/venv \
WORKDIR /app
COPY pyproject.toml pyproject.toml
COPY src src
RUN pip install --no-cache-dir .[torch,prov4ml-nvidia,dev] --extra-index-url https://download.pytorch.org/whl/cu124
RUN pip install --no-cache-dir .[torch,dev] --extra-index-url https://download.pytorch.org/whl/cu124

# Install DeepSpeed, Horovod and Ray
RUN CONTAINER_TORCH_VERSION="$(python -c 'import torch;print(torch.__version__)')" \
11 changes: 6 additions & 5 deletions pyproject.toml
@@ -78,11 +78,12 @@ hpo = [
"hpbandster>=0.7.0",
"gpy>=1.13.2",
]
prov4ml = ["prov4ml@git+https://github.com/matbun/ProvML@new-main"]
prov4ml-macos = ["prov4ml[apple]@git+https://github.com/matbun/ProvML@new-main"]
prov4ml-nvidia = [
"prov4ml[nvidia]@git+https://github.com/matbun/ProvML@new-main",
]
# Git dependencies are not supported by PyPI and will prevent pushing new versions to it.
# prov4ml = ["prov4ml@git+https://github.com/matbun/ProvML@new-main"]
# prov4ml-macos = ["prov4ml[apple]@git+https://github.com/matbun/ProvML@new-main"]
# prov4ml-nvidia = [
# "prov4ml[nvidia]@git+https://github.com/matbun/ProvML@new-main",
# ]
# prov4ml-amd = ["prov4ml[amd]@git+https://github.com/matbun/ProvML@new-main"]

[project.urls]