Skip to content

Commit

Permalink
vllm fixes for ROCm
Browse files Browse the repository at this point in the history
  • Loading branch information
sasha0552 authored Dec 28, 2023
1 parent c5afd19 commit 5ee2c04
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 15 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@ airootfs/root/customize_airootfs/scripts/0100-koboldcpp-patches.sh
airootfs/root/customize_airootfs/scripts/1000-sillytavern-extras-dependencies.sh
airootfs/root/customize_airootfs/scripts/1000-vllm-dependencies.sh
airootfs/root/customize_airootfs/scripts/1000-text-generation-webui-dependencies.sh
airootfs/root/customize_airootfs/scripts/0100-vllm-patches.sh
airootfs/root/customize_airootfs/scripts/0100-automatic-patches.sh
airootfs/root/customize_airootfs/scripts/9999-cleanup.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@ ROOT_DIR = os.path.dirname(__file__)
@@ -15,8 +15,8 @@ ROOT_DIR = os.path.dirname(__file__)
MAIN_CUDA_VERSION = "12.1"

# Supported NVIDIA GPU architectures.
-NVIDIA_SUPPORTED_ARCHS = {"7.0", "7.5", "8.0", "8.6", "8.9", "9.0"}
+NVIDIA_SUPPORTED_ARCHS = {"6.0", "6.1", "7.0", "7.5", "8.0", "8.6", "8.9", "9.0"}
ROCM_SUPPORTED_ARCHS = {"gfx90a", "gfx908", "gfx906", "gfx1030", "gfx1100"}
-ROCM_SUPPORTED_ARCHS = {"gfx90a", "gfx908", "gfx906", "gfx1030", "gfx1100"}
+NVIDIA_SUPPORTED_ARCHS = {"6.0", "6.1", "6.2", "7.0", "7.2", "7.5", "8.0", "8.6", "8.9", "9.0"}
+ROCM_SUPPORTED_ARCHS = {"gfx803", "gfx900", "gfx906", "gfx908", "gfx90a", "gfx1030", "gfx1100", "gfx1101", "gfx1102"}
# SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS)


@@ -146,9 +146,9 @@ if _is_cuda() and not compute_capabilities:
device_count = torch.cuda.device_count()
for i in range(device_count):
Expand Down
8 changes: 8 additions & 0 deletions airootfs/root/customize_airootfs/scripts/0100-vllm-patches.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
# Apply local patches to the vllm source checkout before it is built.
# Expects $CUSTOMIZE_AIROOTFS to point at the customize_airootfs directory
# (set -u will abort with a clear error if it is unset) and a "vllm"
# directory to exist under the current working directory.
set -eu

# vllm patches
pushd "vllm"
	# enable other architectures: extends NVIDIA_SUPPORTED_ARCHS / ROCM_SUPPORTED_ARCHS
	# in vllm's setup.py so the build covers additional CUDA and ROCm GPU targets
	patch -p1 < "$CUSTOMIZE_AIROOTFS/patches/0100-vllm-enable-other-archs.patch"
popd

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,25 @@ pushd "vllm"
# limit the number of parallel jobs to avoid OOM
export MAX_JOBS=1

{% if CUDA %}
# define supported architectures
export TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0"
export TORCH_CUDA_ARCH_LIST="6.0 6.1 6.2 7.0 7.2 7.5 8.0 8.6 8.9 9.0 9.0+PTX"

# cuda home directory
export CUDA_HOME=/opt/cuda

# use gcc 12
export CC=gcc-12
export CXX=g++-12
{% endif %}

{% if ROCm %}
# define supported architectures
export TORCH_CUDA_ARCH_LIST="gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100 gfx1101 gfx1102"

# rocm home directory
export ROCM_HOME=/opt/rocm
{% endif %}

# create venv
python3 -m venv venv
Expand Down Expand Up @@ -58,5 +68,8 @@ pushd "vllm"
# install dependencies (rocm)
pip3 install -r requirements-rocm.txt
{% endif %}

# install dependencies for openai api server
pip3 install accelerate
deactivate
popd

0 comments on commit 5ee2c04

Please sign in to comment.