Skip to content

Commit

Permalink
vllm fixes for ROCm
Browse files Browse the repository at this point in the history
  • Loading branch information
sasha0552 authored Dec 28, 2023
1 parent c5afd19 commit 5ee2c04
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 15 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@ airootfs/root/customize_airootfs/scripts/0100-koboldcpp-patches.sh
airootfs/root/customize_airootfs/scripts/1000-sillytavern-extras-dependencies.sh
airootfs/root/customize_airootfs/scripts/1000-vllm-dependencies.sh
airootfs/root/customize_airootfs/scripts/1000-text-generation-webui-dependencies.sh
airootfs/root/customize_airootfs/scripts/0100-vllm-patches.sh
airootfs/root/customize_airootfs/scripts/0100-automatic-patches.sh
airootfs/root/customize_airootfs/scripts/9999-cleanup.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@ ROOT_DIR = os.path.dirname(__file__)
@@ -15,8 +15,8 @@ ROOT_DIR = os.path.dirname(__file__)
MAIN_CUDA_VERSION = "12.1"

# Supported NVIDIA GPU architectures.
-NVIDIA_SUPPORTED_ARCHS = {"7.0", "7.5", "8.0", "8.6", "8.9", "9.0"}
+NVIDIA_SUPPORTED_ARCHS = {"6.0", "6.1", "7.0", "7.5", "8.0", "8.6", "8.9", "9.0"}
ROCM_SUPPORTED_ARCHS = {"gfx90a", "gfx908", "gfx906", "gfx1030", "gfx1100"}
-ROCM_SUPPORTED_ARCHS = {"gfx90a", "gfx908", "gfx906", "gfx1030", "gfx1100"}
+NVIDIA_SUPPORTED_ARCHS = {"6.0", "6.1", "6.2", "7.0", "7.2", "7.5", "8.0", "8.6", "8.9", "9.0"}
+ROCM_SUPPORTED_ARCHS = {"gfx803", "gfx900", "gfx906", "gfx908", "gfx90a", "gfx1030", "gfx1100", "gfx1101", "gfx1102"}
# SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS)


@@ -146,9 +146,9 @@ if _is_cuda() and not compute_capabilities:
device_count = torch.cuda.device_count()
for i in range(device_count):
Expand Down
8 changes: 8 additions & 0 deletions airootfs/root/customize_airootfs/scripts/0100-vllm-patches.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
# Apply local patches to the vllm source checkout before it is built.
# Expects $CUSTOMIZE_AIROOTFS to point at the customize_airootfs directory
# (set -u will abort with a clear error if it is unset) and a "vllm"
# directory to exist under the current working directory.
set -eu

# vllm patches
pushd "vllm"
	# enable other architectures: extends NVIDIA_SUPPORTED_ARCHS / ROCM_SUPPORTED_ARCHS
	# in vllm's setup.py so the build covers additional CUDA and ROCm GPU targets
	patch -p1 < "$CUSTOMIZE_AIROOTFS/patches/0100-vllm-enable-other-archs.patch"
popd

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,25 @@ pushd "vllm"
# limit the number of parallel jobs to avoid OOM
export MAX_JOBS=1

{% if CUDA %}
# define supported architectures
export TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6 8.9 9.0"
export TORCH_CUDA_ARCH_LIST="6.0 6.1 6.2 7.0 7.2 7.5 8.0 8.6 8.9 9.0 9.0+PTX"

# cuda home directory
export CUDA_HOME=/opt/cuda

# use gcc 12
export CC=gcc-12
export CXX=g++-12
{% endif %}

{% if ROCm %}
# define supported architectures
export TORCH_CUDA_ARCH_LIST="gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100 gfx1101 gfx1102"

# rocm home directory
export ROCM_HOME=/opt/rocm
{% endif %}

# create venv
python3 -m venv venv
Expand Down Expand Up @@ -58,5 +68,8 @@ pushd "vllm"
# install dependencies (rocm)
pip3 install -r requirements-rocm.txt
{% endif %}

# install dependencies for openai api server
pip3 install accelerate
deactivate
popd

0 comments on commit 5ee2c04

Please sign in to comment.