From 21a7641d589eeaa38915831da2f1a19d8f0627af Mon Sep 17 00:00:00 2001
From: sasha0552 <admin@sasha0552.org>
Date: Tue, 16 Jul 2024 04:33:12 +0000
Subject: [PATCH] Enable limited API build on cibw

---
 .github/workflows/build-vllm.yml              |  7 +--
 ...000-enable-limited-api-build-on-cibw.patch | 56 +++++++++++++++++++
 2 files changed, 58 insertions(+), 5 deletions(-)
 create mode 100644 patches/vllm/2000-enable-limited-api-build-on-cibw.patch

diff --git a/.github/workflows/build-vllm.yml b/.github/workflows/build-vllm.yml
index 379b13e..152d93d 100644
--- a/.github/workflows/build-vllm.yml
+++ b/.github/workflows/build-vllm.yml
@@ -33,6 +33,7 @@ jobs:
           patches=(
             "https://raw.githubusercontent.com/${{ github.repository }}/${{ github.sha }}/patches/vllm/0000-enable-support-for-pascal-gpus.patch"
             "https://raw.githubusercontent.com/${{ github.repository }}/${{ github.sha }}/patches/vllm/1000-set-torch-cuda-arch-list.patch"
+            "https://raw.githubusercontent.com/${{ github.repository }}/${{ github.sha }}/patches/vllm/2000-enable-limited-api-build-on-cibw.patch"
           )
 
           # Apply patches
@@ -44,7 +45,7 @@ jobs:
       - name: Build wheels
         uses: pypa/cibuildwheel@v2.18.0
         env:
-          CIBW_BUILD: cp${{ matrix.python_version }}-manylinux_x86_64
+          CIBW_BUILD: cp38-manylinux_x86_64  # one cp38 build; patch 2000 tags it cp38-abi3
           CIBW_ENVIRONMENT: CMAKE_BUILD_TYPE=Release VLLM_INSTALL_PUNICA_KERNELS=0
           CIBW_MANYLINUX_PYPY_X86_64_IMAGE: ghcr.io/sasha0552/manylinux2014_x86_64-cuda
           CIBW_MANYLINUX_X86_64_IMAGE: ghcr.io/sasha0552/manylinux2014_x86_64-cuda
@@ -59,10 +60,6 @@ jobs:
           prerelease: true
           tag_name: ${{ github.event.inputs.tag_name }}
 
-    strategy:
-      matrix:
-        python_version: [38, 39, 310, 311]
-
 on:
   workflow_dispatch:
     inputs:
diff --git a/patches/vllm/2000-enable-limited-api-build-on-cibw.patch b/patches/vllm/2000-enable-limited-api-build-on-cibw.patch
new file mode 100644
index 0000000..907f1fb
--- /dev/null
+++ b/patches/vllm/2000-enable-limited-api-build-on-cibw.patch
@@ -0,0 +1,56 @@
+--- a/setup.py
++++ b/setup.py
+@@ -14,6 +14,7 @@ from packaging.version import Version, parse
+ from setuptools import Extension, find_packages, setup
+ from setuptools.command.build_ext import build_ext
+ from torch.utils.cpp_extension import CUDA_HOME
++from wheel.bdist_wheel import bdist_wheel
+ 
+ 
+ def load_module_from_path(module_name, path):
+@@ -234,6 +235,18 @@ class cmake_build_ext(build_ext):
+         subprocess.check_call(["cmake", *build_args], cwd=self.build_temp)
+ 
+ 
++class bdist_wheel_abi3(bdist_wheel):
++    """bdist_wheel command that tags CPython wheels as cp38-abi3."""
++    def get_tag(self):
++        python, abi, plat = super().get_tag()
++
++        if python.startswith("cp"):
++            # On CPython, tag the wheel abi3 with 3.8 as the minimum version.
++            return "cp38", "abi3", plat
++        # Any other interpreter keeps the tag computed by the base class.
++        return python, abi, plat
++
++
+ def _is_cuda() -> bool:
+     has_cuda = torch.version.cuda is not None
+     return (VLLM_TARGET_DEVICE == "cuda" and has_cuda
+@@ -440,6 +453,8 @@ def get_requirements() -> List[str]:
+ 
+ ext_modules = []
+ 
++cmdclass = {"bdist_wheel": bdist_wheel_abi3}
++
+ if _is_cuda() or _is_hip():
+     ext_modules.append(CMakeExtension(name="vllm._moe_C"))
+ 
+@@ -449,6 +464,8 @@ if _build_custom_ops():
+     if _install_punica():
+         ext_modules.append(CMakeExtension(name="vllm._punica_C"))
+ 
++    cmdclass["build_ext"] = cmake_build_ext
++
+ package_data = {
+     "vllm": ["py.typed", "model_executor/layers/fused_moe/configs/*.json"]
+ }
+@@ -486,7 +503,7 @@ setup(
+     extras_require={
+         "tensorizer": ["tensorizer>=2.9.0"],
+     },
+-    cmdclass={"build_ext": cmake_build_ext} if _build_custom_ops() else {},
++    cmdclass=cmdclass,
+     package_data=package_data,
+     entry_points={
+         "console_scripts": [