From 21a7641d589eeaa38915831da2f1a19d8f0627af Mon Sep 17 00:00:00 2001 From: sasha0552 Date: Tue, 16 Jul 2024 04:33:12 +0000 Subject: [PATCH] Enable limited api build on cibw --- .github/workflows/build-vllm.yml | 7 +-- ...000-enable-limited-api-build-on-cibw.patch | 56 +++++++++++++++++++ 2 files changed, 58 insertions(+), 5 deletions(-) create mode 100644 patches/vllm/2000-enable-limited-api-build-on-cibw.patch diff --git a/.github/workflows/build-vllm.yml b/.github/workflows/build-vllm.yml index 379b13e..152d93d 100644 --- a/.github/workflows/build-vllm.yml +++ b/.github/workflows/build-vllm.yml @@ -33,6 +33,7 @@ jobs: patches=( "https://raw.githubusercontent.com/${{ github.repository }}/${{ github.sha }}/patches/vllm/0000-enable-support-for-pascal-gpus.patch" "https://raw.githubusercontent.com/${{ github.repository }}/${{ github.sha }}/patches/vllm/1000-set-torch-cuda-arch-list.patch" + "https://raw.githubusercontent.com/${{ github.repository }}/${{ github.sha }}/patches/vllm/2000-enable-limited-api-build-on-cibw.patch" ) # Apply patches @@ -44,7 +45,7 @@ jobs: - name: Build wheels uses: pypa/cibuildwheel@v2.18.0 env: - CIBW_BUILD: cp${{ matrix.python_version }}-manylinux_x86_64 + CIBW_BUILD: cp38-manylinux_x86_64 CIBW_ENVIRONMENT: CMAKE_BUILD_TYPE=Release VLLM_INSTALL_PUNICA_KERNELS=0 CIBW_MANYLINUX_PYPY_X86_64_IMAGE: ghcr.io/sasha0552/manylinux2014_x86_64-cuda CIBW_MANYLINUX_X86_64_IMAGE: ghcr.io/sasha0552/manylinux2014_x86_64-cuda @@ -59,10 +60,6 @@ jobs: prerelease: true tag_name: ${{ github.event.inputs.tag_name }} - strategy: - matrix: - python_version: [38, 39, 310, 311] - on: workflow_dispatch: inputs: diff --git a/patches/vllm/2000-enable-limited-api-build-on-cibw.patch b/patches/vllm/2000-enable-limited-api-build-on-cibw.patch new file mode 100644 index 0000000..907f1fb --- /dev/null +++ b/patches/vllm/2000-enable-limited-api-build-on-cibw.patch @@ -0,0 +1,56 @@ +--- a/setup.py ++++ b/setup.py +@@ -14,6 +14,7 @@ from packaging.version import Version, parse + from setuptools import Extension, find_packages, setup + from setuptools.command.build_ext import build_ext + from torch.utils.cpp_extension import CUDA_HOME ++from wheel.bdist_wheel import bdist_wheel + + + def load_module_from_path(module_name, path): +@@ -234,6 +235,18 @@ class cmake_build_ext(build_ext): + subprocess.check_call(["cmake", *build_args], cwd=self.build_temp) + + ++class bdist_wheel_abi3(bdist_wheel): ++ ++ def get_tag(self): ++ python, abi, plat = super().get_tag() ++ ++ if python.startswith("cp"): ++ # on CPython, our wheels are abi3 and compatible back to 3.8 ++ return "cp38", "abi3", plat ++ ++ return python, abi, plat ++ ++ + def _is_cuda() -> bool: + has_cuda = torch.version.cuda is not None + return (VLLM_TARGET_DEVICE == "cuda" and has_cuda +@@ -440,6 +453,8 @@ def get_requirements() -> List[str]: + + ext_modules = [] + ++cmdclass = {"bdist_wheel": bdist_wheel_abi3} ++ + if _is_cuda() or _is_hip(): + ext_modules.append(CMakeExtension(name="vllm._moe_C")) + +@@ -449,6 +464,8 @@ if _build_custom_ops(): + if _install_punica(): + ext_modules.append(CMakeExtension(name="vllm._punica_C")) + ++ cmdclass["build_ext"] = cmake_build_ext ++ + package_data = { + "vllm": ["py.typed", "model_executor/layers/fused_moe/configs/*.json"] + } +@@ -486,7 +503,7 @@ setup( + extras_require={ + "tensorizer": ["tensorizer>=2.9.0"], + }, +- cmdclass={"build_ext": cmake_build_ext} if _build_custom_ops() else {}, ++ cmdclass=cmdclass, + package_data=package_data, + entry_points={ + "console_scripts": [