This repository has been archived by the owner on Aug 10, 2024. It is now read-only.

Commit 2fa1dc3: Initial commit
sasha0552 authored May 23, 2024 · 0 parents
Showing 9 changed files with 246 additions and 0 deletions.
70 changes: 70 additions & 0 deletions .github/workflows/build-triton.yml
@@ -0,0 +1,70 @@
name: Build Triton wheel

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.ref }}
          repository: ${{ github.event.inputs.repository }}

      - name: Resolve SOURCE_DATE_EPOCH
        id: source-date-epoch
        run: |
          echo "SOURCE_DATE_EPOCH=$(git log -1 --format=%ct)" | tee $GITHUB_OUTPUT

      - name: Apply patches
        run: |
          set -e

          # List of patches
          patches=(
            "https://raw.githubusercontent.com/${{ github.repository }}/${{ github.sha }}/patches/triton/0000-add-support-for-conversion-fp16-to-fp32.patch"
            "https://raw.githubusercontent.com/${{ github.repository }}/${{ github.sha }}/patches/triton/0000-fix-max-ptx-version.patch"
          )

          # Apply patches
          for patch in "${patches[@]}"; do
            echo "Applying patch $patch"
            curl "$patch" | patch -p1
          done

      - name: Build wheels
        uses: pypa/[email protected]
        with:
          package-dir: python
        env:
          CIBW_BUILD: cp311-manylinux_x86_64
          SOURCE_DATE_EPOCH: ${{ steps.source-date-epoch.outputs.SOURCE_DATE_EPOCH }}

      - name: Create release
        uses: softprops/action-gh-release@v2
        with:
          files: wheelhouse/*.whl
          tag_name: ${{ github.event.inputs.tag_name }}

on:
  workflow_dispatch:
    inputs:
      repository:
        default: triton-lang/triton
        description: Source repository
        required: true
        type: string

      ref:
        default: 3f8d91bb17f6e7bc33dc995ae0860db89d351c7b
        description: Source ref
        required: true
        type: string

      tag_name:
        description: Target tag
        required: true
        type: string

permissions:
  contents: write
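
This workflow only runs on manual dispatch. As a hypothetical sketch (the GitHub CLI invocation and the repository/tag names below are assumptions, not part of this commit), a Triton build against the pinned ref could be triggered with:

# Hypothetical: dispatch the Triton build via the GitHub CLI; tag name illustrative
gh workflow run build-triton.yml \
  --repo sasha0552/vllm-ci \
  -f repository=triton-lang/triton \
  -f ref=3f8d91bb17f6e7bc33dc995ae0860db89d351c7b \
  -f tag_name=triton-build-1

The built wheel is then attached to a release under the given tag_name, which is why the workflow requests the contents: write permission.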
72 changes: 72 additions & 0 deletions .github/workflows/build-vllm.yml
@@ -0,0 +1,72 @@
name: Build vLLM wheel

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.ref }}
          repository: ${{ github.event.inputs.repository }}

      - name: Resolve SOURCE_DATE_EPOCH
        id: source-date-epoch
        run: |
          echo "SOURCE_DATE_EPOCH=$(git log -1 --format=%ct)" | tee $GITHUB_OUTPUT

      - name: Apply patches
        run: |
          set -e

          # List of patches
          patches=(
            "https://raw.githubusercontent.com/${{ github.repository }}/${{ github.sha }}/patches/vllm/0000-enable-support-for-pascal-gpus.patch"
            "https://raw.githubusercontent.com/${{ github.repository }}/${{ github.sha }}/patches/vllm/1000-set-torch-cuda-arch-list.patch"
            "https://raw.githubusercontent.com/${{ github.repository }}/${{ github.sha }}/patches/vllm/9000-add-vllm-command-that-launches-api-server.patch"
          )

          # Apply patches
          for patch in "${patches[@]}"; do
            echo "Applying patch $patch"
            curl "$patch" | patch -p1
          done

      - name: Build wheels
        uses: pypa/[email protected]
        env:
          CIBW_BUILD: cp311-manylinux_x86_64
          CIBW_MANYLINUX_PYPY_X86_64_IMAGE: ghcr.io/sasha0552/manylinux2014_x86_64-cuda
          CIBW_MANYLINUX_X86_64_IMAGE: ghcr.io/sasha0552/manylinux2014_x86_64-cuda
          CIBW_REPAIR_WHEEL_COMMAND: ~
          SOURCE_DATE_EPOCH: ${{ steps.source-date-epoch.outputs.SOURCE_DATE_EPOCH }}

      - name: Create release
        uses: softprops/action-gh-release@v2
        with:
          files: wheelhouse/*.whl
          tag_name: ${{ github.event.inputs.tag_name }}

on:
  workflow_dispatch:
    inputs:
      repository:
        default: vllm-project/vllm
        description: Source repository
        required: true
        type: string

      ref:
        default: main
        description: Source ref
        required: true
        type: string

      tag_name:
        description: Target tag
        required: true
        type: string

permissions:
  contents: write
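
The vLLM workflow is dispatched the same way; since repository and ref have defaults, only the release tag is strictly required (a sketch, with an illustrative repo and tag name):

# Hypothetical: dispatch the vLLM build; repository/ref fall back to their defaults
gh workflow run build-vllm.yml --repo sasha0552/vllm-ci -f tag_name=vllm-build-1

Note that CIBW_REPAIR_WHEEL_COMMAND: ~ sets the repair command to null, which disables cibuildwheel's default auditwheel repair step, while the custom manylinux2014 image supplies the CUDA toolchain needed to compile the kernels.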
21 changes: 21 additions & 0 deletions LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 sasha0552

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
3 changes: 3 additions & 0 deletions README.md
@@ -0,0 +1,3 @@
# vllm-ci

CI scripts designed to build a Pascal-compatible version of vLLM.
18 changes: 18 additions & 0 deletions patches/triton/0000-add-support-for-conversion-fp16-to-fp32.patch
@@ -0,0 +1,18 @@
--- a/lib/Conversion/TritonGPUToLLVM/TritonGPUToLLVMPass.cpp
+++ b/lib/Conversion/TritonGPUToLLVM/TritonGPUToLLVMPass.cpp
@@ -859,9 +859,12 @@ private:

 static Value promoteOperand(OpBuilder &builder, Location loc, Value operand,
                             Type promotedType) {
-  Type tensorPromotedType =
-      operand.getType().cast<RankedTensorType>().cloneWith(std::nullopt,
-                                                           promotedType);
+  RankedTensorType tensor = operand.getType().cast<RankedTensorType>();
+  Type tensorElementType = tensor.getElementType();
+  Type tensorPromotedType = tensor.cloneWith(std::nullopt, promotedType);
+  if (tensorElementType.isF16() && promotedType.isF32()) {
+    return builder.create<arith::ExtFOp>(loc, tensorPromotedType, operand);
+  }
   return builder.create<triton::FpToFpOp>(loc, tensorPromotedType, operand);
 }

21 changes: 21 additions & 0 deletions patches/triton/0000-fix-max-ptx-version.patch
@@ -0,0 +1,21 @@
--- a/lib/Target/PTX/PTXTranslation.cpp
+++ b/lib/Target/PTX/PTXTranslation.cpp
@@ -49,7 +49,7 @@ std::string translateLLVMIRToPTX(llvm::Module &module, int cc, int version,
   // LLVM version in use may not officially support target hardware.
   // Supported versions for LLVM 14 are here:
   // https://github.com/llvm/llvm-project/blob/f28c006a5895fc0e329fe15fead81e37457cb1d1/clang/include/clang/Basic/BuiltinsNVPTX.def
-  int maxPTX = std::min(82, version);
+  int maxPTX = std::min(80, version);
   int maxCC = std::min(90, cc);
   // options
   auto options = llvm::cl::getRegisteredOptions();
@@ -65,8 +65,7 @@ std::string translateLLVMIRToPTX(llvm::Module &module, int cc, int version,
   std::string triple = "nvptx64-nvidia-cuda";
   std::string proc = "sm_" + std::to_string(maxCC);
   std::string layout = "";
-  std::string features = "";
-  // std::string features = "+ptx" + std::to_string(maxPTX);
+  std::string features = "+ptx" + std::to_string(maxPTX);
   for (llvm::Function &f : module.functions()) {
     if (!f.hasFnAttribute(llvm::Attribute::NoInline))
       f.addFnAttr(llvm::Attribute::AlwaysInline);
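
This patch lowers the maximum PTX ISA version from 8.2 to 8.0 and re-enables the +ptx target feature, so the emitted PTX stays within what the targeted toolchain accepts. As a quick local check (assuming a CUDA toolkit on PATH), the release of the local PTX assembler, which bounds the PTX ISA it understands, can be shown with:

# CUDA toolkit assumed; prints the local ptxas release
ptxas --version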
11 changes: 11 additions & 0 deletions patches/vllm/0000-enable-support-for-pascal-gpus.patch
@@ -0,0 +1,11 @@
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,7 +16,7 @@ include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
set(PYTHON_SUPPORTED_VERSIONS "3.8" "3.9" "3.10" "3.11")

# Supported NVIDIA architectures.
-set(CUDA_SUPPORTED_ARCHS "7.0;7.5;8.0;8.6;8.9;9.0")
+set(CUDA_SUPPORTED_ARCHS "6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0")

# Supported AMD GPU architectures.
set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100")
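
This change adds compute capabilities 6.0 and 6.1 to vLLM's supported NVIDIA architectures; these correspond to Pascal GPUs such as the Tesla P100 (6.0) and the GTX 10-series or Tesla P40 (6.1). To confirm what your own GPU reports (a sketch; the compute_cap query field requires a reasonably recent NVIDIA driver):

# Recent NVIDIA driver assumed; prints e.g. GPU name and compute capability
nvidia-smi --query-gpu=name,compute_cap --format=csv,noheader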
10 changes: 10 additions & 0 deletions patches/vllm/1000-set-torch-cuda-arch-list.patch
@@ -0,0 +1,10 @@
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,6 +17,7 @@ set(PYTHON_SUPPORTED_VERSIONS "3.8" "3.9" "3.10" "3.11")

# Supported NVIDIA architectures.
set(CUDA_SUPPORTED_ARCHS "6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0")
+set(TORCH_CUDA_ARCH_LIST "${CUDA_SUPPORTED_ARCHS}")

# Supported AMD GPU architectures.
set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100")
20 changes: 20 additions & 0 deletions patches/vllm/9000-add-vllm-command-that-launches-api-server.patch
@@ -0,0 +1,20 @@
--- a/setup.py
+++ b/setup.py
@@ -430,4 +430,9 @@ def _read_requirements(filename: str) -> List[str]:
     },
     cmdclass={"build_ext": cmake_build_ext} if not _is_neuron() else {},
     package_data=package_data,
+    entry_points={
+        "console_scripts": [
+            "vllm=vllm.scripts:main",
+        ],
+    },
 )
--- /dev/null
+++ b/vllm/scripts.py
@@ -0,0 +1,5 @@
+import subprocess
+import sys
+
+def main():
+    subprocess.run([sys.executable, "-m", "vllm.entrypoints.openai.api_server"] + sys.argv[1:])
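
The new console script is a thin wrapper: it re-executes the OpenAI-compatible API server as a module and forwards all command-line arguments unchanged. Once the patched wheel is installed, usage might look like this (a sketch; the model name is illustrative and the flags belong to the API server itself):

# Hypothetical invocation; equivalent to: python -m vllm.entrypoints.openai.api_server ...
vllm --model facebook/opt-125m --host 0.0.0.0 --port 8000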
