Merge remote-tracking branch 'origin/main' into supports_quantize_lm_heand
ZX-ModelCloud committed Jan 10, 2025
2 parents de7c0ab + 196afce commit 39fe55c
Showing 194 changed files with 3,581 additions and 166 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/release.yml
@@ -1,5 +1,7 @@
name: Release

run-name: "${{ github.event.inputs.title }}"

defaults:
run:
shell: bash -le {0}
@@ -9,6 +11,10 @@ on:
repository_dispatch:
workflow_dispatch:
inputs:
title:
description: 'set a title for this run'
required: false
default: ''
repo:
description: 'GitHub repo {owner}/{repo}'
required: false
@@ -44,7 +50,7 @@ env:
CUDA_DEVICE_ORDER: PCI_BUS_ID
RUNNER: 10.0.14.248
TORCH_CUDA_ARCH_LIST: '8.0 8.6 8.9 9.0'
CUDA_RELEASE: 1
RELEASE_MODE: 1
CI: 1
GPTQMODEL_FORCE_BUILD: 1
repo: ${{ github.event.inputs.repo || github.repository }}
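The release workflow (and, below, the unit-test workflow) now takes an optional `title` input and echoes it back through `run-name`, so a manually dispatched run can be labelled in the Actions list. As a rough sketch (assuming the GitHub CLI is installed and authenticated against this repository; the values are illustrative):

    # Dispatch the release workflow with a custom run title (illustrative values).
    gh workflow run release.yml \
      -f title="manual release: test wheel rebuild" \
      -f repo="ModelCloud/GPTQModel"

When `title` is left empty (the default, or on repository_dispatch), `run-name` resolves to an empty string and GitHub falls back to its event-based default run name.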
172 changes: 132 additions & 40 deletions .github/workflows/unit_tests.yml
@@ -1,5 +1,7 @@
name: Unit Tests

run-name: "${{ github.event.inputs.title }}"

defaults:
run:
shell: bash -le {0}
@@ -8,6 +10,10 @@ on:
repository_dispatch:
workflow_dispatch:
inputs:
title:
description: 'set a title for this run'
required: false
default: ''
repo:
description: 'GitHub repo {owner}/{repo}'
required: false
@@ -35,13 +41,19 @@ on:
max-parallel:
description: 'max parallel jobs'
required: false
default: '10'
default: '20'
m4-only:
description: 'only run m4(test only)'
description: 'run only m4 test only'
type: boolean
required: false
default: false
exclusive-gpu:
description: 'one test, one gpu. for collecting statistics'
type: boolean
required: false
default: false


env:
CUDA_DEVICE_ORDER: PCI_BUS_ID
CUDA_VISIBLE_DEVICES: 0
@@ -62,9 +74,7 @@ concurrency:

jobs:
check-vm:
runs-on: [ self-hosted, xeon5 ]
container:
image: modelcloud/gptqmodel:alpine-ci-v1
runs-on: ubuntu-latest
outputs:
ip: ${{ steps.get_ip.outputs.ip }}
run_id: ${{ steps.get_ip.outputs.run_id }}
@@ -93,7 +103,7 @@ jobs:
echo "artifact_id=$run_id"
max_p=${{ github.event.inputs.max-parallel }}
max_p="{\"size\": ${max_p:-10}}"
max_p="{\"size\": ${max_p:-20}}"
echo "max-parallel=$max_p" >> "$GITHUB_OUTPUT"
echo "max-parallel=$max_p"
@@ -161,13 +171,11 @@ jobs:
echo "Ignored Test files: $IGNORED_TEST_FILES"
build:
runs-on: [ self-hosted, zen4 ]
runs-on: [ self-hosted, Linux ]
needs: check-vm
if: github.event.inputs.m4-only != 'true' && github.event.inputs.artifact_id == '' && !cancelled()
container:
image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v5
steps:

- name: Checkout Codes
uses: actions/checkout@v4
with:
@@ -196,11 +204,33 @@ jobs:
echo "##### pip list #####"
pip list
- name: Compress dir
run: |
mkdir dist || true
rm -rf dist/* || true
tar -zcf ../gptqmodel_source.tar.gz ./
mv ../gptqmodel_source.tar.gz dist/
sha256=$(sha256sum dist/gptqmodel_source.tar.gz)
echo "hash=$sha256"
echo "SOURCE_HASH=$sha256" >> $GITHUB_ENV
- name: Upload source to local
continue-on-error: true
run: curl -s -F "runid=${{ github.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "sha256=${{ env.SOURCE_HASH }}" -F "file=@dist/gptqmodel_source.tar.gz" http://${{ needs.check-vm.outputs.ip }}/gpu/whl/upload

- name: Upload source to github artifact
uses: actions/upload-artifact@v4
with:
name: source
path: dist/gptqmodel_source.tar.gz

- name: Compile
if: github.event.inputs.m4-only != 'true' && github.event.inputs.artifact_id == '' && !cancelled()
timeout-minutes: 35
run: python setup.py bdist_wheel

- name: Test install
if: github.event.inputs.m4-only != 'true' && github.event.inputs.artifact_id == '' && !cancelled()
run: |
ls -ahl dist
whl=$(ls -t dist/*.whl | head -n 1 | xargs basename)
@@ -213,16 +243,17 @@ jobs:
twine check dist/$whl
uv pip install dist/$whl
- name: Upload wheel
- name: Upload wheel to local
if: github.event.inputs.m4-only != 'true' && github.event.inputs.artifact_id == '' && !cancelled()
continue-on-error: true
run: |
curl -s -F "runid=${{ github.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "sha256=${{ env.WHL_HASH }}" -F "file=@dist/${{ env.WHL_NAME }}" http://${{ needs.check-vm.outputs.ip }}/gpu/whl/upload
run: curl -s -F "runid=${{ github.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "sha256=${{ env.WHL_HASH }}" -F "file=@dist/${{ env.WHL_NAME }}" http://${{ needs.check-vm.outputs.ip }}/gpu/whl/upload

- name: Upload to artifact
- name: Upload wheel to github artifact
if: github.event.inputs.m4-only != 'true' && github.event.inputs.artifact_id == '' && !cancelled()
uses: actions/upload-artifact@v4
with:
name: dist
path: dist
name: whl
path: dist/${{ env.WHL_NAME }}

- name: Clean cache
if: always()
@@ -233,15 +264,16 @@ jobs:
- build
- list-test-files
- check-vm
runs-on: [ self-hosted, xeon5 ]
runs-on: [ self-hosted, Linux ]
if: always() && !cancelled() && (needs.build.result == 'success' || github.event.inputs.artifact_id != '') && github.event.inputs.m4-only != 'true' && needs.list-test-files.outputs.transformers-files != '[]'
container:
image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v5
volumes:
- /home/ci/models:/monster/data/model
- /home/ci/models/huggingface:/github/home/.cache/huggingface
strategy:
fail-fast: false
max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 10 }}
max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 20 }}
matrix:
test_script: ${{ fromJSON(needs.list-test-files.outputs.transformers-files) }}
steps:
@@ -273,26 +305,52 @@ jobs:
echo "== pip list =="
pip list
- name: Download wheel
- name: Download source from local
continue-on-error: true
run: |
curl -s -O http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/gptqmodel_source.tar.gz
ls -ahl .
sha256=$(sha256sum $file_name)
echo "sha256=$sha256"
echo "SOURCE_DOWNLOADED=1" >> $GITHUB_ENV
- name: Download source from github
if: env.SOURCE_DOWNLOADED == '' && !cancelled()
uses: actions/download-artifact@v4
with:
name: source
path: dist
run-id: ${{ needs.check-vm.outputs.run_id }}

- name: Uncompress source
continue-on-error: true
run: |
find . -mindepth 1 ! -name "gptqmodel_source.tar.gz" -exec rm -rf {} +
ls -ahl .
tar -zxf gptqmodel_source.tar.gz
- name: Download wheel from local
continue-on-error: true
run: |
file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }}/gpu/whl/download")
echo "file_name=$file_name"
if echo "$file_name" | grep -q "gptqmodel"; then
mkdir dist || true
cd dist
curl -s -O http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name
ls -ahl .
sha256=$(sha256sum $file_name)
echo "sha256=$sha256"
echo "DOWNLOADED=1" >> $GITHUB_ENV
echo "WHL_DOWNLOADED=1" >> $GITHUB_ENV
fi
- name: Download artifact
if: env.DOWNLOADED == '' && !cancelled()
- name: Download artifact from github
if: env.WHL_DOWNLOADED == '' && !cancelled()
uses: actions/download-artifact@v4
with:
name: dist
name: whl
path: dist
run-id: ${{ needs.check-vm.outputs.run_id }}
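The download steps try the local mirror first and fall back to the GitHub artifact only when `WHL_DOWNLOADED` was never set; the recorded sha256 is logged but not compared against the hash produced by the build job. A verification step could look roughly like the following (hypothetical: `EXPECTED_SHA256` is not an output the workflow currently exposes):

    # Hypothetical integrity check against a hash forwarded from the build job.
    expected="$EXPECTED_SHA256"
    actual=$(sha256sum dist/*.whl | awk '{print $1}')
    if [ "$actual" != "$expected" ]; then
      echo "sha256 mismatch: expected $expected, got $actual"
      exit 1
    fi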

@@ -325,10 +383,10 @@ jobs:
gpu_id=-1
while [ "$gpu_id" -lt 0 ]; do
gpu_id=$(curl -s "http://${{ needs.check-vm.outputs.ip }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp")
gpu_id=$(curl -s "http://${{ needs.check-vm.outputs.ip }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}")
if [ "$gpu_id" -lt 0 ]; then
echo "http://${{ needs.check-vm.outputs.ip }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp returned $gpu_id"
echo "http://${{ needs.check-vm.outputs.ip }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME} returned $gpu_id"
echo "No available GPU, waiting 5 seconds..."
sleep 5
else
@@ -350,24 +408,25 @@ jobs:
- name: Release GPU
if: always() && !contains(matrix.test_script, 'ipex')
run: curl -X GET "http://${{ needs.check-vm.outputs.ip }}/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}"
run: curl -X GET "http://${{ needs.check-vm.outputs.ip }}/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}"
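The GPU lock/release requests now carry the test script and runner name so the coordinator can collect per-test statistics; the Release GPU step runs under `if: always()` so the slot is freed even when the tests fail. The same guarantee, sketched as plain bash with a trap (illustrative; `$SERVER`, `$RUN_ID` and `$TEST` stand in for the workflow's expressions):

    # Reserve a GPU, run the test, and always release the slot on exit.
    release_gpu() {
      curl -s "http://$SERVER/gpu/release?id=$RUN_ID&gpu=$CUDA_VISIBLE_DEVICES&timestamp=$STEP_TIMESTAMP&test=$TEST&runner=$RUNNER_NAME"
    }
    trap release_gpu EXIT

    pytest --durations=0 "tests/$TEST.py"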

torch2_5:
needs:
- build
- list-test-files
- check-vm
runs-on: [ self-hosted, xeon5 ]
runs-on: [ self-hosted, Linux ]
if: always() && !cancelled() && (needs.build.result == 'success' || github.event.inputs.artifact_id != '') && github.event.inputs.m4-only != 'true' && needs.list-test-files.outputs.torch-2-5-files != '[]'
container:
image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v5
options: --device /dev/dri --ipc=host
volumes:
- /dev/dri/by-path:/dev/dri/by-path
- /home/ci/models:/monster/data/model
- /home/ci/models/huggingface:/github/home/.cache/huggingface
strategy:
fail-fast: false
max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 10 }}
max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size || 20 }}
matrix:
test_script: ${{ fromJSON(needs.list-test-files.outputs.torch-2-5-files) }}
steps:
@@ -399,25 +458,53 @@ jobs:
echo "== pip list =="
pip list
- name: Download wheel
- name: Download source from local
continue-on-error: true
run: |
curl -s -O http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/gptqmodel_source.tar.gz
ls -ahl .
sha256=$(sha256sum $file_name)
echo "sha256=$sha256"
echo "SOURCE_DOWNLOADED=1" >> $GITHUB_ENV
- name: Download source from github
if: env.SOURCE_DOWNLOADED == '' && !cancelled()
uses: actions/download-artifact@v4
with:
name: source
path: dist
run-id: ${{ needs.check-vm.outputs.run_id }}

- name: Uncompress source
continue-on-error: true
run: |
find . -mindepth 1 ! -name "gptqmodel_source.tar.gz" -exec rm -rf {} +
ls -ahl .
tar -zxf gptqmodel_source.tar.gz
- name: Download wheel from local
continue-on-error: true
run: |
file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://${{ needs.check-vm.outputs.ip }}/gpu/whl/download")
echo "file_name=$file_name"
if echo "$file_name" | grep -q "gptqmodel"; then
mkdir dist || true
cd dist
curl -s -O http://${{ needs.check-vm.outputs.ip }}/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name
ls -ahl .
sha256=$(sha256sum $file_name)
echo "sha256=$sha256"
echo "DOWNLOADED=1" >> $GITHUB_ENV
echo "WHL_DOWNLOADED=1" >> $GITHUB_ENV
fi
- name: Download artifact
if: env.DOWNLOADED == '' && !cancelled()
- name: Download artifact from github
if: env.WHL_DOWNLOADED == '' && !cancelled()
uses: actions/download-artifact@v4
with:
name: dist
name: whl
path: dist
run-id: ${{ needs.check-vm.outputs.run_id }}

@@ -427,13 +514,18 @@ jobs:
echo "===== install auto_round ====="
uv pip install auto_round
fi
if [ "${{ matrix.test_script }}" == "models/test_cohere2" ]; then
if [ "${{ matrix.test_script }}" == "models/test_cohere2" ] || [ "${{ matrix.test_script }}" == "models/test_gemma" ]; then
echo "===== install transformers from git ====="
uv pip install -U git+https://github.com/huggingface/transformers.git@5615a393691c81e00251e420c73e4d04c6fe22e5
uv pip install -U git+https://github.com/huggingface/transformers.git
fi
if [ "${{ matrix.test_script }}" == "test_ipex_xpu" ]; then
source /etc/profile.d/pyenv.sh && pyenv activate xpu
fi
if [[ "${{ matrix.test_script }}" == *"mlx"* ]]; then
uv pip install mlx_lm --no-build-isolation
fi
echo "===== install dist/whl ====="
uv pip install dist/*.whl
@@ -448,10 +540,10 @@ jobs:
gpu_id=-1
while [ "$gpu_id" -lt 0 ]; do
gpu_id=$(curl -s "http://${{ needs.check-vm.outputs.ip }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp")
gpu_id=$(curl -s "http://${{ needs.check-vm.outputs.ip }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}")
if [ "$gpu_id" -lt 0 ]; then
echo "http://${{ needs.check-vm.outputs.ip }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp returned $gpu_id"
echo "http://${{ needs.check-vm.outputs.ip }}/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME} returned $gpu_id"
echo "No available GPU, waiting 5 seconds..."
sleep 5
else
@@ -476,15 +568,15 @@ jobs:
pytest --durations=0 tests/${{ matrix.test_script }}.py || { echo "ERROR=1" >> $GITHUB_ENV; exit 1; }
execution_time=$(( $(date +%s) - start_time ))
echo "$((execution_time / 60))m $((execution_time % 60))s"
curl "http://${{ needs.check-vm.outputs.ip }}/gpu/log_test_vram?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&range=$execution_time&unit=second&name=${{ matrix.test_script }}"
curl "http://${{ needs.check-vm.outputs.ip }}/gpu/log_test_vram?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&range=$execution_time&unit=second&test=${{ matrix.test_script }}"
- name: Release GPU
if: always() && !contains(matrix.test_script, 'ipex')
run: curl -X GET "http://${{ needs.check-vm.outputs.ip }}/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}"
run: curl -X GET "http://${{ needs.check-vm.outputs.ip }}/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}"

show-statistics:
runs-on: [ self-hosted, xeon5 ]
if: always()
runs-on: [ self-hosted, Linux ]
if: github.event.inputs.exclusive-gpu != 'true'
container:
image: modelcloud/gptqmodel:alpine-ci-v1
needs:
(Diff truncated; the remaining 192 of 194 changed files are not shown.)
