From 5e7916b4510814515212fa10287235a8377b6b8b Mon Sep 17 00:00:00 2001 From: Konrad Zawora Date: Wed, 11 Dec 2024 15:15:18 +0200 Subject: [PATCH 1/3] Update GHA --- .github/workflows/actionlint.yml | 4 +- .github/workflows/codespell.yml | 2 +- .github/workflows/lint-and-deploy.yaml | 81 -------------------------- .github/workflows/png-lint.yml | 2 +- .github/workflows/shellcheck.yml | 4 +- .github/workflows/sphinx-lint.yml | 2 +- 6 files changed, 7 insertions(+), 88 deletions(-) delete mode 100644 .github/workflows/lint-and-deploy.yaml diff --git a/.github/workflows/actionlint.yml b/.github/workflows/actionlint.yml index 0226cf0ca00e9..d139f625d98ab 100644 --- a/.github/workflows/actionlint.yml +++ b/.github/workflows/actionlint.yml @@ -2,14 +2,14 @@ name: Lint GitHub Actions workflows on: push: branches: - - "main" + - "habana_main" paths: - '.github/workflows/*.ya?ml' - '.github/workflows/actionlint.*' - '.github/workflows/matchers/actionlint.json' pull_request: branches: - - "main" + - "habana_main" paths: - '.github/workflows/*.ya?ml' - '.github/workflows/actionlint.*' diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index 68887adaae54b..2a92c48ee403e 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -5,7 +5,7 @@ on: # but only for the main branch push: branches: - - main + - habana_main paths: - "**/*.py" - "**/*.md" diff --git a/.github/workflows/lint-and-deploy.yaml b/.github/workflows/lint-and-deploy.yaml deleted file mode 100644 index ab6f6e5d2060d..0000000000000 --- a/.github/workflows/lint-and-deploy.yaml +++ /dev/null @@ -1,81 +0,0 @@ -name: Lint and Deploy Charts - -on: pull_request - -jobs: - lint-and-deploy: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - fetch-depth: 0 - - - name: Set up Helm - uses: azure/setup-helm@fe7b79cd5ee1e45176fcad797de68ecaf3ca4814 # v4.2.0 - with: - version: v3.14.4 - - #Python is required because ct lint runs Yamale and yamllint which require Python. - - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 - with: - python-version: '3.13' - - - name: Set up chart-testing - uses: helm/chart-testing-action@e6669bcd63d7cb57cb4380c33043eebe5d111992 # v2.6.1 - with: - version: v3.10.1 - - - name: Run chart-testing (lint) - run: ct lint --target-branch ${{ github.event.repository.default_branch }} --chart-dirs examples/chart-helm --charts examples/chart-helm - - - name: Setup minio - run: | - docker network create vllm-net - docker run -d -p 9000:9000 --name minio --net vllm-net \ - -e "MINIO_ACCESS_KEY=minioadmin" \ - -e "MINIO_SECRET_KEY=minioadmin" \ - -v /tmp/data:/data \ - -v /tmp/config:/root/.minio \ - minio/minio server /data - export AWS_ACCESS_KEY_ID=minioadmin - export AWS_SECRET_ACCESS_KEY=minioadmin - export AWS_EC2_METADATA_DISABLED=true - mkdir opt-125m - cd opt-125m && curl -O -Ls "https://huggingface.co/facebook/opt-125m/resolve/main/{pytorch_model.bin,config.json,generation_config.json,merges.txt,special_tokens_map.json,tokenizer_config.json,vocab.json}" && cd .. - aws --endpoint-url http://127.0.0.1:9000/ s3 mb s3://testbucket - aws --endpoint-url http://127.0.0.1:9000/ s3 cp opt-125m/ s3://testbucket/opt-125m --recursive - - - name: Create kind cluster - uses: helm/kind-action@0025e74a8c7512023d06dc019c617aa3cf561fde # v1.10.0 - - - name: Build the Docker image vllm cpu - run: docker buildx build -f Dockerfile.cpu -t vllm-cpu-env . - - - name: Configuration of docker images, network and namespace for the kind cluster - run: | - docker pull amazon/aws-cli:2.6.4 - kind load docker-image amazon/aws-cli:2.6.4 --name chart-testing - kind load docker-image vllm-cpu-env:latest --name chart-testing - docker network connect vllm-net "$(docker ps -aqf "name=chart-testing-control-plane")" - kubectl create ns ns-vllm - - - name: Run chart-testing (install) - run: | - export AWS_ACCESS_KEY_ID=minioadmin - export AWS_SECRET_ACCESS_KEY=minioadmin - helm install --wait --wait-for-jobs --timeout 5m0s --debug --create-namespace --namespace=ns-vllm test-vllm examples/chart-helm -f examples/chart-helm/values.yaml --set secrets.s3endpoint=http://minio:9000 --set secrets.s3bucketname=testbucket --set secrets.s3accesskeyid=$AWS_ACCESS_KEY_ID --set secrets.s3accesskey=$AWS_SECRET_ACCESS_KEY --set resources.requests.cpu=1 --set resources.requests.memory=4Gi --set resources.limits.cpu=2 --set resources.limits.memory=5Gi --set image.env[0].name=VLLM_CPU_KVCACHE_SPACE --set image.env[1].name=VLLM_LOGGING_LEVEL --set-string image.env[0].value="1" --set-string image.env[1].value="DEBUG" --set-string extraInit.s3modelpath="opt-125m/" --set-string 'resources.limits.nvidia\.com/gpu=0' --set-string 'resources.requests.nvidia\.com/gpu=0' --set-string image.repository="vllm-cpu-env" - - - name: curl test - run: | - kubectl -n ns-vllm port-forward service/test-vllm-service 8001:80 & - sleep 10 - CODE="$(curl -v -f --location http://localhost:8001/v1/completions \ - --header "Content-Type: application/json" \ - --data '{ - "model": "opt-125m", - "prompt": "San Francisco is a", - "max_tokens": 7, - "temperature": 0 - }'):$CODE" - echo "$CODE" \ No newline at end of file diff --git a/.github/workflows/png-lint.yml b/.github/workflows/png-lint.yml index 4932af943a07b..630eb09b62f3a 100644 --- a/.github/workflows/png-lint.yml +++ b/.github/workflows/png-lint.yml @@ -2,7 +2,7 @@ name: Lint PNG exports from excalidraw on: push: branches: - - "main" + - "habana_main" paths: - '*.excalidraw.png' - '.github/workflows/png-lint.yml' diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml index 4b1587e373e17..f6931150c795d 100644 --- a/.github/workflows/shellcheck.yml +++ b/.github/workflows/shellcheck.yml @@ -2,13 +2,13 @@ name: Lint shell scripts on: push: branches: - - "main" + - "habana_main" paths: - '**/*.sh' - '.github/workflows/shellcheck.yml' pull_request: branches: - - "main" + - "habana_main" paths: - '**/*.sh' - '.github/workflows/shellcheck.yml' diff --git a/.github/workflows/sphinx-lint.yml b/.github/workflows/sphinx-lint.yml index e0bb24276a653..02224582bdce2 100644 --- a/.github/workflows/sphinx-lint.yml +++ b/.github/workflows/sphinx-lint.yml @@ -3,7 +3,7 @@ name: Lint documentation on: push: branches: - - main + - habana_main paths: - "docs/**" pull_request: From dd3505d5b2e7c34be87f765486037ae2fb505747 Mon Sep 17 00:00:00 2001 From: Konrad Zawora Date: Wed, 11 Dec 2024 15:20:27 +0200 Subject: [PATCH 2/3] whoopsie --- .github/workflows/codespell.yml | 2 +- .github/workflows/png-lint.yml | 2 +- .github/workflows/sphinx-lint.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index 2a92c48ee403e..72e732d878e61 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -15,7 +15,7 @@ on: - .github/workflows/codespell.yml pull_request: branches: - - main + - habana_main paths: - "**/*.py" - "**/*.md" diff --git a/.github/workflows/png-lint.yml b/.github/workflows/png-lint.yml index 630eb09b62f3a..140cb5e050a6a 100644 --- a/.github/workflows/png-lint.yml +++ b/.github/workflows/png-lint.yml @@ -8,7 +8,7 @@ on: - '.github/workflows/png-lint.yml' pull_request: branches: - - "main" + - "habana_main" paths: - '*.excalidraw.png' - '.github/workflows/png-lint.yml' diff --git a/.github/workflows/sphinx-lint.yml b/.github/workflows/sphinx-lint.yml index 02224582bdce2..a9a179fb33af1 100644 --- a/.github/workflows/sphinx-lint.yml +++ b/.github/workflows/sphinx-lint.yml @@ -8,7 +8,7 @@ on: - "docs/**" pull_request: branches: - - main + - habana_main paths: - "docs/**" From 275919b8788e72742a397a9646bfe42c99aa3c2a Mon Sep 17 00:00:00 2001 From: Konrad Zawora Date: Wed, 11 Dec 2024 15:22:11 +0200 Subject: [PATCH 3/3] fix typo in readme --- README_GAUDI.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_GAUDI.md b/README_GAUDI.md index 0a095ddd0bd73..49d032469de6b 100644 --- a/README_GAUDI.md +++ b/README_GAUDI.md @@ -247,7 +247,7 @@ INFO 08-02 17:38:43 hpu_executor.py:91] init_cache_engine took 37.92 GiB of devi - `VLLM_HPU_LOG_STEP_GRAPH_COMPILATION_ALL`: if `true`, will log graph compilations per each vLLM engine step, always, even if there were none. Disabled by default. - `VLLM_HPU_LOG_STEP_CPU_FALLBACKS`: if `true`, will log cpu fallbacks per each vLLM engine step, only when there was any. Disabled by default. - `VLLM_HPU_LOG_STEP_CPU_FALLBACKS_ALL`: if `true`, will log cpu fallbacks per each vLLM engine step, always, even if there were none. Disabled by default. -- `VLLM_REGIONAL_COMPILATION`: if `false`, turn off regional complation (when using torch.compile execution mode). +- `VLLM_REGIONAL_COMPILATION`: if `false`, turn off regional compilation (when using torch.compile execution mode). **Performance tuning knobs:**