From 5e7916b4510814515212fa10287235a8377b6b8b Mon Sep 17 00:00:00 2001
From: Konrad Zawora <kzawora@habana.ai>
Date: Wed, 11 Dec 2024 15:15:18 +0200
Subject: [PATCH 1/3] Retarget GHA lint workflows to habana_main; drop lint-and-deploy

---
 .github/workflows/actionlint.yml       |  4 +-
 .github/workflows/codespell.yml        |  2 +-
 .github/workflows/lint-and-deploy.yaml | 81 --------------------------
 .github/workflows/png-lint.yml         |  2 +-
 .github/workflows/shellcheck.yml       |  4 +-
 .github/workflows/sphinx-lint.yml      |  2 +-
 6 files changed, 7 insertions(+), 88 deletions(-)
 delete mode 100644 .github/workflows/lint-and-deploy.yaml

diff --git a/.github/workflows/actionlint.yml b/.github/workflows/actionlint.yml
index 0226cf0ca00e9..d139f625d98ab 100644
--- a/.github/workflows/actionlint.yml
+++ b/.github/workflows/actionlint.yml
@@ -2,14 +2,14 @@ name: Lint GitHub Actions workflows
 on:
   push:
     branches:
-      - "main"
+      - "habana_main"
     paths:
       - '.github/workflows/*.ya?ml'
       - '.github/workflows/actionlint.*'
       - '.github/workflows/matchers/actionlint.json'
   pull_request:
     branches:
-      - "main"
+      - "habana_main"
     paths:
       - '.github/workflows/*.ya?ml'
       - '.github/workflows/actionlint.*'
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
index 68887adaae54b..2a92c48ee403e 100644
--- a/.github/workflows/codespell.yml
+++ b/.github/workflows/codespell.yml
@@ -5,7 +5,7 @@ on:
   # but only for the main branch
   push:
     branches:
-      - main
+      - habana_main
     paths:
       - "**/*.py"
       - "**/*.md"
diff --git a/.github/workflows/lint-and-deploy.yaml b/.github/workflows/lint-and-deploy.yaml
deleted file mode 100644
index ab6f6e5d2060d..0000000000000
--- a/.github/workflows/lint-and-deploy.yaml
+++ /dev/null
@@ -1,81 +0,0 @@
-name: Lint and Deploy Charts
-
-on: pull_request
-
-jobs:
-  lint-and-deploy:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          fetch-depth: 0
-
-      - name: Set up Helm
-        uses: azure/setup-helm@fe7b79cd5ee1e45176fcad797de68ecaf3ca4814 # v4.2.0
-        with:
-          version: v3.14.4
-
-       #Python is required because ct lint runs Yamale and yamllint which require Python.
-      - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
-        with:
-          python-version: '3.13'
-
-      - name: Set up chart-testing
-        uses: helm/chart-testing-action@e6669bcd63d7cb57cb4380c33043eebe5d111992 # v2.6.1
-        with:
-          version: v3.10.1
-
-      - name: Run chart-testing (lint)
-        run: ct lint --target-branch ${{ github.event.repository.default_branch }} --chart-dirs examples/chart-helm --charts examples/chart-helm
-
-      - name: Setup minio
-        run: |
-          docker network create vllm-net
-          docker run -d -p 9000:9000 --name minio --net vllm-net \
-                     -e "MINIO_ACCESS_KEY=minioadmin" \
-                     -e "MINIO_SECRET_KEY=minioadmin" \
-                     -v /tmp/data:/data \
-                     -v /tmp/config:/root/.minio \
-                     minio/minio server /data
-          export AWS_ACCESS_KEY_ID=minioadmin
-          export AWS_SECRET_ACCESS_KEY=minioadmin
-          export AWS_EC2_METADATA_DISABLED=true
-          mkdir opt-125m
-          cd opt-125m && curl -O -Ls "https://huggingface.co/facebook/opt-125m/resolve/main/{pytorch_model.bin,config.json,generation_config.json,merges.txt,special_tokens_map.json,tokenizer_config.json,vocab.json}" && cd ..
-          aws --endpoint-url http://127.0.0.1:9000/ s3 mb s3://testbucket
-          aws --endpoint-url http://127.0.0.1:9000/ s3 cp opt-125m/ s3://testbucket/opt-125m --recursive
-
-      - name: Create kind cluster
-        uses: helm/kind-action@0025e74a8c7512023d06dc019c617aa3cf561fde # v1.10.0
-
-      - name: Build the Docker image vllm cpu
-        run: docker buildx build -f Dockerfile.cpu -t vllm-cpu-env .
-
-      - name: Configuration of docker images, network and namespace for the kind cluster
-        run: |
-          docker pull amazon/aws-cli:2.6.4
-          kind load docker-image  amazon/aws-cli:2.6.4 --name chart-testing
-          kind load docker-image vllm-cpu-env:latest --name chart-testing
-          docker network connect vllm-net "$(docker ps -aqf "name=chart-testing-control-plane")"
-          kubectl create ns ns-vllm
-
-      - name: Run chart-testing (install)
-        run: |
-          export AWS_ACCESS_KEY_ID=minioadmin
-          export AWS_SECRET_ACCESS_KEY=minioadmin
-          helm install --wait --wait-for-jobs --timeout 5m0s --debug --create-namespace --namespace=ns-vllm test-vllm examples/chart-helm -f examples/chart-helm/values.yaml --set secrets.s3endpoint=http://minio:9000 --set secrets.s3bucketname=testbucket --set secrets.s3accesskeyid=$AWS_ACCESS_KEY_ID --set secrets.s3accesskey=$AWS_SECRET_ACCESS_KEY --set resources.requests.cpu=1 --set resources.requests.memory=4Gi --set resources.limits.cpu=2 --set resources.limits.memory=5Gi --set image.env[0].name=VLLM_CPU_KVCACHE_SPACE --set image.env[1].name=VLLM_LOGGING_LEVEL --set-string image.env[0].value="1" --set-string image.env[1].value="DEBUG" --set-string extraInit.s3modelpath="opt-125m/" --set-string 'resources.limits.nvidia\.com/gpu=0' --set-string 'resources.requests.nvidia\.com/gpu=0' --set-string image.repository="vllm-cpu-env"
-    
-      - name: curl test
-        run: |
-          kubectl -n ns-vllm port-forward service/test-vllm-service 8001:80 &
-          sleep 10
-          CODE="$(curl -v -f --location http://localhost:8001/v1/completions \
-                  --header "Content-Type: application/json" \
-                  --data '{
-                          "model": "opt-125m",
-                          "prompt": "San Francisco is a",
-                          "max_tokens": 7,
-                          "temperature": 0
-                  }'):$CODE"
-          echo "$CODE"
\ No newline at end of file
diff --git a/.github/workflows/png-lint.yml b/.github/workflows/png-lint.yml
index 4932af943a07b..630eb09b62f3a 100644
--- a/.github/workflows/png-lint.yml
+++ b/.github/workflows/png-lint.yml
@@ -2,7 +2,7 @@ name: Lint PNG exports from excalidraw
 on:
   push:
     branches:
-      - "main"
+      - "habana_main"
     paths:
       - '*.excalidraw.png'
       - '.github/workflows/png-lint.yml'
diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml
index 4b1587e373e17..f6931150c795d 100644
--- a/.github/workflows/shellcheck.yml
+++ b/.github/workflows/shellcheck.yml
@@ -2,13 +2,13 @@ name: Lint shell scripts
 on:
   push:
     branches:
-      - "main"
+      - "habana_main"
     paths:
       - '**/*.sh'
       - '.github/workflows/shellcheck.yml'
   pull_request:
     branches:
-      - "main"
+      - "habana_main"
     paths:
       - '**/*.sh'
       - '.github/workflows/shellcheck.yml'
diff --git a/.github/workflows/sphinx-lint.yml b/.github/workflows/sphinx-lint.yml
index e0bb24276a653..02224582bdce2 100644
--- a/.github/workflows/sphinx-lint.yml
+++ b/.github/workflows/sphinx-lint.yml
@@ -3,7 +3,7 @@ name: Lint documentation
 on:
   push:
     branches:
-      - main
+      - habana_main
     paths:
       - "docs/**"
   pull_request:

From dd3505d5b2e7c34be87f765486037ae2fb505747 Mon Sep 17 00:00:00 2001
From: Konrad Zawora <kzawora@habana.ai>
Date: Wed, 11 Dec 2024 15:20:27 +0200
Subject: [PATCH 2/3] Retarget remaining pull_request triggers to habana_main

---
 .github/workflows/codespell.yml   | 2 +-
 .github/workflows/png-lint.yml    | 2 +-
 .github/workflows/sphinx-lint.yml | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
index 2a92c48ee403e..72e732d878e61 100644
--- a/.github/workflows/codespell.yml
+++ b/.github/workflows/codespell.yml
@@ -15,7 +15,7 @@ on:
       - .github/workflows/codespell.yml
   pull_request:
     branches:
-      - main
+      - habana_main
     paths:
       - "**/*.py"
       - "**/*.md"
diff --git a/.github/workflows/png-lint.yml b/.github/workflows/png-lint.yml
index 630eb09b62f3a..140cb5e050a6a 100644
--- a/.github/workflows/png-lint.yml
+++ b/.github/workflows/png-lint.yml
@@ -8,7 +8,7 @@ on:
       - '.github/workflows/png-lint.yml'
   pull_request:
     branches:
-      - "main"
+      - "habana_main"
     paths:
       - '*.excalidraw.png'
       - '.github/workflows/png-lint.yml'
diff --git a/.github/workflows/sphinx-lint.yml b/.github/workflows/sphinx-lint.yml
index 02224582bdce2..a9a179fb33af1 100644
--- a/.github/workflows/sphinx-lint.yml
+++ b/.github/workflows/sphinx-lint.yml
@@ -8,7 +8,7 @@ on:
       - "docs/**"
   pull_request:
     branches:
-      - main
+      - habana_main
     paths:
       - "docs/**"
 

From 275919b8788e72742a397a9646bfe42c99aa3c2a Mon Sep 17 00:00:00 2001
From: Konrad Zawora <kzawora@habana.ai>
Date: Wed, 11 Dec 2024 15:22:11 +0200
Subject: [PATCH 3/3] fix typo in readme

---
 README_GAUDI.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README_GAUDI.md b/README_GAUDI.md
index 0a095ddd0bd73..49d032469de6b 100644
--- a/README_GAUDI.md
+++ b/README_GAUDI.md
@@ -247,7 +247,7 @@ INFO 08-02 17:38:43 hpu_executor.py:91] init_cache_engine took 37.92 GiB of devi
 - `VLLM_HPU_LOG_STEP_GRAPH_COMPILATION_ALL`: if `true`, will log graph compilations per each vLLM engine step, always, even if there were none. Disabled by default.
 - `VLLM_HPU_LOG_STEP_CPU_FALLBACKS`: if `true`, will log cpu fallbacks per each vLLM engine step, only when there was any. Disabled by default.
 - `VLLM_HPU_LOG_STEP_CPU_FALLBACKS_ALL`: if `true`, will log cpu fallbacks per each vLLM engine step, always, even if there were none. Disabled by default.
-- `VLLM_REGIONAL_COMPILATION`: if `false`, turn off regional complation (when using torch.compile execution mode).
+- `VLLM_REGIONAL_COMPILATION`: if `false`, turn off regional compilation (when using torch.compile execution mode).
 
 **Performance tuning knobs:**