Nov 6 rebase (sans vllm-project#6143) (#468)
This PR adds all commits before vllm-project#6143 without vllm-project#6143.
kzawora-intel authored Nov 6, 2024
2 parents 5812cb6 + 8e62377 commit 5eb7f3d
Showing 164 changed files with 1,313 additions and 922 deletions.
@@ -56,7 +56,7 @@

 def read_markdown(file):
     if os.path.exists(file):
-        with open(file, "r") as f:
+        with open(file) as f:
             return f.read() + "\n"
     else:
         return f"{file} not found.\n"
@@ -75,14 +75,14 @@ def results_to_json(latency, throughput, serving):
 # collect results
 for test_file in results_folder.glob("*.json"):
 
-    with open(test_file, "r") as f:
+    with open(test_file) as f:
         raw_result = json.loads(f.read())
 
     if "serving" in str(test_file):
         # this result is generated via `benchmark_serving.py`
 
         # attach the benchmarking command to raw_result
-        with open(test_file.with_suffix(".commands"), "r") as f:
+        with open(test_file.with_suffix(".commands")) as f:
            command = json.loads(f.read())
         raw_result.update(command)

@@ -97,7 +97,7 @@ def results_to_json(latency, throughput, serving):
         # this result is generated via `benchmark_latency.py`
 
         # attach the benchmarking command to raw_result
-        with open(test_file.with_suffix(".commands"), "r") as f:
+        with open(test_file.with_suffix(".commands")) as f:
            command = json.loads(f.read())
         raw_result.update(command)

@@ -119,7 +119,7 @@ def results_to_json(latency, throughput, serving):
         # this result is generated via `benchmark_throughput.py`
 
         # attach the benchmarking command to raw_result
-        with open(test_file.with_suffix(".commands"), "r") as f:
+        with open(test_file.with_suffix(".commands")) as f:
            command = json.loads(f.read())
         raw_result.update(command)

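These scripts pair every result JSON with a `.commands` sidecar file via `pathlib.Path.with_suffix`, which swaps only the final extension. A small sketch (file names are hypothetical):

```python
from pathlib import Path

test_file = Path("results/serving_llama8B_tp1.json")  # hypothetical result file
commands_file = test_file.with_suffix(".commands")    # replaces only ".json"

print(commands_file)  # results/serving_llama8B_tp1.commands
```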
@@ -72,15 +72,15 @@ def main(args):

 # collect results
 for test_file in results_folder.glob("*_nightly_results.json"):
-    with open(test_file, "r") as f:
+    with open(test_file) as f:
         results = results + json.loads(f.read())
 
 # generate markdown table
 df = pd.DataFrame.from_dict(results)
 
 md_table = tabulate(df, headers='keys', tablefmt='pipe', showindex=False)
 
-with open(args.description, "r") as f:
+with open(args.description) as f:
     description = f.read()
 
 description = description.format(
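For context on the `md_table` line above: `tabulate` accepts a pandas DataFrame directly, and `tablefmt='pipe'` emits a GitHub-flavored Markdown table. A sketch with made-up data (output shown approximately):

```python
import pandas as pd
from tabulate import tabulate

# Hypothetical nightly-benchmark rows.
df = pd.DataFrame({"Test name": ["llama8B_tp1"], "Tput (req/s)": [12.3]})

print(tabulate(df, headers="keys", tablefmt="pipe", showindex=False))
# | Test name   |   Tput (req/s) |
# |:------------|---------------:|
# | llama8B_tp1 |           12.3 |
```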
@@ -36,11 +36,11 @@
 # collect results
 for test_file in results_folder.glob("*.json"):
 
-    with open(test_file, "r") as f:
+    with open(test_file) as f:
         raw_result = json.loads(f.read())
 
     # attach the benchmarking command to raw_result
-    with open(test_file.with_suffix(".commands"), "r") as f:
+    with open(test_file.with_suffix(".commands")) as f:
         command = json.loads(f.read())
     raw_result.update(command)
2 changes: 1 addition & 1 deletion .buildkite/run-openvino-test.sh
@@ -11,4 +11,4 @@ trap remove_docker_container EXIT
 remove_docker_container
 
 # Run the image and launch offline inference
-docker run --network host --env VLLM_OPENVINO_KVCACHE_SPACE=1 --name openvino-test openvino-test python3 /workspace/vllm/examples/offline_inference.py
+docker run --network host --env VLLM_OPENVINO_KVCACHE_SPACE=1 --name openvino-test openvino-test python3 /workspace/examples/offline_inference.py
4 changes: 2 additions & 2 deletions .buildkite/test-pipeline.yaml
@@ -321,15 +321,14 @@ steps:
   - tests/models/decoder_only/language
   commands:
   - pytest -v -s models/decoder_only/language/test_models.py
-  - pytest -v -s models/decoder_only/language/test_big_models.py
 
 - label: Decoder-only Language Models Test (Extended) # 1h20min
   nightly: true
   source_file_dependencies:
   - vllm/
   - tests/models/decoder_only/language
   commands:
-  - pytest -v -s models/decoder_only/language --ignore=models/decoder_only/language/test_models.py --ignore=models/decoder_only/language/test_big_models.py
+  - pytest -v -s models/decoder_only/language --ignore=models/decoder_only/language/test_models.py
 
 - label: Decoder-only Multi-Modal Models Test (Standard)
   #mirror_hardwares: [amd]
@@ -511,6 +510,7 @@ steps:
   # NOTE: don't test llama model here, it seems hf implementation is buggy
   # see https://github.com/vllm-project/vllm/pull/5689 for details
   - pytest -v -s distributed/test_custom_all_reduce.py
+  - torchrun --nproc_per_node=2 distributed/test_ca_buffer_sharing.py
   - TARGET_TEST_SUITE=A100 pytest basic_correctness/ -v -s -m distributed_2_gpus
   - pytest -v -s -x lora/test_mixtral.py

2 changes: 2 additions & 0 deletions .github/workflows/actionlint.yml
@@ -6,12 +6,14 @@ on:
     paths:
       - '.github/workflows/*.ya?ml'
       - '.github/workflows/actionlint.*'
+      - '.github/workflows/matchers/actionlint.json'
   pull_request:
     branches:
       - "main"
     paths:
       - '.github/workflows/*.ya?ml'
       - '.github/workflows/actionlint.*'
+      - '.github/workflows/matchers/actionlint.json'
 
 env:
   LC_ALL: en_US.UTF-8
12 changes: 12 additions & 0 deletions .github/workflows/clang-format.yml
@@ -6,9 +6,21 @@ on:
   push:
     branches:
       - habana_main
+    paths:
+      - '**/*.h'
+      - '**/*.cpp'
+      - '**/*.cu'
+      - '**/*.cuh'
+      - '.github/workflows/clang-format.yml'
   pull_request:
     branches:
       - habana_main
+    paths:
+      - '**/*.h'
+      - '**/*.cpp'
+      - '**/*.cu'
+      - '**/*.cuh'
+      - '.github/workflows/clang-format.yml'
 
 jobs:
   clang-format:
19 changes: 18 additions & 1 deletion .github/workflows/mypy.yaml
@@ -5,17 +5,34 @@ on:
   # but only for the habana_main branch
   push:
     branches:
+<<<<<<< HEAD
       - habana_main
   pull_request:
     branches:
       - habana_main
+=======
+      - main
+    paths:
+      - '**/*.py'
+      - '.github/workflows/mypy.yaml'
+      - 'tools/mypy.sh'
+      - 'pyproject.toml'
+  pull_request:
+    branches:
+      - main
+    paths:
+      - '**/*.py'
+      - '.github/workflows/mypy.yaml'
+      - 'tools/mypy.sh'
+      - 'pyproject.toml'
+>>>>>>> a5fda50a10641e47c0c290907f30ef2add6d4e7a
 
 jobs:
   mypy:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
     steps:
       - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
       - name: Set up Python ${{ matrix.python-version }}
2 changes: 1 addition & 1 deletion .github/workflows/publish.yml
@@ -48,7 +48,7 @@ jobs:
       fail-fast: false
       matrix:
         os: ['ubuntu-20.04']
-        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+        python-version: ['3.9', '3.10', '3.11', '3.12']
         pytorch-version: ['2.4.0'] # Must be the most recent version that meets requirements-cuda.txt.
         cuda-version: ['11.8', '12.1']
49 changes: 29 additions & 20 deletions .github/workflows/ruff.yml
@@ -6,33 +6,42 @@ on:
   push:
     branches:
       - habana_main
+    paths:
+      - "**/*.py"
+      - pyproject.toml
+      - requirements-lint.txt
+      - .github/workflows/matchers/ruff.json
+      - .github/workflows/ruff.yml
   pull_request:
     branches:
       - habana_main
+    paths:
+      - "**/*.py"
+      - pyproject.toml
+      - requirements-lint.txt
+      - .github/workflows/matchers/ruff.json
+      - .github/workflows/ruff.yml
 
 jobs:
   ruff:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.12"]
     steps:
-      - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install -r requirements-lint.txt
-      - name: Analysing the code with ruff
-        run: |
-          echo "::add-matcher::.github/workflows/matchers/ruff.json"
-          ruff check --output-format github .
-      - name: Spelling check with codespell
-        run: |
-          codespell --toml pyproject.toml
-      - name: Run isort
-        run: |
-          isort . --check-only
+      - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements-lint.txt
+      - name: Analysing the code with ruff
+        run: |
+          echo "::add-matcher::.github/workflows/matchers/ruff.json"
+          ruff check --output-format github .
+      - name: Run isort
+        run: |
+          isort . --check-only
34 changes: 20 additions & 14 deletions .github/workflows/yapf.yml
@@ -6,27 +6,33 @@ on:
   push:
     branches:
      - habana_main
+    paths:
+      - "**/*.py"
+      - .github/workflows/yapf.yml
   pull_request:
     branches:
       - habana_main
+    paths:
+      - "**/*.py"
+      - .github/workflows/yapf.yml
 
 jobs:
   yapf:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.12"]
     steps:
-      - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install yapf==0.32.0
-          pip install toml==0.10.2
-      - name: Running yapf
-        run: |
-          yapf --diff --recursive .
+      - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install yapf==0.32.0
+          pip install toml==0.10.2
+      - name: Running yapf
+        run: |
+          yapf --diff --recursive .
20 changes: 8 additions & 12 deletions .jenkins/lm-eval-harness/test_lm_eval_correctness.py
@@ -76,18 +76,14 @@ def report_performance(task, input_lens, output_lens, time, record_property):
     context_lens = [i + o for i, o in zip(input_lens, output_lens)]
     gen_tput = sum(output_lens) / time
     all_lens = [input_lens, output_lens, context_lens]
-    min_input_tokens, min_output_tokens, min_context_tokens = [
-        min(x) for x in all_lens
-    ]
-    max_input_tokens, max_output_tokens, max_context_tokens = [
-        max(x) for x in all_lens
-    ]
-    mean_input_tokens, mean_output_tokens, mean_context_tokens = [
-        statistics.mean(x) for x in all_lens
-    ]
-    stddev_input_tokens, stddev_output_tokens, stddev_context_tokens = [
-        statistics.stdev(x) for x in all_lens
-    ]
+    min_input_tokens, min_output_tokens, min_context_tokens = (
+        min(x) for x in all_lens)
+    max_input_tokens, max_output_tokens, max_context_tokens = (
+        max(x) for x in all_lens)
+    mean_input_tokens, mean_output_tokens, mean_context_tokens = (
+        statistics.mean(x) for x in all_lens)
+    stddev_input_tokens, stddev_output_tokens, stddev_context_tokens = (
+        statistics.stdev(x) for x in all_lens)
     msg = (
         f'{task} | estimated average generation throughput: {gen_tput:.2f} tokens/s \n'  # noqa: G004, E501
         f'{task} | input_tokens | min: {min_input_tokens} | max: {max_input_tokens} | mean: {mean_input_tokens:.2f} | stddev: {stddev_input_tokens:.2f}\n'  # noqa: E501
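The refactor above replaces four throwaway list comprehensions with generator expressions: tuple unpacking consumes the generator directly, so the intermediate lists are never materialized. A sketch with hypothetical token counts:

```python
import statistics

input_lens, output_lens = [10, 20, 30], [5, 7, 9]
context_lens = [i + o for i, o in zip(input_lens, output_lens)]  # [15, 27, 39]
all_lens = [input_lens, output_lens, context_lens]

# Unpacking a 3-element generator works exactly like unpacking a 3-element list,
# but skips building the intermediate list.
mean_in, mean_out, mean_ctx = (statistics.mean(x) for x in all_lens)
print(mean_in, mean_out, mean_ctx)  # 20 7 27
```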
11 changes: 5 additions & 6 deletions .readthedocs.yaml
@@ -6,17 +6,16 @@ version: 2
 build:
   os: ubuntu-22.04
   tools:
-    python: "3.8"
+    python: '3.9'
 
 sphinx:
-  configuration: docs/source/conf.py
-  fail_on_warning: true
+  configuration: docs/source/conf.py
+  fail_on_warning: true
 
 # If using Sphinx, optionally build your docs in additional formats such as PDF
 formats: []
 
 # Optionally declare the Python requirements required to build your docs
 python:
-  install:
-    - requirements: docs/requirements-docs.txt
-
+  install:
+    - requirements: docs/requirements-docs.txt
