From 4ce44c7126fde7421d80dac8400f3a898a7dcb19 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Wed, 30 Oct 2024 10:33:45 +0200 Subject: [PATCH 01/26] Add multi step scheduling scenario to jenkins CI --- .../configs/Meta-Llama-3.1-8B-Instruct-mss.yaml | 16 ++++++++++++++++ .../lm-eval-harness/configs/models-small.txt | 3 ++- .../lm-eval-harness/test_lm_eval_correctness.py | 2 ++ 3 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 .jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml diff --git a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml new file mode 100644 index 0000000000000..e6f79ccbde7ee --- /dev/null +++ b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml @@ -0,0 +1,16 @@ +# FIXME(kzawora): these scores were generated using vLLM on HPU, we need to confirm them on HF +# VLLM_SKIP_WARMUP=true bash run-lm-eval-gsm-cot-llama-vllm-baseline.sh -m "/mnt/weka/data/pytorch/llama3.1/Meta-Llama-3.1-8B-Instruct" -b 128 -l 1319 -f 8 -t 1 +model_name: "/mnt/weka/data/pytorch/llama3.1/Meta-Llama-3.1-8B-Instruct" +tasks: +- name: "gsm8k_cot_llama" + metrics: + - name: "exact_match,strict-match" + value: 0.8317 + - name: "exact_match,flexible-extract" + value: 0.8355 +limit: null +num_fewshot: 8 +dtype: "bfloat16" +fewshot_as_multiturn: true +apply_chat_template: true +num_scheduled_steps: 10 \ No newline at end of file diff --git a/.jenkins/lm-eval-harness/configs/models-small.txt b/.jenkins/lm-eval-harness/configs/models-small.txt index d8ae241e58ad3..5bd0aa13b335f 100644 --- a/.jenkins/lm-eval-harness/configs/models-small.txt +++ b/.jenkins/lm-eval-harness/configs/models-small.txt @@ -1,2 +1,3 @@ Meta-Llama-3-8B-Instruct.yaml -Meta-Llama-3.1-8B-Instruct.yaml \ No newline at end of file +Meta-Llama-3.1-8B-Instruct.yaml +Meta-Llama-3.1-8B-Instruct-mss.yaml \ No newline at end of file diff --git a/.jenkins/lm-eval-harness/test_lm_eval_correctness.py b/.jenkins/lm-eval-harness/test_lm_eval_correctness.py index 3df0621f49a72..3d6e81fe8f944 100644 --- a/.jenkins/lm-eval-harness/test_lm_eval_correctness.py +++ b/.jenkins/lm-eval-harness/test_lm_eval_correctness.py @@ -54,6 +54,8 @@ def launch_lm_eval(eval_config): model_args += ",quantization=inc," \ "kv_cache_dtype=fp8_inc," \ "weights_load_device=cpu" + if eval_config.get("num_scheduled_steps"): + model_args += f",num_scheduled_steps={eval_config.get('num_scheduled_steps')}" kwargs = {} if 'fewshot_as_multiturn' in eval_config: kwargs['fewshot_as_multiturn'] = eval_config['fewshot_as_multiturn'] From f6ce4044b91ba5e8c1692057ce811b26ca850766 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Wed, 30 Oct 2024 11:47:24 +0200 Subject: [PATCH 02/26] Fix formatting --- .jenkins/lm-eval-harness/test_lm_eval_correctness.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.jenkins/lm-eval-harness/test_lm_eval_correctness.py b/.jenkins/lm-eval-harness/test_lm_eval_correctness.py index 3d6e81fe8f944..22f1c94d92ffd 100644 --- a/.jenkins/lm-eval-harness/test_lm_eval_correctness.py +++ b/.jenkins/lm-eval-harness/test_lm_eval_correctness.py @@ -55,7 +55,8 @@ def launch_lm_eval(eval_config): "kv_cache_dtype=fp8_inc," \ "weights_load_device=cpu" if eval_config.get("num_scheduled_steps"): - model_args += f",num_scheduled_steps={eval_config.get('num_scheduled_steps')}" + model_args += \ + f",num_scheduled_steps={eval_config.get('num_scheduled_steps')}" kwargs = {} if 'fewshot_as_multiturn' in eval_config: kwargs['fewshot_as_multiturn'] = eval_config['fewshot_as_multiturn'] From 8cfc040bc8f342b8afaa19c788a7b6f3e0eaf072 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Wed, 30 Oct 2024 11:47:24 +0200 Subject: [PATCH 03/26] Add debug print --- .jenkins/lm-eval-harness/test_lm_eval_correctness.py | 1 + 1 file changed, 1 insertion(+) diff --git a/.jenkins/lm-eval-harness/test_lm_eval_correctness.py b/.jenkins/lm-eval-harness/test_lm_eval_correctness.py index 22f1c94d92ffd..0024dc524e7e5 100644 --- a/.jenkins/lm-eval-harness/test_lm_eval_correctness.py +++ b/.jenkins/lm-eval-harness/test_lm_eval_correctness.py @@ -57,6 +57,7 @@ def launch_lm_eval(eval_config): if eval_config.get("num_scheduled_steps"): model_args += \ f",num_scheduled_steps={eval_config.get('num_scheduled_steps')}" + print(f"MODEL_ARGS: {model_args}") kwargs = {} if 'fewshot_as_multiturn' in eval_config: kwargs['fewshot_as_multiturn'] = eval_config['fewshot_as_multiturn'] From b23a18639ddc74234306001cd30a8220321d9dec Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Wed, 30 Oct 2024 14:20:06 +0200 Subject: [PATCH 04/26] Fix typo --- .../configs/Meta-Llama-3.1-8B-Instruct-mss.yaml | 2 +- .jenkins/lm-eval-harness/test_lm_eval_correctness.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml index e6f79ccbde7ee..ff787f1085cba 100644 --- a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml +++ b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml @@ -13,4 +13,4 @@ num_fewshot: 8 dtype: "bfloat16" fewshot_as_multiturn: true apply_chat_template: true -num_scheduled_steps: 10 \ No newline at end of file +num_scheduler_steps: 10 \ No newline at end of file diff --git a/.jenkins/lm-eval-harness/test_lm_eval_correctness.py b/.jenkins/lm-eval-harness/test_lm_eval_correctness.py index 0024dc524e7e5..3b2f76605faab 100644 --- a/.jenkins/lm-eval-harness/test_lm_eval_correctness.py +++ b/.jenkins/lm-eval-harness/test_lm_eval_correctness.py @@ -54,9 +54,9 @@ def launch_lm_eval(eval_config): model_args += ",quantization=inc," \ "kv_cache_dtype=fp8_inc," \ "weights_load_device=cpu" - if eval_config.get("num_scheduled_steps"): + if eval_config.get("num_scheduler_steps"): model_args += \ - f",num_scheduled_steps={eval_config.get('num_scheduled_steps')}" + f",num_scheduler_steps={eval_config.get('num_scheduler_steps')}" print(f"MODEL_ARGS: {model_args}") kwargs = {} if 'fewshot_as_multiturn' in eval_config: From cd7fa7e7d1eb740da2dd51e165470267d9d9f82d Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Wed, 30 Oct 2024 10:58:48 +0100 Subject: [PATCH 05/26] Add fp8 test to jenkins CI (#429) --- .jenkins/lm-eval-harness/test_lm_eval_correctness.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.jenkins/lm-eval-harness/test_lm_eval_correctness.py b/.jenkins/lm-eval-harness/test_lm_eval_correctness.py index 3b2f76605faab..3df0621f49a72 100644 --- a/.jenkins/lm-eval-harness/test_lm_eval_correctness.py +++ b/.jenkins/lm-eval-harness/test_lm_eval_correctness.py @@ -54,10 +54,6 @@ def launch_lm_eval(eval_config): model_args += ",quantization=inc," \ "kv_cache_dtype=fp8_inc," \ "weights_load_device=cpu" - if eval_config.get("num_scheduler_steps"): - model_args += \ - f",num_scheduler_steps={eval_config.get('num_scheduler_steps')}" - print(f"MODEL_ARGS: {model_args}") kwargs = {} if 'fewshot_as_multiturn' in eval_config: kwargs['fewshot_as_multiturn'] = eval_config['fewshot_as_multiturn'] From 4a273605dc944d6ee2b6f446fd0d0f9a5625819a Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Wed, 30 Oct 2024 10:33:45 +0200 Subject: [PATCH 06/26] Add multi step scheduling scenario to jenkins CI --- .jenkins/lm-eval-harness/test_lm_eval_correctness.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.jenkins/lm-eval-harness/test_lm_eval_correctness.py b/.jenkins/lm-eval-harness/test_lm_eval_correctness.py index 3df0621f49a72..22f1c94d92ffd 100644 --- a/.jenkins/lm-eval-harness/test_lm_eval_correctness.py +++ b/.jenkins/lm-eval-harness/test_lm_eval_correctness.py @@ -54,6 +54,9 @@ def launch_lm_eval(eval_config): model_args += ",quantization=inc," \ "kv_cache_dtype=fp8_inc," \ "weights_load_device=cpu" + if eval_config.get("num_scheduled_steps"): + model_args += \ + f",num_scheduled_steps={eval_config.get('num_scheduled_steps')}" kwargs = {} if 'fewshot_as_multiturn' in eval_config: kwargs['fewshot_as_multiturn'] = eval_config['fewshot_as_multiturn'] From 732e7a073a05e905719df521c79ecccafa8887b9 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Wed, 30 Oct 2024 14:20:06 +0200 Subject: [PATCH 07/26] Fix typo --- .jenkins/lm-eval-harness/test_lm_eval_correctness.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.jenkins/lm-eval-harness/test_lm_eval_correctness.py b/.jenkins/lm-eval-harness/test_lm_eval_correctness.py index 22f1c94d92ffd..3b2f76605faab 100644 --- a/.jenkins/lm-eval-harness/test_lm_eval_correctness.py +++ b/.jenkins/lm-eval-harness/test_lm_eval_correctness.py @@ -54,9 +54,10 @@ def launch_lm_eval(eval_config): model_args += ",quantization=inc," \ "kv_cache_dtype=fp8_inc," \ "weights_load_device=cpu" - if eval_config.get("num_scheduled_steps"): + if eval_config.get("num_scheduler_steps"): model_args += \ - f",num_scheduled_steps={eval_config.get('num_scheduled_steps')}" + f",num_scheduler_steps={eval_config.get('num_scheduler_steps')}" + print(f"MODEL_ARGS: {model_args}") kwargs = {} if 'fewshot_as_multiturn' in eval_config: kwargs['fewshot_as_multiturn'] = eval_config['fewshot_as_multiturn'] From 66569c6f5416d369b8fe9e0355572e68e6e9cb53 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Wed, 30 Oct 2024 14:29:16 +0200 Subject: [PATCH 08/26] Cleanup --- .jenkins/lm-eval-harness/test_lm_eval_correctness.py | 1 - 1 file changed, 1 deletion(-) diff --git a/.jenkins/lm-eval-harness/test_lm_eval_correctness.py b/.jenkins/lm-eval-harness/test_lm_eval_correctness.py index 3b2f76605faab..4fce75479972b 100644 --- a/.jenkins/lm-eval-harness/test_lm_eval_correctness.py +++ b/.jenkins/lm-eval-harness/test_lm_eval_correctness.py @@ -57,7 +57,6 @@ def launch_lm_eval(eval_config): if eval_config.get("num_scheduler_steps"): model_args += \ f",num_scheduler_steps={eval_config.get('num_scheduler_steps')}" - print(f"MODEL_ARGS: {model_args}") kwargs = {} if 'fewshot_as_multiturn' in eval_config: kwargs['fewshot_as_multiturn'] = eval_config['fewshot_as_multiturn'] From 87fcca3757a97d6a6c0697159486d73b0d5fdb16 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Wed, 30 Oct 2024 16:33:55 +0200 Subject: [PATCH 09/26] Disable MSS --- .../lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml index ff787f1085cba..c9cf57736034a 100644 --- a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml +++ b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml @@ -13,4 +13,4 @@ num_fewshot: 8 dtype: "bfloat16" fewshot_as_multiturn: true apply_chat_template: true -num_scheduler_steps: 10 \ No newline at end of file +num_scheduler_steps: 1 \ No newline at end of file From 69be7fa9a4b087f50ce8f836a40cc5fe99a76d48 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Wed, 30 Oct 2024 16:59:26 +0200 Subject: [PATCH 10/26] Enable MSS --- .../lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml index c9cf57736034a..ff787f1085cba 100644 --- a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml +++ b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml @@ -13,4 +13,4 @@ num_fewshot: 8 dtype: "bfloat16" fewshot_as_multiturn: true apply_chat_template: true -num_scheduler_steps: 1 \ No newline at end of file +num_scheduler_steps: 10 \ No newline at end of file From 74b5668e9f56c6211a565f58a583b8cc63e5000f Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Thu, 31 Oct 2024 15:10:37 +0200 Subject: [PATCH 11/26] Move MSS tests to separate suite --- .jenkins/lm-eval-harness/configs/models-mss.txt | 1 + .jenkins/lm-eval-harness/configs/models-small.txt | 3 +-- .jenkins/test_config.yaml | 11 +++++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 .jenkins/lm-eval-harness/configs/models-mss.txt diff --git a/.jenkins/lm-eval-harness/configs/models-mss.txt b/.jenkins/lm-eval-harness/configs/models-mss.txt new file mode 100644 index 0000000000000..cfcc3d42d108f --- /dev/null +++ b/.jenkins/lm-eval-harness/configs/models-mss.txt @@ -0,0 +1 @@ +Meta-Llama-3.1-8B-Instruct-mss.yaml \ No newline at end of file diff --git a/.jenkins/lm-eval-harness/configs/models-small.txt b/.jenkins/lm-eval-harness/configs/models-small.txt index 5bd0aa13b335f..d8ae241e58ad3 100644 --- a/.jenkins/lm-eval-harness/configs/models-small.txt +++ b/.jenkins/lm-eval-harness/configs/models-small.txt @@ -1,3 +1,2 @@ Meta-Llama-3-8B-Instruct.yaml -Meta-Llama-3.1-8B-Instruct.yaml -Meta-Llama-3.1-8B-Instruct-mss.yaml \ No newline at end of file +Meta-Llama-3.1-8B-Instruct.yaml \ No newline at end of file diff --git a/.jenkins/test_config.yaml b/.jenkins/test_config.yaml index b32563d6222e9..94ea548b01088 100644 --- a/.jenkins/test_config.yaml +++ b/.jenkins/test_config.yaml @@ -27,3 +27,14 @@ stages: - name: gsm8k_small_g3_tp1_fp8 flavor: g3 command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-fp8.txt -t 1 + - name: test_gsm8k_mss + steps: + - name: gsm8k_small_g3_tp1_mss + flavor: g3 + command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-mss.txt -t 1 + - name: gsm8k_small_g2_tp1_mss + flavor: g2 + command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-mss.txt -t 1 + - name: gsm8k_small_g3_tp2_mss + flavor: g3.s + command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-mss.txt -t 2 From 95b3e7dcb7fa6828dc89112992f3f4edb849d7c6 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Thu, 31 Oct 2024 15:11:49 +0200 Subject: [PATCH 12/26] [DEBUG] Disable all non-mss tests --- .jenkins/test_config.yaml | 54 +++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/.jenkins/test_config.yaml b/.jenkins/test_config.yaml index 94ea548b01088..b4d09bfd85420 100644 --- a/.jenkins/test_config.yaml +++ b/.jenkins/test_config.yaml @@ -1,32 +1,32 @@ # test_config.yaml stages: - - name: test_gsm8k_small_models - steps: - - name: gsm8k_small_g3_tp1 - flavor: g3 - command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1 - - name: gsm8k_small_g3_tp2 - flavor: g3.s - command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 2 - - name: gsm8k_small_g2_tp1 - flavor: g2 - command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1 - - name: gsm8k_small_g2_tp2 - flavor: g2.s - command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 2 - - name: test_gsm8k_large_models - steps: - - name: gsm8k_large_g3_tp2 - flavor: g3.s - command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-large.txt -t 2 - - name: gsm8k_large_g2_tp4 - flavor: g2.m - command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-large.txt -t 4 - - name: test_gsm8k_fp8 - steps: - - name: gsm8k_small_g3_tp1_fp8 - flavor: g3 - command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-fp8.txt -t 1 + # - name: test_gsm8k_small_models + # steps: + # - name: gsm8k_small_g3_tp1 + # flavor: g3 + # command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1 + # - name: gsm8k_small_g3_tp2 + # flavor: g3.s + # command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 2 + # - name: gsm8k_small_g2_tp1 + # flavor: g2 + # command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1 + # - name: gsm8k_small_g2_tp2 + # flavor: g2.s + # command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 2 + # - name: test_gsm8k_large_models + # steps: + # - name: gsm8k_large_g3_tp2 + # flavor: g3.s + # command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-large.txt -t 2 + # - name: gsm8k_large_g2_tp4 + # flavor: g2.m + # command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-large.txt -t 4 + # - name: test_gsm8k_fp8 + # steps: + # - name: gsm8k_small_g3_tp1_fp8 + # flavor: g3 + # command: cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-fp8.txt -t 1 - name: test_gsm8k_mss steps: - name: gsm8k_small_g3_tp1_mss From 0a081d32a4f8633e6ce91059dfe440037abee1a9 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Thu, 31 Oct 2024 15:13:55 +0200 Subject: [PATCH 13/26] [DEBUG] num_scheduler_steps=2 --- .../lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml index ff787f1085cba..8dc02ce0765b2 100644 --- a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml +++ b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml @@ -13,4 +13,4 @@ num_fewshot: 8 dtype: "bfloat16" fewshot_as_multiturn: true apply_chat_template: true -num_scheduler_steps: 10 \ No newline at end of file +num_scheduler_steps: 2 \ No newline at end of file From 177b9b23c72672dd238989fab51fec24d2a19495 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Thu, 31 Oct 2024 15:24:06 +0200 Subject: [PATCH 14/26] [DEBUG] num_scheduler_steps=5 --- .../lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml index 8dc02ce0765b2..f51c84a1d6376 100644 --- a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml +++ b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml @@ -13,4 +13,4 @@ num_fewshot: 8 dtype: "bfloat16" fewshot_as_multiturn: true apply_chat_template: true -num_scheduler_steps: 2 \ No newline at end of file +num_scheduler_steps: 5 \ No newline at end of file From 551c37a1c0731568239a29e6f67521e8a0909fe8 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Thu, 31 Oct 2024 15:29:16 +0200 Subject: [PATCH 15/26] [DEBUG] num_scheduler_steps=20 --- .../lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml index f51c84a1d6376..51eadd23d51b8 100644 --- a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml +++ b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml @@ -13,4 +13,4 @@ num_fewshot: 8 dtype: "bfloat16" fewshot_as_multiturn: true apply_chat_template: true -num_scheduler_steps: 5 \ No newline at end of file +num_scheduler_steps: 20 \ No newline at end of file From 485347a4908887993f7797cb64b29f5fffd20669 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Thu, 31 Oct 2024 15:31:19 +0200 Subject: [PATCH 16/26] [DEBUG] num_scheduler_steps=40 --- .../lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml index 51eadd23d51b8..6c0881a454d46 100644 --- a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml +++ b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml @@ -13,4 +13,4 @@ num_fewshot: 8 dtype: "bfloat16" fewshot_as_multiturn: true apply_chat_template: true -num_scheduler_steps: 20 \ No newline at end of file +num_scheduler_steps: 40 \ No newline at end of file From 150eb46ccf129f6ec8c7b7fb4a828ca486617e1e Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Thu, 31 Oct 2024 15:31:57 +0200 Subject: [PATCH 17/26] [DEBUG] num_scheduler_steps=64 --- .../lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml index 6c0881a454d46..99417807a94f7 100644 --- a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml +++ b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml @@ -13,4 +13,4 @@ num_fewshot: 8 dtype: "bfloat16" fewshot_as_multiturn: true apply_chat_template: true -num_scheduler_steps: 40 \ No newline at end of file +num_scheduler_steps: 64 \ No newline at end of file From 3b32642d8bd3604231b607de98b989d7973657ca Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Thu, 31 Oct 2024 15:33:19 +0200 Subject: [PATCH 18/26] [DEBUG] num_scheduler_steps=128 --- .../lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml index 99417807a94f7..f60f35e7ffd77 100644 --- a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml +++ b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml @@ -13,4 +13,4 @@ num_fewshot: 8 dtype: "bfloat16" fewshot_as_multiturn: true apply_chat_template: true -num_scheduler_steps: 64 \ No newline at end of file +num_scheduler_steps: 128 \ No newline at end of file From 0a6c54c55a6326a8ba77a8176e3eaf91e7227f42 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Mon, 4 Nov 2024 12:47:11 +0200 Subject: [PATCH 19/26] [DEBUG] Apply fix from PR #452 --- vllm/worker/hpu_model_runner.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py index c50e4e244dffe..95dbfd4b45076 100644 --- a/vllm/worker/hpu_model_runner.py +++ b/vllm/worker/hpu_model_runner.py @@ -2109,6 +2109,19 @@ def execute_model( # we only want to pythonize in the last step sampling_metadata.skip_sampler_cpu_output = True self.model.model.sampler.include_gpu_probs_tensor = True + cache_orig_output_tokens_len: List[Dict] = [] + + def try_revert_dummy_output_tokens(): + if len(cache_orig_output_tokens_len) > 0: + # Reuse the original output token ids length + for i, seq_group_metadata in enumerate( + seq_group_metadata_list): + for j, data in seq_group_metadata.seq_data.items(): + orig_output_tokens_len = \ + cache_orig_output_tokens_len[i][j] + data.output_token_ids = \ + data.output_token_ids[:orig_output_tokens_len] + for i in range(num_steps): with self.profiler.record_event('internal', model_event_name): hidden_states = self.model.forward( @@ -2155,17 +2168,22 @@ def execute_model( htorch.core.mark_step() if i < num_steps - 1: if i == 0: - import copy ctx = model_input.async_callback.keywords[ # type: ignore "ctx"] seq_group_metadata_list = ctx.seq_group_metadata_list - seq_group_metadata_list = copy.deepcopy( - seq_group_metadata_list) + # Cache the original output token ids + for i, seq_group_metadata in enumerate( + seq_group_metadata_list): + cache_orig_output_tokens_len.append({}) + for j, data in seq_group_metadata.seq_data.items(): + cache_orig_output_tokens_len[i][j] = \ + len(data.output_token_ids) for seq_group_metadata in seq_group_metadata_list: for data in seq_group_metadata.seq_data.values(): max_output_len = sampling_metadata.seq_groups[ 0].sampling_params.max_tokens if len(data.output_token_ids) < max_output_len - 1: + # add a place holder for prepare_decode # arbitrary value, this could be any token dummy_token = (540, ) data.output_token_ids += (dummy_token) @@ -2185,6 +2203,8 @@ def execute_model( "attn_metadata": self.trim_attn_metadata(result.attn_metadata) }) + else: + try_revert_dummy_output_tokens() if self.is_driver_worker and self.profiler.enabled: # Stop recording 'execute_model' event @@ -2201,6 +2221,7 @@ def execute_model( if num_steps == 1: return [output] else: + try_revert_dummy_output_tokens() return [] return output if type(output) is list else [output] From 5e97e672a13278e28824d60a146971bce7e5e94c Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Mon, 4 Nov 2024 12:53:19 +0200 Subject: [PATCH 20/26] [DEBUG] Fix function order in cherry-picked code --- vllm/worker/hpu_model_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py index 95dbfd4b45076..a0cd05bfbd062 100644 --- a/vllm/worker/hpu_model_runner.py +++ b/vllm/worker/hpu_model_runner.py @@ -2191,6 +2191,7 @@ def try_revert_dummy_output_tokens(): if num_steps == 1: return [output] else: + try_revert_dummy_output_tokens() return [] result = self._prepare_decode(seq_group_metadata_list, @@ -2221,7 +2222,6 @@ def try_revert_dummy_output_tokens(): if num_steps == 1: return [output] else: - try_revert_dummy_output_tokens() return [] return output if type(output) is list else [output] From 3bd98f85522201f7368cf9f16f22b34e932e23a6 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Mon, 4 Nov 2024 13:47:20 +0200 Subject: [PATCH 21/26] [DEBUG] num_scheduler_steps=64 --- .../lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml index f60f35e7ffd77..99417807a94f7 100644 --- a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml +++ b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml @@ -13,4 +13,4 @@ num_fewshot: 8 dtype: "bfloat16" fewshot_as_multiturn: true apply_chat_template: true -num_scheduler_steps: 128 \ No newline at end of file +num_scheduler_steps: 64 \ No newline at end of file From 70e8ff3a0bd4911f05c59eaa903efef667bc9d14 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Mon, 4 Nov 2024 13:49:58 +0200 Subject: [PATCH 22/26] [DEBUG] num_scheduler_steps=40 --- .../lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml index 99417807a94f7..6c0881a454d46 100644 --- a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml +++ b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml @@ -13,4 +13,4 @@ num_fewshot: 8 dtype: "bfloat16" fewshot_as_multiturn: true apply_chat_template: true -num_scheduler_steps: 64 \ No newline at end of file +num_scheduler_steps: 40 \ No newline at end of file From c214c82c3148a9ee3abd3ea3dbad48cf23b89584 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Mon, 4 Nov 2024 13:50:54 +0200 Subject: [PATCH 23/26] [DEBUG] num_scheduler_steps=20 --- .../lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml index 6c0881a454d46..51eadd23d51b8 100644 --- a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml +++ b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml @@ -13,4 +13,4 @@ num_fewshot: 8 dtype: "bfloat16" fewshot_as_multiturn: true apply_chat_template: true -num_scheduler_steps: 40 \ No newline at end of file +num_scheduler_steps: 20 \ No newline at end of file From 912587d6dcb560d18d0fab7806963973140205a3 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Mon, 4 Nov 2024 13:52:10 +0200 Subject: [PATCH 24/26] [DEBUG] num_scheduler_steps=10 --- .../lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml index 51eadd23d51b8..ff787f1085cba 100644 --- a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml +++ b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml @@ -13,4 +13,4 @@ num_fewshot: 8 dtype: "bfloat16" fewshot_as_multiturn: true apply_chat_template: true -num_scheduler_steps: 20 \ No newline at end of file +num_scheduler_steps: 10 \ No newline at end of file From 19182b91ed1cc111863f4c52615406378ee972c6 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Mon, 4 Nov 2024 13:53:17 +0200 Subject: [PATCH 25/26] [DEBUG] num_scheduler_steps=5 --- .../lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml index ff787f1085cba..f51c84a1d6376 100644 --- a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml +++ b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml @@ -13,4 +13,4 @@ num_fewshot: 8 dtype: "bfloat16" fewshot_as_multiturn: true apply_chat_template: true -num_scheduler_steps: 10 \ No newline at end of file +num_scheduler_steps: 5 \ No newline at end of file From 47dc4e5c825c1ac26a3b8ec0985e3392f4d08f91 Mon Sep 17 00:00:00 2001 From: Artur Fierka Date: Mon, 4 Nov 2024 13:57:41 +0200 Subject: [PATCH 26/26] [DEBUG] num_scheduler_steps=2 --- .../lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml index f51c84a1d6376..8dc02ce0765b2 100644 --- a/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml +++ b/.jenkins/lm-eval-harness/configs/Meta-Llama-3.1-8B-Instruct-mss.yaml @@ -13,4 +13,4 @@ num_fewshot: 8 dtype: "bfloat16" fewshot_as_multiturn: true apply_chat_template: true -num_scheduler_steps: 5 \ No newline at end of file +num_scheduler_steps: 2 \ No newline at end of file