From 8c4c478c38636b29c735cf7251dd353ed14756f0 Mon Sep 17 00:00:00 2001
From: "Wang, Kai Lawrence"
Date: Fri, 20 Dec 2024 14:24:42 +0800
Subject: [PATCH 1/6] Use data param instead of json to send request for faqgen

Signed-off-by: Wang, Kai Lawrence
---
 evals/benchmark/stresscli/locust/aistress.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/evals/benchmark/stresscli/locust/aistress.py b/evals/benchmark/stresscli/locust/aistress.py
index 1264ac41..623c9069 100644
--- a/evals/benchmark/stresscli/locust/aistress.py
+++ b/evals/benchmark/stresscli/locust/aistress.py
@@ -120,12 +120,16 @@ def bench_main(self):
             "faqgenfixed",
             "faqgenbench",
         ]
+        if self.environment.parsed_options.bench_target in ["faqgenfixed", "faqgenbench"]:
+            req_params = {"data": reqData}
+        else:
+            req_params = {"json": reqData}
         test_start_time = time.time()
         try:
             start_ts = time.perf_counter()
             with self.client.post(
                 url,
-                json=reqData,
+                **req_params,
                 stream=True if self.environment.parsed_options.bench_target in streaming_bench_target else False,
                 catch_response=True,
                 timeout=self.environment.parsed_options.http_timeout,

From fb8577d93913c71085b6fcdcc9b497cada720d50 Mon Sep 17 00:00:00 2001
From: "Wang, Kai Lawrence"
Date: Fri, 20 Dec 2024 14:31:21 +0800
Subject: [PATCH 2/6] Fix the input statistics for faqgen benchmark

Signed-off-by: Wang, Kai Lawrence
---
 evals/benchmark/stresscli/locust/tokenresponse.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evals/benchmark/stresscli/locust/tokenresponse.py b/evals/benchmark/stresscli/locust/tokenresponse.py
index 4b6bfe75..afa487ef 100644
--- a/evals/benchmark/stresscli/locust/tokenresponse.py
+++ b/evals/benchmark/stresscli/locust/tokenresponse.py
@@ -15,7 +15,7 @@ def testFunc():

 def respStatics(environment, req, resp):
     tokenizer = transformers.AutoTokenizer.from_pretrained(environment.parsed_options.llm_model)
-    if environment.parsed_options.bench_target in ["chatqnafixed", "chatqnabench"]:
+    if environment.parsed_options.bench_target in ["chatqnafixed", "chatqnabench", "faqgenfixed", "faqgenbench"]:
         num_token_input_prompt = len(tokenizer.encode(req["messages"]))
     elif environment.parsed_options.bench_target in ["llmfixed"]:
         num_token_input_prompt = len(tokenizer.encode(req["query"]))

From 7c9201749dbb855a2a076c9e8b7c64e14e396395 Mon Sep 17 00:00:00 2001
From: "Wang, Kai Lawrence"
Date: Fri, 20 Dec 2024 14:34:31 +0800
Subject: [PATCH 3/6] Update the default prompt for faqgenfixed

Signed-off-by: Wang, Kai Lawrence
---
 evals/benchmark/stresscli/locust/faqgenfixed.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/evals/benchmark/stresscli/locust/faqgenfixed.py b/evals/benchmark/stresscli/locust/faqgenfixed.py
index b648a567..a121c555 100644
--- a/evals/benchmark/stresscli/locust/faqgenfixed.py
+++ b/evals/benchmark/stresscli/locust/faqgenfixed.py
@@ -9,12 +9,7 @@ def getUrl():


 def getReqData():
-    # return {
-    #     "inputs": "What is the revenue of Nike in last 10 years before 2023? Give me detail",
-    #     "parameters": {"max_new_tokens": 128, "do_sample": True},
-    # }
-    # return {"query": "What is the revenue of Nike in last 10 years before 2023? Give me detail", "max_tokens": 128}
-    return {"messages": "What is the revenue of Nike in last 10 years before 2023? Give me detail", "max_tokens": 128}
+    return {"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E6.", "max_tokens": 128}


 def respStatics(environment, reqData, respData):

From 33641c63a323b46441671e36ea7d5fcb7e3f8524 Mon Sep 17 00:00:00 2001
From: "Wang, Kai Lawrence"
Date: Fri, 20 Dec 2024 14:52:18 +0800
Subject: [PATCH 4/6] Implement the complete_response for the streaming output

Signed-off-by: Wang, Kai Lawrence
---
 evals/benchmark/stresscli/locust/aistress.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/evals/benchmark/stresscli/locust/aistress.py b/evals/benchmark/stresscli/locust/aistress.py
index 623c9069..f8793898 100644
--- a/evals/benchmark/stresscli/locust/aistress.py
+++ b/evals/benchmark/stresscli/locust/aistress.py
@@ -173,6 +173,22 @@ def bench_main(self):
                         complete_response += content
                 except json.JSONDecodeError:
                     continue
+            elif self.environment.parsed_options.bench_target in ["faqgenfixed", "faqgenbench"]:
+                client = sseclient.SSEClient(resp)
+                for event in client.events():
+                    if first_token_ts is None:
+                        first_token_ts = time.perf_counter()
+                    try:
+                        data = json.loads(event.data)
+                        for op in data['ops']:
+                            if op['path'] == '/logs/HuggingFaceEndpoint/final_output':
+                                generations = op['value'].get('generations', [])
+                                for generation in generations:
+                                    for item in generation:
+                                        text = item.get("text", "")
+                                        complete_response += text
+                    except json.JSONDecodeError:
+                        continue
             else:
                 client = sseclient.SSEClient(resp)
                 for event in client.events():

From 4c79f8a0ab7f2c36c03a338b1b72013570fe7647 Mon Sep 17 00:00:00 2001
From: "Wang, Kai Lawrence"
Date: Fri, 20 Dec 2024 14:56:16 +0800
Subject: [PATCH 5/6] Set topK=1 for faqgenfixed

Signed-off-by: Wang, Kai Lawrence
---
 evals/benchmark/stresscli/locust/faqgenfixed.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/evals/benchmark/stresscli/locust/faqgenfixed.py b/evals/benchmark/stresscli/locust/faqgenfixed.py
index a121c555..2036c64c 100644
--- a/evals/benchmark/stresscli/locust/faqgenfixed.py
+++ b/evals/benchmark/stresscli/locust/faqgenfixed.py
@@ -9,7 +9,11 @@ def getUrl():


 def getReqData():
-    return {"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E6.", "max_tokens": 128}
+    return {
+        "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E6.",
+        "max_tokens": 128,
+        "top_k": 1,
+    }


 def respStatics(environment, reqData, respData):

From 7ed73fe8a5d104abb1176ffde0758e51420f91d7 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 20 Dec 2024 08:19:03 +0000
Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 evals/benchmark/stresscli/locust/aistress.py    | 6 +++---
 evals/benchmark/stresscli/locust/faqgenfixed.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/evals/benchmark/stresscli/locust/aistress.py b/evals/benchmark/stresscli/locust/aistress.py
index f8793898..436713e0 100644
--- a/evals/benchmark/stresscli/locust/aistress.py
+++ b/evals/benchmark/stresscli/locust/aistress.py
@@ -180,9 +180,9 @@ def bench_main(self):
                         first_token_ts = time.perf_counter()
                     try:
                         data = json.loads(event.data)
-                        for op in data['ops']:
-                            if op['path'] == '/logs/HuggingFaceEndpoint/final_output':
-                                generations = op['value'].get('generations', [])
+                        for op in data["ops"]:
+                            if op["path"] == "/logs/HuggingFaceEndpoint/final_output":
+                                generations = op["value"].get("generations", [])
                                 for generation in generations:
                                     for item in generation:
                                         text = item.get("text", "")

diff --git a/evals/benchmark/stresscli/locust/faqgenfixed.py b/evals/benchmark/stresscli/locust/faqgenfixed.py
index 2036c64c..75f3d29b 100644
--- a/evals/benchmark/stresscli/locust/faqgenfixed.py
+++ b/evals/benchmark/stresscli/locust/faqgenfixed.py
@@ -10,7 +10,7 @@ def getUrl():

 def getReqData():
     return {
-        "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E6.",
+        "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E6.",
         "max_tokens": 128,
         "top_k": 1,
     }
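
Notes on the series follow.

PATCH 1/6 switches the faqgen request from the `json` keyword to the `data` keyword. With requests-style clients, such as the session Locust's HttpUser wraps, the two differ in serialization: `json=` sends the dict as a JSON body with Content-Type: application/json, while `data=` form-encodes it with Content-Type: application/x-www-form-urlencoded, which is evidently what the faqgen endpoint expects. A minimal sketch of the difference; the URL is a placeholder, not the benchmark's actual target:

    import requests

    url = "http://localhost:8888/v1/faqgen"  # placeholder endpoint
    req_data = {"messages": "What is TEI?", "max_tokens": 128}

    # json= serializes the dict to a JSON document:
    #   {"messages": "What is TEI?", "max_tokens": 128}
    requests.post(url, json=req_data)

    # data= form-encodes the same dict:
    #   messages=What+is+TEI%3F&max_tokens=128
    requests.post(url, data=req_data)

PATCH 5/6 additionally pins "top_k": 1 in the fixed payload; top-k sampling with k=1 always selects the most probable token, so decoding becomes effectively greedy and repeated benchmark runs produce more comparable responses.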
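
PATCH 2/6 routes faqgen input statistics through the same branch as chatqna, since both payloads keep the prompt under req["messages"]. The statistic is simply the length of the prompt's token ids under the tokenizer of the model being benchmarked (taken from the benchmark's llm_model option). A standalone sketch; the model name here is only an example:

    import transformers

    # Example model; the benchmark loads whatever its llm_model option names
    tokenizer = transformers.AutoTokenizer.from_pretrained("Intel/neural-chat-7b-v3-3")

    req = {"messages": "What is the revenue of Nike in last 10 years before 2023? Give me detail"}
    num_token_input_prompt = len(tokenizer.encode(req["messages"]))
    print(num_token_input_prompt)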
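
PATCH 4/6 assembles complete_response for faqgen streams, whose SSE events carry LangServe-style JSONPatch documents: the generated text sits in the op whose path is /logs/HuggingFaceEndpoint/final_output, nested under value.generations. The sketch below replays that extraction against a hand-built event payload; the payload's shape is an assumption modeled on the patch, not captured service output:

    import json

    # Hand-built example event in the assumed "ops" (JSONPatch) shape
    event_data = json.dumps(
        {
            "ops": [
                {
                    "op": "add",
                    "path": "/logs/HuggingFaceEndpoint/final_output",
                    "value": {"generations": [[{"text": "Q: What is TEI?\nA: A toolkit for serving embedding models."}]]},
                }
            ]
        }
    )

    complete_response = ""
    for op in json.loads(event_data)["ops"]:
        # Only the final-output op contributes generated text
        if op["path"] == "/logs/HuggingFaceEndpoint/final_output":
            for generation in op["value"].get("generations", []):
                for item in generation:
                    complete_response += item.get("text", "")

    print(complete_response)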