From 8c4c478c38636b29c735cf7251dd353ed14756f0 Mon Sep 17 00:00:00 2001
From: "Wang, Kai Lawrence"
Date: Fri, 20 Dec 2024 14:24:42 +0800
Subject: [PATCH 1/6] Use data param instead of json to send request for faqgen

Signed-off-by: Wang, Kai Lawrence
---
 evals/benchmark/stresscli/locust/aistress.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/evals/benchmark/stresscli/locust/aistress.py b/evals/benchmark/stresscli/locust/aistress.py
index 1264ac41..623c9069 100644
--- a/evals/benchmark/stresscli/locust/aistress.py
+++ b/evals/benchmark/stresscli/locust/aistress.py
@@ -120,12 +120,16 @@ def bench_main(self):
             "faqgenfixed",
             "faqgenbench",
         ]
+        if self.environment.parsed_options.bench_target in ["faqgenfixed", "faqgenbench"]:
+            req_params = {"data": reqData}
+        else:
+            req_params = {"json": reqData}
         test_start_time = time.time()
         try:
             start_ts = time.perf_counter()
             with self.client.post(
                 url,
-                json=reqData,
+                **req_params,
                 stream=True if self.environment.parsed_options.bench_target in streaming_bench_target else False,
                 catch_response=True,
                 timeout=self.environment.parsed_options.http_timeout,

From fb8577d93913c71085b6fcdcc9b497cada720d50 Mon Sep 17 00:00:00 2001
From: "Wang, Kai Lawrence"
Date: Fri, 20 Dec 2024 14:31:21 +0800
Subject: [PATCH 2/6] Fix the input statistics for faqgen benchmark

Signed-off-by: Wang, Kai Lawrence
---
 evals/benchmark/stresscli/locust/tokenresponse.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evals/benchmark/stresscli/locust/tokenresponse.py b/evals/benchmark/stresscli/locust/tokenresponse.py
index 4b6bfe75..afa487ef 100644
--- a/evals/benchmark/stresscli/locust/tokenresponse.py
+++ b/evals/benchmark/stresscli/locust/tokenresponse.py
@@ -15,7 +15,7 @@ def testFunc():

 def respStatics(environment, req, resp):
     tokenizer = transformers.AutoTokenizer.from_pretrained(environment.parsed_options.llm_model)
-    if environment.parsed_options.bench_target in ["chatqnafixed", "chatqnabench"]:
+    if environment.parsed_options.bench_target in ["chatqnafixed", "chatqnabench", "faqgenfixed", "faqgenbench"]:
         num_token_input_prompt = len(tokenizer.encode(req["messages"]))
     elif environment.parsed_options.bench_target in ["llmfixed"]:
         num_token_input_prompt = len(tokenizer.encode(req["query"]))

From 7c9201749dbb855a2a076c9e8b7c64e14e396395 Mon Sep 17 00:00:00 2001
From: "Wang, Kai Lawrence"
Date: Fri, 20 Dec 2024 14:34:31 +0800
Subject: [PATCH 3/6] Update the default prompt for faqgenfixed

Signed-off-by: Wang, Kai Lawrence
---
 evals/benchmark/stresscli/locust/faqgenfixed.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/evals/benchmark/stresscli/locust/faqgenfixed.py b/evals/benchmark/stresscli/locust/faqgenfixed.py
index b648a567..a121c555 100644
--- a/evals/benchmark/stresscli/locust/faqgenfixed.py
+++ b/evals/benchmark/stresscli/locust/faqgenfixed.py
@@ -9,12 +9,7 @@ def getUrl():


 def getReqData():
-    # return {
-    #     "inputs": "What is the revenue of Nike in last 10 years before 2023? Give me detail",
-    #     "parameters": {"max_new_tokens": 128, "do_sample": True},
-    # }
-    # return {"query": "What is the revenue of Nike in last 10 years before 2023? Give me detail", "max_tokens": 128}
-    return {"messages": "What is the revenue of Nike in last 10 years before 2023? Give me detail", "max_tokens": 128}
+    return {"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E6.", "max_tokens": 128}


 def respStatics(environment, reqData, respData):

From 33641c63a323b46441671e36ea7d5fcb7e3f8524 Mon Sep 17 00:00:00 2001
From: "Wang, Kai Lawrence"
Date: Fri, 20 Dec 2024 14:52:18 +0800
Subject: [PATCH 4/6] Implement the complete_response for the streaming output

Signed-off-by: Wang, Kai Lawrence
---
 evals/benchmark/stresscli/locust/aistress.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/evals/benchmark/stresscli/locust/aistress.py b/evals/benchmark/stresscli/locust/aistress.py
index 623c9069..f8793898 100644
--- a/evals/benchmark/stresscli/locust/aistress.py
+++ b/evals/benchmark/stresscli/locust/aistress.py
@@ -173,6 +173,22 @@ def bench_main(self):
                         complete_response += content
                 except json.JSONDecodeError:
                     continue
+            elif self.environment.parsed_options.bench_target in ["faqgenfixed", "faqgenbench"]:
+                client = sseclient.SSEClient(resp)
+                for event in client.events():
+                    if first_token_ts is None:
+                        first_token_ts = time.perf_counter()
+                    try:
+                        data = json.loads(event.data)
+                        for op in data['ops']:
+                            if op['path'] == '/logs/HuggingFaceEndpoint/final_output':
+                                generations = op['value'].get('generations', [])
+                                for generation in generations:
+                                    for item in generation:
+                                        text = item.get("text", "")
+                                        complete_response += text
+                    except json.JSONDecodeError:
+                        continue
             else:
                 client = sseclient.SSEClient(resp)
                 for event in client.events():

From 4c79f8a0ab7f2c36c03a338b1b72013570fe7647 Mon Sep 17 00:00:00 2001
From: "Wang, Kai Lawrence"
Date: Fri, 20 Dec 2024 14:56:16 +0800
Subject: [PATCH 5/6] Set topK=1 for faqgenfixed

Signed-off-by: Wang, Kai Lawrence
---
 evals/benchmark/stresscli/locust/faqgenfixed.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/evals/benchmark/stresscli/locust/faqgenfixed.py b/evals/benchmark/stresscli/locust/faqgenfixed.py
index a121c555..2036c64c 100644
--- a/evals/benchmark/stresscli/locust/faqgenfixed.py
+++ b/evals/benchmark/stresscli/locust/faqgenfixed.py
@@ -9,7 +9,11 @@ def getUrl():


 def getReqData():
-    return {"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E6.", "max_tokens": 128}
+    return {
+        "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E6.",
+        "max_tokens": 128,
+        "top_k": 1,
+    }


 def respStatics(environment, reqData, respData):

From 7ed73fe8a5d104abb1176ffde0758e51420f91d7 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 20 Dec 2024 08:19:03 +0000
Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 evals/benchmark/stresscli/locust/aistress.py    | 6 +++---
 evals/benchmark/stresscli/locust/faqgenfixed.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/evals/benchmark/stresscli/locust/aistress.py b/evals/benchmark/stresscli/locust/aistress.py
index f8793898..436713e0 100644
--- a/evals/benchmark/stresscli/locust/aistress.py
+++ b/evals/benchmark/stresscli/locust/aistress.py
@@ -180,9 +180,9 @@ def bench_main(self):
                         first_token_ts = time.perf_counter()
                     try:
                         data = json.loads(event.data)
-                        for op in data['ops']:
-                            if op['path'] == '/logs/HuggingFaceEndpoint/final_output':
-                                generations = op['value'].get('generations', [])
+                        for op in data["ops"]:
+                            if op["path"] == "/logs/HuggingFaceEndpoint/final_output":
+                                generations = op["value"].get("generations", [])
                                 for generation in generations:
                                     for item in generation:
                                         text = item.get("text", "")

diff --git a/evals/benchmark/stresscli/locust/faqgenfixed.py b/evals/benchmark/stresscli/locust/faqgenfixed.py
index 2036c64c..75f3d29b 100644
--- a/evals/benchmark/stresscli/locust/faqgenfixed.py
+++ b/evals/benchmark/stresscli/locust/faqgenfixed.py
@@ -10,7 +10,7 @@ def getUrl():

 def getReqData():
     return {
-        "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E6.",
+        "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E6.",
         "max_tokens": 128,
         "top_k": 1,
     }
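
Notes on the series follow.

PATCH 1/6 switches the faqgen request from the `json` keyword to the `data` keyword. With requests-style clients, such as the session Locust's HttpUser wraps, the two differ in serialization: `json=` sends the dict as a JSON body with Content-Type: application/json, while `data=` form-encodes it with Content-Type: application/x-www-form-urlencoded, which is evidently what the faqgen endpoint expects. A minimal sketch of the difference; the URL is a placeholder, not the benchmark's actual target:

    import requests

    url = "http://localhost:8888/v1/faqgen"  # placeholder endpoint
    req_data = {"messages": "What is TEI?", "max_tokens": 128}

    # json= serializes the dict to a JSON document:
    #   {"messages": "What is TEI?", "max_tokens": 128}
    requests.post(url, json=req_data)

    # data= form-encodes the same dict:
    #   messages=What+is+TEI%3F&max_tokens=128
    requests.post(url, data=req_data)

PATCH 5/6 additionally pins "top_k": 1 in the fixed payload; top-k sampling with k=1 always selects the most probable token, so decoding becomes effectively greedy and repeated benchmark runs produce more comparable responses.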
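
PATCH 2/6 routes faqgen input statistics through the same branch as chatqna, since both payloads keep the prompt under req["messages"]. The statistic is simply the length of the prompt's token ids under the tokenizer of the model being benchmarked (taken from the benchmark's llm_model option). A standalone sketch; the model name here is only an example:

    import transformers

    # Example model; the benchmark loads whatever its llm_model option names
    tokenizer = transformers.AutoTokenizer.from_pretrained("Intel/neural-chat-7b-v3-3")

    req = {"messages": "What is the revenue of Nike in last 10 years before 2023? Give me detail"}
    num_token_input_prompt = len(tokenizer.encode(req["messages"]))
    print(num_token_input_prompt)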
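
PATCH 4/6 assembles complete_response for faqgen streams, whose SSE events carry LangServe-style JSONPatch documents: the generated text sits in the op whose path is /logs/HuggingFaceEndpoint/final_output, nested under value.generations. The sketch below replays that extraction against a hand-built event payload; the payload's shape is an assumption modeled on the patch, not captured service output:

    import json

    # Hand-built example event in the assumed "ops" (JSONPatch) shape
    event_data = json.dumps(
        {
            "ops": [
                {
                    "op": "add",
                    "path": "/logs/HuggingFaceEndpoint/final_output",
                    "value": {"generations": [[{"text": "Q: What is TEI?\nA: A toolkit for serving embedding models."}]]},
                }
            ]
        }
    )

    complete_response = ""
    for op in json.loads(event_data)["ops"]:
        # Only the final-output op contributes generated text
        if op["path"] == "/logs/HuggingFaceEndpoint/final_output":
            for generation in op["value"].get("generations", []):
                for item in generation:
                    complete_response += item.get("text", "")

    print(complete_response)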