Commit

more work
jmhessel committed Aug 14, 2024
1 parent 00e1a41 commit 50eda29
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions src/llmperf/ray_clients/openai_chat_completions_client.py
@@ -94,7 +94,10 @@ def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]:
                         error_msg = data["error"]["message"]
                         error_response_code = data["error"]["code"]
                         raise RuntimeError(data["error"]["message"])

+                    if len(data["choices"]) == 0: # azure returns no choices at first
+                        continue
+
                     delta = data["choices"][0]["delta"]
                     if delta.get("content", None):
                         if not ttft:
@@ -111,9 +114,11 @@ def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]:
             output_throughput = tokens_received / total_request_time

         except Exception as e:
+            import traceback
+            stack_trace = traceback.format_exc()
             metrics[common_metrics.ERROR_MSG] = error_msg
             metrics[common_metrics.ERROR_CODE] = error_response_code
-            print(f"Warning Or Error: {e}")
-            print(error_response_code)
+            raise ValueError(f"Warning Or Error: {e} \n {stack_trace}")

         metrics[common_metrics.INTER_TOKEN_LAT] = sum(time_to_next_token) #This should be same as metrics[common_metrics.E2E_LAT]. Leave it here for now
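For context on the first hunk: as the new inline comment says, Azure-hosted OpenAI deployments can stream an initial chunk whose "choices" list is empty (typically one carrying only prompt-filter metadata), so indexing data["choices"][0] on that chunk would raise an IndexError before any tokens arrive. A minimal standalone sketch of the guard, using hypothetical sample chunks rather than the llmperf client itself:

import json

# Hypothetical sample stream: the first chunk has an empty "choices" list,
# the way Azure can send it; later chunks carry incremental deltas.
raw_chunks = [
    '{"choices": [], "prompt_filter_results": []}',
    '{"choices": [{"delta": {"content": "Hello"}}]}',
    '{"choices": [{"delta": {"content": " world"}}]}',
]

generated_text = ""
for raw in raw_chunks:
    data = json.loads(raw)
    if len(data["choices"]) == 0:  # skip metadata-only chunks instead of crashing
        continue
    delta = data["choices"][0]["delta"]
    if delta.get("content"):
        generated_text += delta["content"]

print(generated_text)  # -> "Hello world"

Without the continue, the very first chunk would already send the request into the except block below.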

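The second hunk changes how failures surface: instead of printing the message and error code and continuing, the client now captures traceback.format_exc() and re-raises a ValueError that embeds it. A small sketch of that pattern in isolation (the flaky_request helper is hypothetical, not part of llmperf):

import traceback

def flaky_request():
    # Hypothetical stand-in for the streaming HTTP call; always fails here.
    raise RuntimeError("upstream returned an error payload")

def run_request():
    try:
        flaky_request()
    except Exception as e:
        stack_trace = traceback.format_exc()
        # Mirror the commit: re-raise with the captured trace attached instead
        # of printing and swallowing the error.
        raise ValueError(f"Warning Or Error: {e} \n {stack_trace}")

try:
    run_request()
except ValueError as err:
    print(err)  # the message now includes the original traceback for debugging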