Commit

more work
jmhessel committed Aug 14, 2024
1 parent 00e1a41 commit 50eda29
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions src/llmperf/ray_clients/openai_chat_completions_client.py
@@ -94,7 +94,10 @@ def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]:
                         error_msg = data["error"]["message"]
                         error_response_code = data["error"]["code"]
                         raise RuntimeError(data["error"]["message"])

+                    if len(data["choices"]) == 0: # azure returns no choices at first
+                        continue
+
                     delta = data["choices"][0]["delta"]
                     if delta.get("content", None):
                         if not ttft:
@@ -111,9 +114,11 @@ def llm_request(self, request_config: RequestConfig) -> Dict[str, Any]:
             output_throughput = tokens_received / total_request_time

         except Exception as e:
+            import traceback
+            stack_trace = traceback.format_exc()
             metrics[common_metrics.ERROR_MSG] = error_msg
             metrics[common_metrics.ERROR_CODE] = error_response_code
-            print(f"Warning Or Error: {e}")
-            print(error_response_code)
+            raise ValueError(f"Warning Or Error: {e} \n {stack_trace}")

         metrics[common_metrics.INTER_TOKEN_LAT] = sum(time_to_next_token) #This should be same as metrics[common_metrics.E2E_LAT]. Leave it here for now
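For context on the first hunk: as the new inline comment says, Azure-hosted OpenAI deployments can stream an initial chunk whose "choices" list is empty (typically one carrying only prompt-filter metadata), so indexing data["choices"][0] on that chunk would raise an IndexError before any tokens arrive. A minimal standalone sketch of the guard, using hypothetical sample chunks rather than the llmperf client itself:

import json

# Hypothetical sample stream: the first chunk has an empty "choices" list,
# the way Azure can send it; later chunks carry incremental deltas.
raw_chunks = [
    '{"choices": [], "prompt_filter_results": []}',
    '{"choices": [{"delta": {"content": "Hello"}}]}',
    '{"choices": [{"delta": {"content": " world"}}]}',
]

generated_text = ""
for raw in raw_chunks:
    data = json.loads(raw)
    if len(data["choices"]) == 0:  # skip metadata-only chunks instead of crashing
        continue
    delta = data["choices"][0]["delta"]
    if delta.get("content"):
        generated_text += delta["content"]

print(generated_text)  # -> "Hello world"

Without the continue, the very first chunk would already send the request into the except block below.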

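The second hunk changes how failures surface: instead of printing the message and error code and continuing, the client now captures traceback.format_exc() and re-raises a ValueError that embeds it. A small sketch of that pattern in isolation (the flaky_request helper is hypothetical, not part of llmperf):

import traceback

def flaky_request():
    # Hypothetical stand-in for the streaming HTTP call; always fails here.
    raise RuntimeError("upstream returned an error payload")

def run_request():
    try:
        flaky_request()
    except Exception as e:
        stack_trace = traceback.format_exc()
        # Mirror the commit: re-raise with the captured trace attached instead
        # of printing and swallowing the error.
        raise ValueError(f"Warning Or Error: {e} \n {stack_trace}")

try:
    run_request()
except ValueError as err:
    print(err)  # the message now includes the original traceback for debugging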