Skip to content

Commit

Permalink
server/bench:
Browse files Browse the repository at this point in the history
- fix results export when Prometheus is not started (missing metrics now default to 0)
- wait for the server to be ready (/health returns 200) before starting the bench
  • Loading branch information
phymbert committed Dec 27, 2024
1 parent 1bf38cf commit fab46ca
Showing 1 changed file with 18 additions and 9 deletions.
27 changes: 18 additions & 9 deletions examples/server/bench/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,12 +189,12 @@ def main(args_in: list[str] | None = None) -> None:
"pp": {
"p95": round(data['metrics']["llamacpp_prompt_processing_second"]["p(95)"], 2),
"avg": round(data['metrics']["llamacpp_prompt_processing_second"]["avg"], 2),
"0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2),
"0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2) if 'prompt_tokens_seconds' in prometheus_metrics else 0,
},
"tg": {
"p95": round(data['metrics']["llamacpp_tokens_second"]["p(95)"], 2),
"avg": round(data['metrics']["llamacpp_tokens_second"]["avg"], 2),
"0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2),
"0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2) if 'predicted_tokens_seconds' in prometheus_metrics else 0,
},
}
with open("results.github.env", 'a') as github_env:
Expand Down Expand Up @@ -234,7 +234,7 @@ def start_server(args):
server_process = start_server_background(args)

attempts = 0
max_attempts = 20
max_attempts = 600
if 'GITHUB_ACTIONS' in os.environ:
max_attempts *= 2

Expand All @@ -245,7 +245,15 @@ def start_server(args):
print(f"bench: waiting for server to start ...")
time.sleep(0.5)

print("bench: server started.")
attempts = 0
while not is_server_ready(args.host, args.port):
attempts += 1
if attempts > max_attempts:
assert False, "server not ready"
print(f"bench: waiting for server to be ready ...")
time.sleep(0.5)

print("bench: server started and ready.")
return server_process


Expand All @@ -258,11 +266,6 @@ def start_server_background(args):
'--host', args.host,
'--port', args.port,
]
model_file = args.model_path_prefix + os.path.sep + args.hf_file
model_dir = os.path.dirname(model_file)
if not os.path.exists(model_dir):
os.makedirs(model_dir)
server_args.extend(['--model', model_file])
server_args.extend(['--hf-repo', args.hf_repo])
server_args.extend(['--hf-file', args.hf_file])
server_args.extend(['--n-gpu-layers', args.n_gpu_layers])
Expand Down Expand Up @@ -306,6 +309,12 @@ def is_server_listening(server_fqdn, server_port):
return _is_server_listening


def is_server_ready(server_fqdn, server_port, timeout=5.0):
    """Return True when the server's ``/health`` endpoint answers HTTP 200.

    Args:
        server_fqdn: Host name or address of the server.
        server_port: TCP port the server listens on.
        timeout: Seconds to wait for the health request before giving up.

    Returns:
        True if the GET request to ``/health`` completed with status 200,
        False for any other status, a timeout, or a connection failure.
    """
    url = f"http://{server_fqdn}:{server_port}/health"
    try:
        # Without a timeout a single stalled request would block the caller's
        # polling loop forever and bypass its attempt limit.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # A refused/reset connection or a timeout means "not ready yet";
        # let the caller retry instead of crashing.
        return False
    return response.status_code == 200


def escape_metric_name(metric_name):
    """Upper-case ``metric_name`` and replace every character outside A-Z / 0-9 with ``_``."""
    upper_name = metric_name.upper()
    return re.sub('[^A-Z0-9]', '_', upper_name)

Expand Down

0 comments on commit fab46ca

Please sign in to comment.