-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenchmark.py
57 lines (47 loc) · 1.59 KB
/
benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import os
import requests
from typing import Union, List
# service_id = "gx0098lddlsp1c"
def response(chat_query: Union[str, List[str], List[dict]], service_id: str):
    """Submit a chat request to a RunPod endpoint and print the streamed output.

    The request is posted to ``/run``, which answers with a job ID. That job's
    ``/stream`` endpoint is then polled, and the ``output`` of every returned
    chunk is printed, until the job reports a terminal status.

    Args:
        chat_query: Value sent as ``input.messages``: a prompt string or a
            list of messages (the script entry point passes dicts with
            ``role`` and ``content`` keys).
        service_id: RunPod endpoint ID used to build the API URLs.

    Returns:
        None. Generated output is written to stdout.

    Raises:
        RuntimeError: If ``RUNPOD_API_TOKEN`` is not set, or if the job ends
            with a status other than ``COMPLETED``.
        requests.HTTPError: If the API answers with an HTTP error status.
    """
    import time  # local import: only needed for the polling back-off below

    runpod_api_token = os.environ.get("RUNPOD_API_TOKEN")
    if not runpod_api_token:
        # Fail fast instead of sending "Bearer None" and crashing later on a
        # missing "id" key in the error payload.
        raise RuntimeError("RUNPOD_API_TOKEN environment variable is not set")

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {runpod_api_token}",
    }
    request_timeout = 600  # seconds, applied to every HTTP call
    data_to_send = {
        "input": {
            "messages": chat_query,
            "stream": True,
            "sampling_params": {
                "temperature": 0.1,
                "top_p": 0.7,
                "max_new_tokens": 512,
            },
        }
    }

    # First post the request; the API returns a job ID.
    run_url = f"https://api.runpod.ai/v2/{service_id}/run"
    submit = requests.post(
        url=run_url,
        headers=headers,
        json=data_to_send,
        timeout=request_timeout,
    )
    submit.raise_for_status()
    job_id = submit.json()["id"]

    # Then poll /stream with that ID until the job reaches a terminal status.
    stream_url = f"https://api.runpod.ai/v2/{service_id}/stream/{job_id}"
    failed_statuses = {"FAILED", "CANCELLED", "TIMED_OUT"}
    while True:
        poll = requests.get(stream_url, headers=headers, timeout=request_timeout)
        poll.raise_for_status()
        payload = poll.json()

        # Print every chunk before checking the status, so output delivered
        # together with the final COMPLETED response is not lost.
        chunks = payload.get("stream") or []
        for chunk in chunks:
            if "output" in chunk:
                print(chunk["output"])

        status = payload.get("status")
        if status == "COMPLETED":
            break
        if status in failed_statuses:
            raise RuntimeError(f"RunPod job {job_id} ended with status {status}")
        if not chunks:
            # Nothing new yet (e.g. job still queued): back off briefly
            # rather than hammering the API in a tight loop.
            time.sleep(0.2)
    return
if __name__ == "__main__":
    # Demo conversation as (role, content) pairs, ending on a user turn.
    turns = (
        ("user", "be helpful"),
        ("assistant", "I'm doing great. How can I help you today?"),
        ("user", "I'd like to show off how chat templating works!"),
    )
    chat_input = [{"role": role, "content": content} for role, content in turns]
    response(chat_query=chat_input, service_id="gx0098lddlsp1c")