Commit d30d1e4

refresh environment variables and add them to README
hummerichsander authored Sep 25, 2024
1 parent 953c106 commit d30d1e4
Showing 5 changed files with 25 additions and 13 deletions.
README.md (8 changes: 7 additions & 1 deletion)
@@ -5,10 +5,16 @@ Available endpoints:
 - /v1/chat/completion
 
 Instead of running a LLM model to generate completions, it simply returns a response generated by surrogate models. Available surrogate models are:
-- "yes_no": returns random "yes" or "no" response
+- "yes_no": returns random "Yes" or "No" response
+- "ja_nein": returns random "Ja" or "Nein" response
 - "lorem_ipsum": returns random "lorem ipsum" text
 
 ## Run via docker:
 ```bash
 docker pull ghcr.io/hummerichsander/openai_api_server_mock:v ... # replace ... with the latest version
 ```
+
+Environment variables:
+- `CONTEXT_SIZE`: context size for the model (default: 4096)
+- `SLEEP_TIME`: sleep time in seconds before returning the response (default: 0)
+- `MAX_CONCURRENT_REQUESTS`: maximum number of concurrent requests (default: 10^9)
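
For orientation, here is a minimal sketch of exercising the surrogate models above with the official openai Python client, in the spirit of the sandbox notebook changed later in this commit. The base URL and port are assumptions; they depend on how the container is started:

```python
# Hypothetical usage sketch; base_url/port are assumptions, not part of the commit.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8000/v1",  # wherever the mock container is exposed
    api_key="unused",                     # presumably ignored by the mock
)

response = client.chat.completions.create(
    model="yes_no",  # one of the surrogate model names listed above
    messages=[{"role": "user", "content": "Is the sky blue?"}],
)
print(response.choices[0].message.content)  # "Yes" or "No"
```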
example_env (3 changes: 1 addition & 2 deletions)
@@ -1,5 +1,4 @@
-MODEL_CONTEXT_SIZE=4096
-SURROGATE="yes_no"
+CONTEXT_SIZE=4096
 SLEEP_TIME=1
 MAX_CONCURRENT_REQUESTS=1
 LANGUAGE="en"
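
The renamed variables in example_env now match the settings aliases below. One hedged way to load the file outside docker, assuming python-dotenv is available (the project may instead rely on docker's --env-file; this commit does not say):

```python
# Hypothetical sketch; python-dotenv is an assumption, not a dependency
# confirmed by this commit.
from dotenv import load_dotenv

load_dotenv("example_env")  # exports CONTEXT_SIZE, SLEEP_TIME, ... into os.environ
```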
openai_api_server_mock/chat/surrogates.py (16 changes: 12 additions & 4 deletions)
@@ -89,15 +89,23 @@ class YesNoSurrogate(ModelSurrogate):

     @classmethod
     async def generate(cls, n: int, messages: List[Message]) -> List[str]:
-        if settings.language == "en":
-            return ["Yes" if random.random() > 0.5 else "No"]
-        elif settings.language == "de":
-            return ["Ja" if random.random() > 0.5 else "Nein"]
+        return ["Yes" if random.random() > 0.5 else "No"]
 
 
 YesNoSurrogate.register()
 
 
+class JaNeinSurrogate(ModelSurrogate):
+    name: str = "ja_nein"
+
+    @classmethod
+    async def generate(cls, n: int, messages: List[Message]) -> List[str]:
+        return ["Ja" if random.random() > 0.5 else "Nein"]
+
+
+JaNeinSurrogate.register()
+
+
 async def get_surrogate(model: str) -> ModelSurrogate:
     global available_surrogates
     for surrogate in available_surrogates:
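
The diff implies a small plugin pattern: subclass ModelSurrogate, set a name, implement an async generate, and call register() so get_surrogate can resolve the model name. A hedged sketch of a hypothetical additional surrogate, assuming register() records the class in available_surrogates as the pattern suggests:

```python
# Hypothetical example in the same module context as surrogates.py;
# Message and ModelSurrogate are assumed to be defined/imported there.
import random
from typing import List


class OuiNonSurrogate(ModelSurrogate):
    name: str = "oui_non"

    @classmethod
    async def generate(cls, n: int, messages: List[Message]) -> List[str]:
        # Mirrors JaNeinSurrogate: a fair coin flip between the two answers.
        return ["Oui" if random.random() > 0.5 else "Non"]


OuiNonSurrogate.register()  # makes model="oui_non" resolvable via get_surrogate()
```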
openai_api_server_mock/settings.py (1 change: 0 additions & 1 deletion)
@@ -5,7 +5,6 @@

 class Settings(BaseSettings):
     context_size: int = Field(alias="CONTEXT_SIZE", default=4096)
-    surrogate: str = Field(alias="SURROGATE", default="lorem_ipsum")
     sleep_time: int = Field(alias="SLEEP_TIME", default=0)
     max_concurrent_requests: int = Field(
         alias="MAX_CONCURRENT_REQUESTS", default=10**9
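
Since Settings is a pydantic BaseSettings subclass whose fields carry env-var aliases, every value in the README's new list can be overridden from the environment. A minimal sketch, assuming the import path follows the file location and that aliases are used for env lookup as in pydantic-settings:

```python
# Hypothetical sketch; set the variables before the Settings instance is created.
import os

os.environ["SLEEP_TIME"] = "2"       # matches Field(alias="SLEEP_TIME")
os.environ["CONTEXT_SIZE"] = "8192"  # matches Field(alias="CONTEXT_SIZE")

from openai_api_server_mock.settings import Settings

settings = Settings()  # field values are read from the environment
assert settings.sleep_time == 2
assert settings.context_size == 8192
```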
sandbox.ipynb (10 changes: 5 additions & 5 deletions)
@@ -20,12 +20,12 @@
 },
 {
  "cell_type": "code",
- "execution_count": 3,
+ "execution_count": 5,
  "metadata": {},
  "outputs": [],
  "source": [
   "chat_completion = client.chat.completions.create(\n",
-  "    model=\"yes_no\",\n",
+  "    model=\"ja_nein\",\n",
   "    messages=[\n",
   "        {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
   "        {\"role\": \"user\", \"content\": \"Is the sky blue?\"}\n",
@@ -38,16 +38,16 @@
 },
 {
  "cell_type": "code",
- "execution_count": 4,
+ "execution_count": 6,
  "metadata": {},
  "outputs": [
   {
    "data": {
     "text/plain": [
-     "[Choice(finish_reason='stop', index=0, logprobs=ChoiceLogprobs(content=[ChatCompletionTokenLogprob(token='No', bytes=None, logprob=-0.4558056105339685, top_logprobs=[TopLogprob(token='Yes', bytes=None, logprob=-2.1267604392490442), TopLogprob(token='No', bytes=None, logprob=-0.7188313398698458), TopLogprob(token='Yes', bytes=None, logprob=-3.7428107344910946)])], refusal=None), message=ChatCompletionMessage(content='Yes', refusal=None, role='assistant', function_call=None, tool_calls=None, name=None))]"
+     "[Choice(finish_reason='length', index=0, logprobs=ChoiceLogprobs(content=[ChatCompletionTokenLogprob(token='Nein', bytes=None, logprob=-0.05135242454878156, top_logprobs=[TopLogprob(token='Ja', bytes=None, logprob=-0.9180391264546016), TopLogprob(token='Nein', bytes=None, logprob=-0.11234122861118023), TopLogprob(token='Ja', bytes=None, logprob=-2.7463193707941906)])], refusal=None), message=ChatCompletionMessage(content='Ja', refusal=None, role='assistant', function_call=None, tool_calls=None, name=None))]"
     ]
    },
-   "execution_count": 4,
+   "execution_count": 6,
    "metadata": {},
    "output_type": "execute_result"
   }
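
The notebook output shows that the mock fabricates the full openai response shape, including per-token logprobs. A short sketch of reading them back, using only field names visible in the output above (chat_completion is the object created in the earlier cell):

```python
# Hypothetical continuation of the notebook cells shown above.
choice = chat_completion.choices[0]

print(choice.message.content)  # e.g. "Ja" or "Nein"
print(choice.finish_reason)    # e.g. "length"

# Each generated token carries a mocked logprob plus alternative tokens.
for token_logprob in choice.logprobs.content:
    print(token_logprob.token, token_logprob.logprob)
    for alt in token_logprob.top_logprobs:
        print("  alt:", alt.token, alt.logprob)
```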
