From b1c407081f0e5dc6fea26bd80cc0eca97835116e Mon Sep 17 00:00:00 2001 From: Adrian Cole Date: Sat, 7 Sep 2024 17:49:39 +0800 Subject: [PATCH 1/5] feat: document running openai integration tests with ollama --- CONTRIBUTING.md | 35 ++++++++++++++++--- src/exchange/exchange.py | 7 +++- src/exchange/message.py | 6 ++-- tests/conftest.py | 4 +++ tests/providers/test_openai.py | 5 +-- tests/test_integration.py | 3 +- ...t_vision.py => test_integration_vision.py} | 4 +-- 7 files changed, 52 insertions(+), 12 deletions(-) create mode 100644 tests/conftest.py rename tests/{test_vision.py => test_integration_vision.py} (91%) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 39fa06a..78865e5 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -19,19 +19,46 @@ uv run pytest tests -m "not integration" or, as a shortcut, -``` +```bash just test ``` Generally if you are not developing a new provider, you can test most functionality through mocking and the normal test suite. -However to ensure the providers work, we also have integration tests which actually require a credential and connect +However, to ensure the providers work, we also have integration tests which actually require a credential and connect to the provider endpoints. Those can be run with -``` +```bash uv run pytest tests -m integration -# or `just integration` +# or `just integration` +``` + +### Integration testing OpenAI with Ollama + +The OpenAI provider uses the OpenAI API to access models. The OpenAI API is supported by many tools, and using this can +save you time and money when developing. One such tool is [Ollama](https://github.com/ollama/ollama). + +First, run ollama and pull the models you want to test. +```bash +ollama serve +# Then in another terminal +ollama pull mistral-nemo:12B +ollama pull llava:7b +``` + +Now, export OpenAI variables that control the tests +```bash +export OPENAI_MODEL_TOOL=mistral-nemo +export OPENAI_MODEL_VISION=llava:7b +export OPENAI_HOST=http://localhost:11434 +export OPENAI_API_KEY=unused +``` + +Finally, run openai integration tests against your ollama server. 
+```bash +uv run pytest tests -m integration -k openai +# or `just integration -k openai` ``` ## Pull Requests diff --git a/src/exchange/exchange.py b/src/exchange/exchange.py index 39df84b..02601ef 100644 --- a/src/exchange/exchange.py +++ b/src/exchange/exchange.py @@ -45,11 +45,16 @@ class Exchange: provider: Provider model: str system: str - moderator: Moderator = field(default=ContextTruncate()) + moderator: Moderator = field(default=None) tools: Tuple[Tool] = field(factory=tuple, converter=tuple) messages: List[Message] = field(factory=list) checkpoint_data: CheckpointData = field(factory=CheckpointData) + def __attrs_post_init__(self) -> None: + """Ensures context truncation uses the same model as the exchange""" + if self.moderator is None: + object.__setattr__(self, "moderator", ContextTruncate(model=self.model)) + @property def _toolmap(self) -> Mapping[str, Tool]: return {tool.name: tool for tool in self.tools} diff --git a/src/exchange/message.py b/src/exchange/message.py index 035c603..6ce9745 100644 --- a/src/exchange/message.py +++ b/src/exchange/message.py @@ -19,8 +19,10 @@ def validate_role_and_content(instance: "Message", *_: Any) -> None: # noqa: AN if instance.tool_use: raise ValueError("User message does not support ToolUse") elif instance.role == "assistant": - if not (instance.text or instance.tool_use): - raise ValueError("Assistant message must include a Text or ToolUsage") + # Note: At least in llama3.1, there's no instance.text in the response + # when the input was a single system message. We also can't determine + # the input inside a validator. Hence, we can't enforce a condition + # that the assistant message must include a Text or ToolUsage. if instance.tool_result: raise ValueError("Assistant message does not support ToolResult") diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..9e7cb61 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,4 @@ +import os + +openai_model_tool = os.getenv("OPENAI_MODEL_TOOL", "gpt-4o-mini") +openai_model_vision = os.getenv("OPENAI_MODEL_VISION", "gpt-4o-mini") diff --git a/tests/providers/test_openai.py b/tests/providers/test_openai.py index 0e0e000..42b1836 100644 --- a/tests/providers/test_openai.py +++ b/tests/providers/test_openai.py @@ -4,6 +4,7 @@ import pytest from exchange import Message, Text from exchange.providers.openai import OpenAiProvider +from conftest import openai_model_tool @pytest.fixture @@ -24,7 +25,7 @@ def test_openai_completion(mock_error, mock_warning, mock_sleep, mock_post, open mock_post.return_value.json.return_value = mock_response - model = "gpt-4" + model = openai_model_tool system = "You are a helpful assistant." messages = [Message.user("Hello")] tools = () @@ -48,7 +49,7 @@ def test_openai_completion(mock_error, mock_warning, mock_sleep, mock_post, open @pytest.mark.integration def test_openai_integration(): provider = OpenAiProvider.from_env() - model = "gpt-4" # specify a valid model + model = openai_model_tool system = "You are a helpful assistant." 
messages = [Message.user("Hello")] diff --git a/tests/test_integration.py b/tests/test_integration.py index 7bdb26c..8e24ee1 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -3,11 +3,12 @@ from exchange.message import Message from exchange.providers import get_provider from exchange.tool import Tool +from conftest import openai_model_tool too_long_chars = "x" * (2**20 + 1) cases = [ - (get_provider("openai"), "gpt-4o-mini"), + (get_provider("openai"), openai_model_tool), (get_provider("databricks"), "databricks-meta-llama-3-70b-instruct"), (get_provider("bedrock"), "anthropic.claude-3-5-sonnet-20240620-v1:0"), ] diff --git a/tests/test_vision.py b/tests/test_integration_vision.py similarity index 91% rename from tests/test_vision.py rename to tests/test_integration_vision.py index 95be50f..1822278 100644 --- a/tests/test_vision.py +++ b/tests/test_integration_vision.py @@ -3,10 +3,10 @@ from exchange.exchange import Exchange from exchange.message import Message from exchange.providers import get_provider - +from conftest import openai_model_vision cases = [ - (get_provider("openai"), "gpt-4o-mini"), + (get_provider("openai"), openai_model_vision), ] From 1b2aec6083019c26f4fb87cebd51ca5365e38043 Mon Sep 17 00:00:00 2001 From: Adrian Cole Date: Sun, 8 Sep 2024 12:11:29 +0800 Subject: [PATCH 2/5] temporary: httpx logging Signed-off-by: Adrian Cole --- src/exchange/providers/openai.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/exchange/providers/openai.py b/src/exchange/providers/openai.py index 1f3133b..e708a8a 100644 --- a/src/exchange/providers/openai.py +++ b/src/exchange/providers/openai.py @@ -14,10 +14,39 @@ tools_to_openai_spec, ) from exchange.tool import Tool +from typing import Generator OPENAI_HOST = "https://api.openai.com/" +class LogResponse(httpx.Response): + def iter_bytes(self, *args: object, **kwargs: Dict[str, object]) -> Generator[bytes, None, None]: + print("Response content:") + for chunk in super().iter_bytes(*args, **kwargs): + print(chunk.decode('utf-8')) + yield chunk + +class LogTransport(httpx.BaseTransport): + def __init__(self, transport: httpx.BaseTransport) -> None: + self.transport = transport + + def handle_request(self, request: httpx.Request) -> httpx.Response: + # Log the request details + print(f"Request method: {request.method}") + print(f"Request URL: {request.url}") + print(f"Request headers: {request.headers}") + print(f"Request content: {request.content}") + + response = self.transport.handle_request(request) + + return LogResponse( + status_code=response.status_code, + headers=response.headers, + stream=response.stream, + extensions=response.extensions, + ) + + class OpenAiProvider(Provider): """Provides chat completions for models hosted directly by OpenAI""" @@ -38,6 +67,7 @@ def from_env(cls: Type["OpenAiProvider"]) -> "OpenAiProvider": base_url=url, auth=("Bearer", key), timeout=httpx.Timeout(60 * 10), + transport=LogTransport(httpx.HTTPTransport()), ) return cls(client) From 44f6eb198c2c5cb9e9553ec5e888d754a2164970 Mon Sep 17 00:00:00 2001 From: Adrian Cole Date: Sun, 8 Sep 2024 13:55:19 +0800 Subject: [PATCH 3/5] convert ollama to an openai endpoint Signed-off-by: Adrian Cole --- CONTRIBUTING.md | 25 +++------ src/exchange/exchange.py | 7 +-- src/exchange/providers/ollama.py | 95 ++++++-------------------------- src/exchange/providers/openai.py | 30 ---------- tests/conftest.py | 4 -- tests/providers/test_ollama.py | 48 +--------------- tests/providers/test_openai.py | 5 +- 
tests/test_integration.py | 9 ++- tests/test_integration_vision.py | 5 +- 9 files changed, 42 insertions(+), 186 deletions(-) delete mode 100644 tests/conftest.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 78865e5..b9efe8f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -34,31 +34,22 @@ uv run pytest tests -m integration # or `just integration` ``` -### Integration testing OpenAI with Ollama +### Integration testing with Ollama -The OpenAI provider uses the OpenAI API to access models. The OpenAI API is supported by many tools, and using this can -save you time and money when developing. One such tool is [Ollama](https://github.com/ollama/ollama). +To run integration tests against Ollama, you need the model that tests expect available locally. First, run ollama and pull the models you want to test. ```bash ollama serve -# Then in another terminal -ollama pull mistral-nemo:12B -ollama pull llava:7b +# Then in another terminal, pull the model +OLLAMA_MODEL=$(uv run python -c "from src.exchange.providers.ollama import OLLAMA_MODEL; print(OLLAMA_MODEL)") +ollama pull $OLLAMA_MODEL ``` -Now, export OpenAI variables that control the tests +Finally, run ollama integration tests. ```bash -export OPENAI_MODEL_TOOL=mistral-nemo -export OPENAI_MODEL_VISION=llava:7b -export OPENAI_HOST=http://localhost:11434 -export OPENAI_API_KEY=unused -``` - -Finally, run openai integration tests against your ollama server. -```bash -uv run pytest tests -m integration -k openai -# or `just integration -k openai` +uv run pytest tests -m integration -k ollama +# or `just integration -k ollama` ``` ## Pull Requests diff --git a/src/exchange/exchange.py b/src/exchange/exchange.py index 02601ef..39df84b 100644 --- a/src/exchange/exchange.py +++ b/src/exchange/exchange.py @@ -45,16 +45,11 @@ class Exchange: provider: Provider model: str system: str - moderator: Moderator = field(default=None) + moderator: Moderator = field(default=ContextTruncate()) tools: Tuple[Tool] = field(factory=tuple, converter=tuple) messages: List[Message] = field(factory=list) checkpoint_data: CheckpointData = field(factory=CheckpointData) - def __attrs_post_init__(self) -> None: - """Ensures context truncation uses the same model as the exchange""" - if self.moderator is None: - object.__setattr__(self, "moderator", ContextTruncate(model=self.model)) - @property def _toolmap(self) -> Mapping[str, Tool]: return {tool.name: tool for tool in self.tools} diff --git a/src/exchange/providers/ollama.py b/src/exchange/providers/ollama.py index 37bf026..7570abf 100644 --- a/src/exchange/providers/ollama.py +++ b/src/exchange/providers/ollama.py @@ -1,99 +1,40 @@ import os -from typing import Any, Dict, List, Tuple, Type +from typing import Type import httpx -from exchange.message import Message -from exchange.providers.base import Provider, Usage -from exchange.providers.retry_with_back_off_decorator import retry_httpx_request -from exchange.providers.utils import ( - messages_to_openai_spec, - openai_response_to_message, - openai_single_message_context_length_exceeded, - raise_for_status, - tools_to_openai_spec, -) -from exchange.tool import Tool +from exchange.providers.openai import OpenAiProvider OLLAMA_HOST = "http://localhost:11434/" +OLLAMA_MODEL = "mistral-nemo" -# -# NOTE: this is experimental, best used with 70B model or larger if you can. 
-# Example profile config to try: -class OllamaProvider(Provider): +class OllamaProvider(OpenAiProvider): """Provides chat completions for models hosted by Ollama""" - """ + __doc__ += f""" - ollama: - provider: ollama - processor: llama3.1 - accelerator: llama3.1 - moderator: passive - toolkits: - - name: developer - requires: {} - """ +Here's an example profile configuration to try: + + ollama: + provider: ollama + processor: {OLLAMA_MODEL} + accelerator: {OLLAMA_MODEL} + moderator: passive + toolkits: + - name: developer + requires: {{}} +""" def __init__(self, client: httpx.Client) -> None: print("PLEASE NOTE: the ollama provider is experimental, use with care") - super().__init__() - self.client = client + super().__init__(client) @classmethod def from_env(cls: Type["OllamaProvider"]) -> "OllamaProvider": - url = os.environ["OLLAMA_HOST"] + url = os.environ.get("OLLAMA_HOST", OLLAMA_HOST) client = httpx.Client( base_url=url, - headers={"Content-Type": "application/json"}, timeout=httpx.Timeout(60 * 10), ) return cls(client) - - @staticmethod - def get_usage(data: dict) -> Usage: - usage = data.get("usage", {}) - input_tokens = usage.get("prompt_tokens", 0) - output_tokens = usage.get("completion_tokens", 0) - total_tokens = usage.get("total_tokens", input_tokens + output_tokens) - - return Usage( - input_tokens=input_tokens, - output_tokens=output_tokens, - total_tokens=total_tokens, - ) - - def complete( - self, - model: str, - system: str, - messages: List[Message], - tools: Tuple[Tool], - **kwargs: Dict[str, Any], - ) -> Tuple[Message, Usage]: - payload = dict( - messages=[ - {"role": "system", "content": system}, - *messages_to_openai_spec(messages), - ], - model=model, - tools=tools_to_openai_spec(tools) if tools else [], - **kwargs, - ) - payload = {k: v for k, v in payload.items() if v} - response = self._send_request(payload) - - # Check for context_length_exceeded error for single, long input message - if "error" in response.json() and len(messages) == 1: - openai_single_message_context_length_exceeded(response.json()["error"]) - - data = raise_for_status(response).json() - - message = openai_response_to_message(data) - usage = self.get_usage(data) - return message, usage - - @retry_httpx_request() - def _send_request(self, payload: Any) -> httpx.Response: # noqa: ANN401 - return self.client.post("v1/chat/completions", json=payload) diff --git a/src/exchange/providers/openai.py b/src/exchange/providers/openai.py index e708a8a..1f3133b 100644 --- a/src/exchange/providers/openai.py +++ b/src/exchange/providers/openai.py @@ -14,39 +14,10 @@ tools_to_openai_spec, ) from exchange.tool import Tool -from typing import Generator OPENAI_HOST = "https://api.openai.com/" -class LogResponse(httpx.Response): - def iter_bytes(self, *args: object, **kwargs: Dict[str, object]) -> Generator[bytes, None, None]: - print("Response content:") - for chunk in super().iter_bytes(*args, **kwargs): - print(chunk.decode('utf-8')) - yield chunk - -class LogTransport(httpx.BaseTransport): - def __init__(self, transport: httpx.BaseTransport) -> None: - self.transport = transport - - def handle_request(self, request: httpx.Request) -> httpx.Response: - # Log the request details - print(f"Request method: {request.method}") - print(f"Request URL: {request.url}") - print(f"Request headers: {request.headers}") - print(f"Request content: {request.content}") - - response = self.transport.handle_request(request) - - return LogResponse( - status_code=response.status_code, - headers=response.headers, - 
stream=response.stream, - extensions=response.extensions, - ) - - class OpenAiProvider(Provider): """Provides chat completions for models hosted directly by OpenAI""" @@ -67,7 +38,6 @@ def from_env(cls: Type["OpenAiProvider"]) -> "OpenAiProvider": base_url=url, auth=("Bearer", key), timeout=httpx.Timeout(60 * 10), - transport=LogTransport(httpx.HTTPTransport()), ) return cls(client) diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index 9e7cb61..0000000 --- a/tests/conftest.py +++ /dev/null @@ -1,4 +0,0 @@ -import os - -openai_model_tool = os.getenv("OPENAI_MODEL_TOOL", "gpt-4o-mini") -openai_model_vision = os.getenv("OPENAI_MODEL_VISION", "gpt-4o-mini") diff --git a/tests/providers/test_ollama.py b/tests/providers/test_ollama.py index 6666ea8..7812fe6 100644 --- a/tests/providers/test_ollama.py +++ b/tests/providers/test_ollama.py @@ -1,54 +1,12 @@ -import os -from unittest.mock import patch - import pytest -from exchange import Message, Text -from exchange.providers.ollama import OLLAMA_HOST, OllamaProvider - - -@pytest.fixture -@patch.dict(os.environ, {}) -def ollama_provider(): - os.environ["OLLAMA_HOST"] = OLLAMA_HOST - return OllamaProvider.from_env() - - -@patch("httpx.Client.post") -@patch("time.sleep", return_value=None) -@patch("logging.warning") -@patch("logging.error") -def test_ollama_completion(mock_error, mock_warning, mock_sleep, mock_post, ollama_provider): - mock_response = { - "choices": [{"message": {"role": "assistant", "content": "Hello!"}}], - } - - mock_post.return_value.json.return_value = mock_response - - model = "llama2" - system = "You are a helpful assistant." - messages = [Message.user("Hello")] - tools = () - - reply_message, _ = ollama_provider.complete(model=model, system=system, messages=messages, tools=tools) - - assert reply_message.content == [Text(text="Hello!")] - mock_post.assert_called_once_with( - "v1/chat/completions", - json={ - "messages": [ - {"role": "system", "content": system}, - {"role": "user", "content": "Hello"}, - ], - "model": model, - }, - ) +from exchange import Message +from exchange.providers.ollama import OllamaProvider, OLLAMA_MODEL @pytest.mark.integration def test_ollama_integration(): - os.environ["OLLAMA_HOST"] = OLLAMA_HOST provider = OllamaProvider.from_env() - model = "llama2" # specify a valid model + model = OLLAMA_MODEL system = "You are a helpful assistant." messages = [Message.user("Hello")] diff --git a/tests/providers/test_openai.py b/tests/providers/test_openai.py index 42b1836..0e0e000 100644 --- a/tests/providers/test_openai.py +++ b/tests/providers/test_openai.py @@ -4,7 +4,6 @@ import pytest from exchange import Message, Text from exchange.providers.openai import OpenAiProvider -from conftest import openai_model_tool @pytest.fixture @@ -25,7 +24,7 @@ def test_openai_completion(mock_error, mock_warning, mock_sleep, mock_post, open mock_post.return_value.json.return_value = mock_response - model = openai_model_tool + model = "gpt-4" system = "You are a helpful assistant." messages = [Message.user("Hello")] tools = () @@ -49,7 +48,7 @@ def test_openai_completion(mock_error, mock_warning, mock_sleep, mock_post, open @pytest.mark.integration def test_openai_integration(): provider = OpenAiProvider.from_env() - model = openai_model_tool + model = "gpt-4" # specify a valid model system = "You are a helpful assistant." 
messages = [Message.user("Hello")] diff --git a/tests/test_integration.py b/tests/test_integration.py index 8e24ee1..98c1068 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1,14 +1,16 @@ import pytest from exchange.exchange import Exchange from exchange.message import Message +from exchange.moderators import ContextTruncate from exchange.providers import get_provider +from exchange.providers.ollama import OLLAMA_MODEL from exchange.tool import Tool -from conftest import openai_model_tool too_long_chars = "x" * (2**20 + 1) cases = [ - (get_provider("openai"), openai_model_tool), + (get_provider("ollama"), OLLAMA_MODEL), + (get_provider("openai"), "gpt-4o-mini"), (get_provider("databricks"), "databricks-meta-llama-3-70b-instruct"), (get_provider("bedrock"), "anthropic.claude-3-5-sonnet-20240620-v1:0"), ] @@ -22,6 +24,7 @@ def test_simple(provider, model): ex = Exchange( provider=provider, model=model, + moderator=ContextTruncate(model), system="You are a helpful assistant.", ) @@ -45,6 +48,7 @@ def get_password() -> str: ex = Exchange( provider=provider, model=model, + moderator=ContextTruncate(model), system="You are a helpful assistant. Expect to need to authenticate using get_password.", tools=(Tool.from_function(get_password),), ) @@ -69,6 +73,7 @@ def get_password() -> str: ex = Exchange( provider=provider, model=model, + moderator=ContextTruncate(model), system="You are a helpful assistant. Expect to need to authenticate using get_password.", tools=(Tool.from_function(get_password),), ) diff --git a/tests/test_integration_vision.py b/tests/test_integration_vision.py index 1822278..8635886 100644 --- a/tests/test_integration_vision.py +++ b/tests/test_integration_vision.py @@ -2,11 +2,11 @@ from exchange.content import ToolResult, ToolUse from exchange.exchange import Exchange from exchange.message import Message +from exchange.moderators import ContextTruncate from exchange.providers import get_provider -from conftest import openai_model_vision cases = [ - (get_provider("openai"), openai_model_vision), + (get_provider("openai"), "gpt-4o-mini"), ] @@ -18,6 +18,7 @@ def test_simple(provider, model): ex = Exchange( provider=provider, model=model, + moderator=ContextTruncate(model), system="You are a helpful assistant.", ) From 376ed831459937ec5587439c9c137d4b6718d9f6 Mon Sep 17 00:00:00 2001 From: Adrian Cole Date: Sun, 8 Sep 2024 13:59:04 +0800 Subject: [PATCH 4/5] remove llama3.1 workaround Signed-off-by: Adrian Cole --- src/exchange/message.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/exchange/message.py b/src/exchange/message.py index 6ce9745..035c603 100644 --- a/src/exchange/message.py +++ b/src/exchange/message.py @@ -19,10 +19,8 @@ def validate_role_and_content(instance: "Message", *_: Any) -> None: # noqa: AN if instance.tool_use: raise ValueError("User message does not support ToolUse") elif instance.role == "assistant": - # Note: At least in llama3.1, there's no instance.text in the response - # when the input was a single system message. We also can't determine - # the input inside a validator. Hence, we can't enforce a condition - # that the assistant message must include a Text or ToolUsage. 
+ if not (instance.text or instance.tool_use): + raise ValueError("Assistant message must include a Text or ToolUsage") if instance.tool_result: raise ValueError("Assistant message does not support ToolResult") From b9ee6361babc3d8777920bd510f0b3b96fef6375 Mon Sep 17 00:00:00 2001 From: Mic Neale Date: Sun, 8 Sep 2024 15:15:53 +1000 Subject: [PATCH 5/5] shouldn't hardcode truncate to gpt4o mini --- src/exchange/moderators/truncate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/exchange/moderators/truncate.py b/src/exchange/moderators/truncate.py index 3185206..f156b23 100644 --- a/src/exchange/moderators/truncate.py +++ b/src/exchange/moderators/truncate.py @@ -20,7 +20,7 @@ class ContextTruncate(Moderator): def __init__( self, - model: str = "gpt-4o-mini", + model: str = None, max_tokens: int = MAX_TOKENS, ) -> None: self.model = model
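
Taken together, the series leaves the Ollama provider as a thin subclass of `OpenAiProvider`, has the integration tests pass an explicit `ContextTruncate(model)` moderator, and removes the hardcoded `gpt-4o-mini` default from `ContextTruncate`. A minimal sketch of how those pieces fit together, mirroring the `Exchange` construction used in `tests/test_integration.py`; the `ex.add(...)`/`ex.reply()` calls are assumed from the wider exchange API and are not part of this series:

```python
from exchange.exchange import Exchange
from exchange.message import Message
from exchange.moderators import ContextTruncate
from exchange.providers.ollama import OllamaProvider, OLLAMA_MODEL

# Requires a local `ollama serve` with OLLAMA_MODEL pulled, as described in CONTRIBUTING.md.
provider = OllamaProvider.from_env()  # uses OLLAMA_HOST if set, else the localhost default

ex = Exchange(
    provider=provider,
    model=OLLAMA_MODEL,
    # Patch 5 drops the hardcoded gpt-4o-mini default, so pass the model explicitly.
    moderator=ContextTruncate(OLLAMA_MODEL),
    system="You are a helpful assistant.",
)

# Assumed usage based on the broader exchange API, not shown in this series.
ex.add(Message.user("Hello"))
reply = ex.reply()
print(reply.text)
```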