diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7a4edc4a9..b4d1f2520 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -108,6 +108,23 @@ Additions to the [developer toolkit][developer] change the core performance, and This project follows the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) specification for PR titles. Conventional Commits make it easier to understand the history of a project and facilitate automation around versioning and changelog generation. +## Running with OpenTelemetry + +Goose uses HTTP to access model providers. If you are receiving failures, it can be helpful to see traces +of the underlying HTTP requests. For example, a 404 could be indicative of an incorrect URL or a missing model. + +First, ensure you have an OpenTelemetry compatible collector listening on port 4318, such as [otel-tui][otel-tui]. + +```bash +docker run --rm -it --name otel-tui -p 4318:4318 ymtdzzz/otel-tui +``` + +Then, start goose like this: +```bash +uv run opentelemetry-instrument --service_name goose --exporter_otlp_protocol http/protobuf goose session start +# or `just otel-goose session start` +``` + [issues]: https://github.com/square/goose/issues [goose-plugins]: https://github.com/square/goose-plugins [ai-exchange]: https://github.com/square/exchange @@ -115,4 +132,5 @@ This project follows the [Conventional Commits](https://www.conventionalcommits. [uv]: https://docs.astral.sh/uv/ [ruff]: https://docs.astral.sh/ruff/ [just]: https://github.com/casey/just -[toolkits]: docs/docs/toolkits.md \ No newline at end of file +[toolkits]: docs/docs/toolkits.md +[otel-tui]: https://github.com/ymtdzzz/otel-tui diff --git a/justfile b/justfile index 4d3ac634e..889c39f84 100644 --- a/justfile +++ b/justfile @@ -20,3 +20,6 @@ coverage *FLAGS: docs: cd docs && uv sync && uv run mkdocs serve + +otel-goose *FLAGS: + uv run opentelemetry-instrument --service_name goose --exporter_otlp_protocol http/protobuf goose {{FLAGS}} diff --git a/pyproject.toml b/pyproject.toml index b51e57421..1d6953cb3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ dependencies = [ "ai-exchange>=0.9.0", "click>=8.1.7", "prompt-toolkit>=3.0.47", + "opentelemetry-api>=1.27.0", ] author = [{ name = "Block", email = "ai-oss-tools@block.xyz" }] packages = [{ include = "goose", from = "src" }] @@ -61,6 +62,10 @@ dev-dependencies = [ "mkdocs-redirects>=1.2.1", "mkdocs-include-markdown-plugin>=6.2.2", "mkdocs-callouts>=1.14.0", + "opentelemetry-sdk>=1.27.0", + "opentelemetry-exporter-otlp-proto-http>=1.27.0", + "opentelemetry-distro>=0.48b0", + "opentelemetry-instrumentation-httpx>=0.48b0", ] diff --git a/src/goose/cli/session.py b/src/goose/cli/session.py index 1f72a3020..ac6fabf1c 100644 --- a/src/goose/cli/session.py +++ b/src/goose/cli/session.py @@ -3,6 +3,8 @@ from typing import Any, Dict, List, Optional from exchange import Message, ToolResult, ToolUse, Text +from opentelemetry import trace +from opentelemetry.trace.status import Status as OtelStatus, StatusCode as OtelStatusCode from prompt_toolkit.shortcuts import confirm from rich import print from rich.console import RenderableType @@ -90,12 +92,17 @@ def __init__( name: Optional[str] = None, profile: Optional[str] = None, plan: Optional[dict] = None, + tracer: trace.Tracer = trace.get_tracer("goose"), **kwargs: Dict[str, Any], ) -> None: self.name = name self.status_indicator = Status("", spinner="dots") self.notifier = SessionNotifier(self.status_indicator) + # Set the tracer as a field in session, as opposed to a module variable + # so that tests can swap this out and safely run in parallel. + self.tracer = tracer + self.exchange = build_exchange(profile=load_profile(profile), notifier=self.notifier) if name is not None and self.session_file_path.exists(): @@ -150,22 +157,38 @@ def run(self) -> None: """ message = self.process_first_message() while message: # Loop until no input (empty string). - self.notifier.start() - try: - self.exchange.add(message) - self.reply() # Process the user message. - except KeyboardInterrupt: - self.interrupt_reply() - except Exception: - # rewind to right before the last user message - self.exchange.rewind() - print(traceback.format_exc()) - print( - "\n[red]The error above was an exception we were not able to handle.\n\n[/]" - + "These errors are often related to connection or authentication\n" - + "We've removed the conversation up to the most recent user message" - + " - [yellow]depending on the error you may be able to continue[/]" - ) + span_attributes = { + "goose.role": message.role, + "goose.id": message.id, + } + + # For starters, only add to the trace the first text content + first_content = message.content[0] if message.content else None + if isinstance(first_content, Text): + span_attributes["goose.text"] = first_content.text + + with self.tracer.start_as_current_span(message.role, attributes=span_attributes) as span: + self.notifier.start() + try: + self.exchange.add(message) + self.reply() # Process the user message. + except KeyboardInterrupt: + # TODO: should we make interrupting an error? If not, how should we mark this? + # span as it was interrupted? + span.set_status(OtelStatus(OtelStatusCode.ERROR, "KeyboardInterrupt")) + self.interrupt_reply() + except Exception as e: + # rewind to right before the last user message + self.exchange.rewind() + span.set_status(OtelStatus(OtelStatusCode.ERROR, str(e))) + span.record_exception(e) + print(traceback.format_exc()) + print( + "\n[red]The error above was an exception we were not able to handle.\n\n[/]" + + "These errors are often related to connection or authentication\n" + + "We've removed the conversation up to the most recent user message" + + " - [yellow]depending on the error you may be able to continue[/]" + ) self.notifier.stop() print() # Print a newline for separation. diff --git a/tests/cli/test_session_otel.py b/tests/cli/test_session_otel.py new file mode 100644 index 000000000..02e7e5acd --- /dev/null +++ b/tests/cli/test_session_otel.py @@ -0,0 +1,101 @@ +import pytest +from httpx import HTTPStatusError +from unittest.mock import MagicMock, patch +from exchange import Message, Text +from goose.cli.session import Session +from goose.cli.prompt.goose_prompt_session import GoosePromptSession +from goose.cli.prompt.user_input import PromptAction, UserInput +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter + + +@pytest.fixture +def memory_exporter(): + yield InMemorySpanExporter() + + +@pytest.fixture +def create_session_with_mock_configs(mock_sessions_path, exchange_factory, profile_factory, memory_exporter): + # Create a tracer with the same memory exporter as test assertions use. We do this for each + # test to ensure they can run in parallel without interfering with each other. + tracer_provider = TracerProvider() + memory_exporter = memory_exporter + span_processor = SimpleSpanProcessor(memory_exporter) + tracer_provider.add_span_processor(span_processor) + + with patch("goose.cli.session.build_exchange", return_value=exchange_factory()), patch( + "goose.cli.session.load_profile", return_value=profile_factory() + ), patch("goose.cli.session.SessionNotifier") as mock_session_notifier, patch( + "goose.cli.session.load_provider", return_value="provider" + ): + mock_session_notifier.return_value = MagicMock() + + def create_session(session_attributes: dict = {}): + return Session(**session_attributes, tracer=tracer_provider.get_tracer(__name__)) + + yield create_session + + +def test_trace_run(create_session_with_mock_configs, memory_exporter): + session = create_session_with_mock_configs() + + message = Message(role="user", id="abracadabra", content=[Text("List the files in this directory")]) + + # Call the run function, for a single message which results in an exit. + with patch.object(Session, "process_first_message", return_value=message), patch.object( + Session, "reply", return_value=None + ), patch.object( + GoosePromptSession, "get_user_input", return_value=UserInput(action=PromptAction.EXIT) + ), patch.object(Session, "save_session", return_value=None): + session.run() + + spans = memory_exporter.get_finished_spans() + assert len(spans) == 1 + span = spans[0] + assert span.name == message.role + assert span.attributes == {"goose.id": message.id, "goose.role": message.role, "goose.text": message.text} + + +def test_trace_run_interrupted(create_session_with_mock_configs, memory_exporter): + session = create_session_with_mock_configs() + + message = Message(role="user", id="abracadabra", content=[Text("List the files in this directory")]) + + # Call the run function, for a single message which results in an interrupt. + with patch.object(Session, "process_first_message", return_value=message), patch.object( + Session, "reply", side_effect=KeyboardInterrupt + ), patch.object( + GoosePromptSession, "get_user_input", return_value=UserInput(action=PromptAction.EXIT) + ), patch.object(Session, "save_session", return_value=None): + session.run() + + spans = memory_exporter.get_finished_spans() + assert len(spans) == 1 + span = spans[0] + assert span.name == message.role + assert span.attributes == {"goose.id": message.id, "goose.role": message.role, "goose.text": message.text} + assert span.status.is_ok is False + assert span.status.description == "KeyboardInterrupt" + + +def test_trace_run_error(create_session_with_mock_configs, memory_exporter): + session = create_session_with_mock_configs() + + message = Message(role="user", id="abracadabra", content=[Text("List the files in this directory")]) + + # Call the run function, for a single message which results in an HTTP error. + with patch.object(Session, "process_first_message", return_value=message), patch.object( + Session, "reply", side_effect=HTTPStatusError("HTTP error", request=None, response=None) + ), patch.object( + GoosePromptSession, "get_user_input", return_value=UserInput(action=PromptAction.EXIT) + ), patch.object(Session, "save_session", return_value=None): + session.run() + + spans = memory_exporter.get_finished_spans() + assert len(spans) == 1 + span = spans[0] + assert span.name == message.role + assert span.attributes == {"goose.id": message.id, "goose.role": message.role, "goose.text": message.text} + assert span.status.is_ok is False + assert span.status.description == "HTTP error"