diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index dbe5ab6dd..5b7c0985d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -28,7 +28,7 @@ jobs: uv run pytest tests -m 'not integration' goose: - runs-on: ubuntu-latest + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -48,6 +48,27 @@ jobs: run: | uv run pytest tests -m 'not integration' + langfuse-wrapper: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install UV + run: curl -LsSf https://astral.sh/uv/install.sh | sh + + - name: Source Cargo Environment + run: source $HOME/.cargo/env + + - name: Ruff + run: | + uvx ruff check packages/langfuse-wrapper + uvx ruff format packages/langfuse-wrapper --check + + - name: Run tests + working-directory: ./packages/langfuse-wrapper + run: | + uv run pytest tests -m 'not integration' # This runs integration tests of the OpenAI API, using Ollama to host models. # This lets us test PRs from forks which can't access secrets like API keys. diff --git a/.gitignore b/.gitignore index f799b7221..e002e6176 100644 --- a/.gitignore +++ b/.gitignore @@ -125,3 +125,6 @@ docs/docs/reference # uv lock file uv.lock + +# langfuse docker file +**/packages/langfuse-wrapper/scripts/docker-compose.yaml diff --git a/packages/exchange/pyproject.toml b/packages/exchange/pyproject.toml index 502e224c3..caadef975 100644 --- a/packages/exchange/pyproject.toml +++ b/packages/exchange/pyproject.toml @@ -13,6 +13,8 @@ dependencies = [ "tiktoken>=0.7.0", "httpx>=0.27.0", "tenacity>=9.0.0", + "python-dotenv>=1.0.1", + "langfuse-wrapper" ] [tool.hatch.build.targets.wheel] @@ -47,3 +49,10 @@ ai-exchange = "exchange:module_name" markers = [ "integration: marks tests that need to authenticate (deselect with '-m \"not integration\"')", ] + +[tool.uv.sources] +langfuse-wrapper = { workspace = true} + +[tool.uv.workspace] +members = ["../langfuse-wrapper"] + diff --git a/packages/exchange/src/exchange/exchange.py b/packages/exchange/src/exchange/exchange.py index b2fdbc5ec..31acaf2cf 100644 --- a/packages/exchange/src/exchange/exchange.py +++ b/packages/exchange/src/exchange/exchange.py @@ -14,6 +14,7 @@ from exchange.providers import Provider, Usage from exchange.tool import Tool from exchange.token_usage_collector import _token_usage_collector +from langfuse_wrapper.langfuse_wrapper import observe_wrapper def validate_tool_output(output: str) -> None: @@ -127,6 +128,7 @@ def reply(self, max_tool_use: int = 128) -> Message: return response + @observe_wrapper() def call_function(self, tool_use: ToolUse) -> ToolResult: """Call the function indicated by the tool use""" tool = self._toolmap.get(tool_use.name) diff --git a/packages/exchange/src/exchange/providers/anthropic.py b/packages/exchange/src/exchange/providers/anthropic.py index 84ecd12fb..b6c0df71a 100644 --- a/packages/exchange/src/exchange/providers/anthropic.py +++ b/packages/exchange/src/exchange/providers/anthropic.py @@ -8,6 +8,7 @@ from exchange.providers.base import Provider, Usage from tenacity import retry, wait_fixed, stop_after_attempt from exchange.providers.utils import retry_if_status, raise_for_status +from langfuse_wrapper.langfuse_wrapper import observe_wrapper ANTHROPIC_HOST = "https://api.anthropic.com/v1/messages" @@ -123,6 +124,7 @@ def messages_to_anthropic_spec(messages: List[Message]) -> List[Dict[str, Any]]: messages_spec.append(converted) return messages_spec + @observe_wrapper(as_type="generation") def complete( self, model: str, diff --git a/packages/exchange/src/exchange/providers/bedrock.py b/packages/exchange/src/exchange/providers/bedrock.py index 6c32d7cb3..ad5734266 100644 --- a/packages/exchange/src/exchange/providers/bedrock.py +++ b/packages/exchange/src/exchange/providers/bedrock.py @@ -15,6 +15,7 @@ from tenacity import retry, wait_fixed, stop_after_attempt from exchange.providers.utils import raise_for_status, retry_if_status from exchange.tool import Tool +from langfuse_wrapper.langfuse_wrapper import observe_wrapper SERVICE = "bedrock-runtime" UTC = timezone.utc @@ -175,6 +176,7 @@ def from_env(cls: Type["BedrockProvider"]) -> "BedrockProvider": ) return cls(client=client) + @observe_wrapper(as_type="generation") def complete( self, model: str, diff --git a/packages/exchange/src/exchange/providers/databricks.py b/packages/exchange/src/exchange/providers/databricks.py index 9bd582dc5..56d41ee01 100644 --- a/packages/exchange/src/exchange/providers/databricks.py +++ b/packages/exchange/src/exchange/providers/databricks.py @@ -13,7 +13,7 @@ tools_to_openai_spec, ) from exchange.tool import Tool - +from langfuse_wrapper.langfuse_wrapper import observe_wrapper retry_procedure = retry( wait=wait_fixed(2), @@ -69,6 +69,7 @@ def get_usage(data: dict) -> Usage: total_tokens=total_tokens, ) + @observe_wrapper(as_type="generation") def complete( self, model: str, diff --git a/packages/exchange/src/exchange/providers/google.py b/packages/exchange/src/exchange/providers/google.py index fe83cd605..388b0be0f 100644 --- a/packages/exchange/src/exchange/providers/google.py +++ b/packages/exchange/src/exchange/providers/google.py @@ -8,6 +8,7 @@ from exchange.providers.base import Provider, Usage from tenacity import retry, wait_fixed, stop_after_attempt from exchange.providers.utils import raise_for_status, retry_if_status +from langfuse_wrapper.langfuse_wrapper import observe_wrapper GOOGLE_HOST = "https://generativelanguage.googleapis.com/v1beta" @@ -121,6 +122,7 @@ def messages_to_google_spec(messages: List[Message]) -> List[Dict[str, Any]]: return messages_spec + @observe_wrapper(as_type="generation") def complete( self, model: str, diff --git a/packages/exchange/src/exchange/providers/openai.py b/packages/exchange/src/exchange/providers/openai.py index b25c5a70a..6bf02d011 100644 --- a/packages/exchange/src/exchange/providers/openai.py +++ b/packages/exchange/src/exchange/providers/openai.py @@ -15,6 +15,7 @@ from exchange.tool import Tool from tenacity import retry, wait_fixed, stop_after_attempt from exchange.providers.utils import retry_if_status +from langfuse_wrapper.langfuse_wrapper import observe_wrapper OPENAI_HOST = "https://api.openai.com/" @@ -65,6 +66,7 @@ def get_usage(data: dict) -> Usage: total_tokens=total_tokens, ) + @observe_wrapper(as_type="generation") def complete( self, model: str, diff --git a/packages/langfuse-wrapper/.ruff.toml b/packages/langfuse-wrapper/.ruff.toml new file mode 100644 index 000000000..9accb3c35 --- /dev/null +++ b/packages/langfuse-wrapper/.ruff.toml @@ -0,0 +1,2 @@ +lint.select = ["E", "W", "F", "N"] +line-length = 120 diff --git a/packages/langfuse-wrapper/README.md b/packages/langfuse-wrapper/README.md new file mode 100644 index 000000000..ae28c8713 --- /dev/null +++ b/packages/langfuse-wrapper/README.md @@ -0,0 +1,28 @@ +# Langfuse Wrapper + +This package provides a wrapper for [Langfuse](https://langfuse.com/). The wrapper serves to initialize Langfuse appropriately if the Langfuse server is running locally and otherwise to skip applying the Langfuse observe descorators. + +**Note: This Langfuse integration is experimental and we don't currently have integration tests for it.** + + +## Usage + +### Start your local Langfuse server + +Run `setup_langfuse.sh` to start your local Langfuse server. It requires Docker. + +Read more about local Langfuse deployments [here](https://langfuse.com/docs/deployment/local). + +### Exchange and Goose integration + +Import `from langfuse_wrapper.langfuse_wrapper import observe_wrapper` and use the `observe_wrapper()` decorator on functions you wish to enable tracing for. `observe_wrapper` functions the same way as Langfuse's observe decorator. + +Read more about Langfuse's decorator-based tracing [here](https://langfuse.com/docs/sdk/python/decorators). + +In Goose, initialization requires certain environment variables to be present: + +- `LANGFUSE_PUBLIC_KEY`: Your Langfuse public key +- `LANGFUSE_SECRET_KEY`: Your Langfuse secret key +- `LANGFUSE_BASE_URL`: The base URL of your Langfuse instance + +By default your local deployment and Goose will use the values in `env/.env.langfuse.local`. diff --git a/packages/langfuse-wrapper/env/.env.langfuse.local b/packages/langfuse-wrapper/env/.env.langfuse.local new file mode 100644 index 000000000..98056309e --- /dev/null +++ b/packages/langfuse-wrapper/env/.env.langfuse.local @@ -0,0 +1,16 @@ +# These variables are default initialization variables for the Langfuse server +LANGFUSE_INIT_PROJECT_NAME=goose-local +LANGFUSE_INIT_PROJECT_PUBLIC_KEY=publickey-local +LANGFUSE_INIT_PROJECT_SECRET_KEY=secretkey-local +LANGFUSE_INIT_USER_EMAIL=local@block.xyz +LANGFUSE_INIT_USER_NAME=localdev +LANGFUSE_INIT_USER_PASSWORD=localpwd + +LANGFUSE_INIT_ORG_ID=local-id +LANGFUSE_INIT_ORG_NAME=local-org +LANGFUSE_INIT_PROJECT_ID=goose + +# These variables are used by Goose +LANGFUSE_PUBLIC_KEY=publickey-local +LANGFUSE_SECRET_KEY=secretkey-local +LANGFUSE_HOST=http://localhost:3000 diff --git a/packages/langfuse-wrapper/pyproject.toml b/packages/langfuse-wrapper/pyproject.toml new file mode 100644 index 000000000..7fda9f4ef --- /dev/null +++ b/packages/langfuse-wrapper/pyproject.toml @@ -0,0 +1,28 @@ +[project] +name = "langfuse-wrapper" +version = "0.1.0" +description = "A wrapper for Langfuse integration" +readme = "README.md" +requires-python = ">=3.10" +author = [{ name = "Block", email = "ai-oss-tools@block.xyz" }] +packages = [{ include = "langfuse_wrapper", from = "src" }] + +dependencies = [ + "langfuse>=2.38.2", + "python-dotenv>=1.0.1" +] + +[tool.hatch.build.targets.wheel] +packages = ["src/langfuse_wrapper"] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.uv] +dev-dependencies = ["pytest>=8.3.2"] + +[tool.pytest.ini_options] +markers = [ + "integration: marks tests that need to authenticate (deselect with '-m \"not integration\"')", +] diff --git a/packages/langfuse-wrapper/scripts/setup_langfuse.sh b/packages/langfuse-wrapper/scripts/setup_langfuse.sh new file mode 100755 index 000000000..6b340a41f --- /dev/null +++ b/packages/langfuse-wrapper/scripts/setup_langfuse.sh @@ -0,0 +1,99 @@ +#!/bin/bash + +# setup_langfuse.sh +# +# This script sets up and runs Langfuse locally for development and testing purposes. +# +# Key functionalities: +# 1. Downloads the latest docker-compose.yaml from the Langfuse repository +# 2. Starts Langfuse using Docker Compose with default initialization variables +# 3. Waits for the service to be available +# 4. Launches a browser to open the local Langfuse UI +# 5. Prints login credentials from the environment file +# +# Usage: +# ./setup_langfuse.sh +# +# Requirements: +# - Docker +# - curl +# - A .env.langfuse.local file in the env directory +# +# Note: This script is intended for local development use only. + +set -e + +SCRIPT_DIR=$(realpath "$(dirname "${BASH_SOURCE[0]}")") +LANGFUSE_DOCKER_COMPOSE_URL="https://raw.githubusercontent.com/langfuse/langfuse/main/docker-compose.yml" +LANGFUSE_DOCKER_COMPOSE_FILE="docker-compose.yaml" +LANGFUSE_ENV_FILE="$SCRIPT_DIR/../env/.env.langfuse.local" + +check_dependencies() { + local dependencies=("curl" "docker") + local missing_dependencies=() + + for cmd in "${dependencies[@]}"; do + if ! command -v "$cmd" &> /dev/null; then + missing_dependencies+=("$cmd") + fi + done + + if [ ${#missing_dependencies[@]} -ne 0 ]; then + echo "Missing dependencies: ${missing_dependencies[*]}" + exit 1 + fi +} + +download_docker_compose() { + if ! curl --fail --location --output "$SCRIPT_DIR/docker-compose.yaml" "$LANGFUSE_DOCKER_COMPOSE_URL"; then + echo "Failed to download docker-compose file from $LANGFUSE_DOCKER_COMPOSE_URL" + exit 1 + fi +} + +start_docker_compose() { + docker compose --env-file "$LANGFUSE_ENV_FILE" -f "$LANGFUSE_DOCKER_COMPOSE_FILE" up --detach +} + +wait_for_service() { + echo "Waiting for Langfuse to start..." + local retries=10 + local count=0 + until curl --silent http://localhost:3000 > /dev/null; do + ((count++)) + if [ "$count" -ge "$retries" ]; then + echo "Max retries reached. Langfuse did not start in time." + exit 1 + fi + sleep 1 + done + echo "Langfuse is now available!" +} + +launch_browser() { + if [[ "$OSTYPE" == "linux-gnu"* ]]; then + xdg-open "http://localhost:3000" + elif [[ "$OSTYPE" == "darwin"* ]]; then + open "http://localhost:3000" + else + echo "Please open http://localhost:3000 to view Langfuse traces." + fi +} + +print_login_variables() { + if [ -f "$LANGFUSE_ENV_FILE" ]; then + echo "If not already logged in use the following credentials to log in:" + grep -E "LANGFUSE_INIT_USER_EMAIL|LANGFUSE_INIT_USER_PASSWORD" "$LANGFUSE_ENV_FILE" + else + echo "Langfuse environment file with local credentials not found." + fi +} + +check_dependencies +pushd "$SCRIPT_DIR" > /dev/null +download_docker_compose +start_docker_compose +wait_for_service +print_login_variables +launch_browser +popd > /dev/null diff --git a/packages/langfuse-wrapper/src/langfuse_wrapper/__init__.py b/packages/langfuse-wrapper/src/langfuse_wrapper/__init__.py new file mode 100644 index 000000000..f0816138c --- /dev/null +++ b/packages/langfuse-wrapper/src/langfuse_wrapper/__init__.py @@ -0,0 +1,3 @@ +from langfuse_wrapper.langfuse_wrapper import observe_wrapper # noqa + +module_name = "langfuse-wrapper" diff --git a/packages/langfuse-wrapper/src/langfuse_wrapper/langfuse_wrapper.py b/packages/langfuse-wrapper/src/langfuse_wrapper/langfuse_wrapper.py new file mode 100644 index 000000000..4f464be6e --- /dev/null +++ b/packages/langfuse-wrapper/src/langfuse_wrapper/langfuse_wrapper.py @@ -0,0 +1,84 @@ +""" +Langfuse Integration Module + +This module provides integration with Langfuse, a tool for monitoring and tracing LLM applications. + +Usage: + Import this module to enable Langfuse integration. + It automatically checks for Langfuse credentials in the .env.langfuse file and for a running Langfuse server. + If these are found, it will set up the necessary client and context for tracing. + +Note: + Run setup_langfuse.sh which automates the steps for running local Langfuse. +""" + +import os +from typing import Callable +from dotenv import load_dotenv +from langfuse.decorators import langfuse_context +import sys +from io import StringIO +from pathlib import Path +from functools import wraps # Add this import + + +def find_package_root(start_path: Path, marker_file="pyproject.toml") -> Path: + while start_path != start_path.parent: + if (start_path / marker_file).exists(): + return start_path + start_path = start_path.parent + return None + + +def auth_check() -> bool: + # Temporarily redirect stdout and stderr to suppress print statements from Langfuse + temp_stderr = StringIO() + sys.stderr = temp_stderr + + # Load environment variables + load_dotenv(LANGFUSE_ENV_FILE, override=True) + + auth_val = langfuse_context.auth_check() + + # Restore stderr + sys.stderr = sys.__stderr__ + return auth_val + + +CURRENT_DIR = Path(__file__).parent +PACKAGE_ROOT = find_package_root(CURRENT_DIR) + +LANGFUSE_ENV_FILE = os.path.join(PACKAGE_ROOT, "env", ".env.langfuse.local") +HAS_LANGFUSE_CREDENTIALS = False +load_dotenv(LANGFUSE_ENV_FILE, override=True) + +HAS_LANGFUSE_CREDENTIALS = auth_check() + + +def observe_wrapper(*args, **kwargs) -> Callable: # noqa + """ + A decorator that wraps a function with Langfuse context observation if credentials are available. + + If Langfuse credentials were found, the function will be wrapped with Langfuse's observe method. + Otherwise, the function will be returned as-is. + + Args: + *args: Positional arguments to pass to langfuse_context.observe. + **kwargs: Keyword arguments to pass to langfuse_context.observe. + + Returns: + Callable: The wrapped function if credentials are available, otherwise the original function. + """ + + def _wrapper(fn: Callable) -> Callable: + if HAS_LANGFUSE_CREDENTIALS: + + @wraps(fn) + def wrapped_fn(*fargs, **fkwargs): + return langfuse_context.observe(*args, **kwargs)(fn)(*fargs, **fkwargs) + + return wrapped_fn + else: + return fn + + return _wrapper diff --git a/packages/langfuse-wrapper/tests/test_observe.py b/packages/langfuse-wrapper/tests/test_observe.py new file mode 100644 index 000000000..752719ea1 --- /dev/null +++ b/packages/langfuse-wrapper/tests/test_observe.py @@ -0,0 +1,46 @@ +import pytest +from unittest.mock import patch, MagicMock +from langfuse_wrapper.langfuse_wrapper import observe_wrapper + + +@pytest.fixture +def mock_langfuse_context(): + with patch("langfuse_wrapper.langfuse_wrapper.langfuse_context") as mock: + yield mock + + +@patch("langfuse_wrapper.langfuse_wrapper.HAS_LANGFUSE_CREDENTIALS", True) +def test_function_is_wrapped(mock_langfuse_context): + mock_observe = MagicMock(side_effect=lambda *args, **kwargs: lambda fn: fn) + mock_langfuse_context.observe = mock_observe + + def original_function(x: int, y: int) -> int: + return x + y + + # test function before we decorate it with + # @observe_wrapper("arg1", kwarg1="kwarg1") + assert not hasattr(original_function, "__wrapped__") + + # ensure we args get passed along (e.g. @observe(capture_input=False, capture_output=False)) + decorated_function = observe_wrapper("arg1", kwarg1="kwarg1")(original_function) + assert hasattr(decorated_function, "__wrapped__") + assert decorated_function.__wrapped__ is original_function, "Function is not properly wrapped" + + assert decorated_function(2, 3) == 5 + mock_observe.assert_called_once() + mock_observe.assert_called_with("arg1", kwarg1="kwarg1") + + +@patch("langfuse_wrapper.langfuse_wrapper.HAS_LANGFUSE_CREDENTIALS", False) +def test_function_is_not_wrapped(mock_langfuse_context): + mock_observe = MagicMock(return_value=lambda f: f) + mock_langfuse_context.observe = mock_observe + + @observe_wrapper("arg1", kwarg1="kwarg1") + def hello() -> str: + return "Hello" + + assert not hasattr(hello, "__wrapped__") + assert hello() == "Hello" + + mock_observe.assert_not_called() diff --git a/pyproject.toml b/pyproject.toml index 08ad00612..3ee11275f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,9 @@ dependencies = [ "click>=8.1.7", "prompt-toolkit>=3.0.47", "keyring>=25.4.1", + "langfuse-wrapper", ] + author = [{ name = "Block", email = "ai-oss-tools@block.xyz" }] packages = [{ include = "goose", from = "src" }] @@ -65,11 +67,12 @@ dev-dependencies = [ "mkdocstrings-python>=1.11.1", "mkdocstrings>=0.26.1", "pytest-mock>=3.14.0", - "pytest>=8.3.2" + "pytest>=8.3.2", ] [tool.uv.sources] ai-exchange = { workspace = true } +langfuse-wrapper = { workspace = true } [tool.uv.workspace] members = ["packages/*"] diff --git a/src/goose/cli/main.py b/src/goose/cli/main.py index 7d1359889..7527b0adc 100644 --- a/src/goose/cli/main.py +++ b/src/goose/cli/main.py @@ -136,7 +136,10 @@ def get_session_files() -> dict[str, Path]: @click.option("--profile") @click.option("--plan", type=click.Path(exists=True)) @click.option("--log-level", type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]), default="INFO") -def session_start(name: Optional[str], profile: str, log_level: str, plan: Optional[str] = None) -> None: +@click.option("--tracing", is_flag=True, required=False) +def session_start( + name: Optional[str], profile: str, log_level: str, plan: Optional[str] = None, tracing: bool = False +) -> None: """Start a new goose session""" if plan: yaml = YAML() @@ -144,8 +147,12 @@ def session_start(name: Optional[str], profile: str, log_level: str, plan: Optio _plan = yaml.load(f) else: _plan = None - session = Session(name=name, profile=profile, plan=_plan, log_level=log_level) - session.run() + + try: + session = Session(name=name, profile=profile, plan=_plan, log_level=log_level, tracing=tracing) + session.run() + except RuntimeError as e: + print(f"[red]Error: {e}") def parse_args(ctx: click.Context, param: click.Parameter, value: str) -> dict[str, str]: diff --git a/src/goose/cli/session.py b/src/goose/cli/session.py index cf9dab700..fdf9a8d2a 100644 --- a/src/goose/cli/session.py +++ b/src/goose/cli/session.py @@ -1,8 +1,11 @@ import traceback from pathlib import Path from typing import Any, Optional +import logging +from langfuse.decorators import langfuse_context -from exchange import Message, Text, ToolResult, ToolUse +from exchange import Message, ToolResult, ToolUse, Text +from langfuse_wrapper.langfuse_wrapper import observe_wrapper, auth_check from rich import print from rich.markdown import Markdown from rich.panel import Panel @@ -62,6 +65,7 @@ def __init__( profile: Optional[str] = None, plan: Optional[dict] = None, log_level: Optional[str] = "INFO", + tracing: bool = False, **kwargs: dict[str, Any], ) -> None: if name is None: @@ -72,10 +76,20 @@ def __init__( self.prompt_session = GoosePromptSession() self.status_indicator = Status("", spinner="dots") self.notifier = SessionNotifier(self.status_indicator) - + if not tracing: + logging.getLogger("langfuse").setLevel(logging.ERROR) + else: + langfuse_auth = auth_check() + if langfuse_auth: + print("Local Langfuse initialized. View your traces at http://localhost:3000") + else: + raise RuntimeError( + "You passed --tracing, but a Langfuse object was not found in the current context. " + "Please initialize the local Langfuse server and restart Goose." + ) + langfuse_context.configure(enabled=tracing) self.exchange = create_exchange(profile=load_profile(profile), notifier=self.notifier) setup_logging(log_file_directory=LOG_PATH, log_level=log_level) - self.exchange.messages.extend(self._get_initial_messages()) if len(self.exchange.messages) == 0 and plan: @@ -186,6 +200,7 @@ def run(self) -> None: self._remove_empty_session() self._log_cost() + @observe_wrapper() def reply(self) -> None: """Reply to the last user message, calling tools as needed""" self.status_indicator.update("responding") diff --git a/tests/cli/test_main.py b/tests/cli/test_main.py index 9f6f2f66f..38b38920f 100644 --- a/tests/cli/test_main.py +++ b/tests/cli/test_main.py @@ -34,7 +34,9 @@ def test_session_start_command_with_session_name(mock_session): mock_session_class, mock_session_instance = mock_session runner = CliRunner() runner.invoke(goose_cli, ["session", "start", "session1", "--profile", "default"]) - mock_session_class.assert_called_once_with(name="session1", profile="default", plan=None, log_level="INFO") + mock_session_class.assert_called_once_with( + name="session1", profile="default", plan=None, log_level="INFO", tracing=False + ) mock_session_instance.run.assert_called_once()