diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ef5afb994..0fb1701e0 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -9,14 +9,19 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v4 - - name: Install UV - run: curl -LsSf https://astral.sh/uv/install.sh | sh + - name: Install UV + run: curl -LsSf https://astral.sh/uv/install.sh | sh - - name: Source Cargo Environment - run: source $HOME/.cargo/env + - name: Source Cargo Environment + run: source $HOME/.cargo/env - - name: Run tests - run: | - uv run pytest tests -m 'not integration' \ No newline at end of file + - name: Ruff + run: | + uvx ruff check + uvx ruff format --check + + - name: Run tests + run: | + uv run pytest tests -m 'not integration' diff --git a/.github/workflows/pull_request_title.yaml b/.github/workflows/pull_request_title.yaml new file mode 100644 index 000000000..8a32172a1 --- /dev/null +++ b/.github/workflows/pull_request_title.yaml @@ -0,0 +1,48 @@ +name: 'Lint PR' + +on: + pull_request_target: + types: + - opened + - edited + - synchronize + - reopened + +permissions: + pull-requests: write + +jobs: + main: + name: Validate PR title + runs-on: ubuntu-latest + steps: + - uses: amannn/action-semantic-pull-request@v5 + id: lint_pr_title + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + requireScope: false + + - uses: marocchino/sticky-pull-request-comment@v2 + # When the previous steps fails, the workflow would stop. By adding this + # condition you can continue the execution with the populated error message. + if: always() && (steps.lint_pr_title.outputs.error_message != null) + with: + header: pr-title-lint-error + message: | + Hey there and thank you for opening this pull request! 
👋🏼 + + We require pull request titles to follow the [Conventional Commits specification](https://gist.github.com/Zekfad/f51cb06ac76e2457f11c80ed705c95a3#file-conventional-commits-md) and it looks like your proposed title needs to be adjusted. + + Details: + + ``` + ${{ steps.lint_pr_title.outputs.error_message }} + ``` + + # Delete a previous comment when the issue has been resolved + - if: ${{ steps.lint_pr_title.outputs.error_message == null }} + uses: marocchino/sticky-pull-request-comment@v2 + with: + header: pr-title-lint-error + delete: true diff --git a/.github/workflows/pypi_release.yaml b/.github/workflows/pypi_release.yaml index d47641db5..98758fb96 100644 --- a/.github/workflows/pypi_release.yaml +++ b/.github/workflows/pypi_release.yaml @@ -21,8 +21,26 @@ jobs: - name: Build with UV run: uvx --from build pyproject-build --installer uv + - name: Check version + id: check_version + run: | + PACKAGE_NAME=$(grep '^name =' pyproject.toml | sed -E 's/name = "(.*)"/\1/') + TAG_VERSION=$(echo "$GITHUB_REF" | sed -E 's/refs\/tags\/v(.+)/\1/') + CURRENT_VERSION=$(curl -s https://pypi.org/pypi/$PACKAGE_NAME/json | jq -r .info.version) + PROJECT_VERSION=$(grep '^version =' pyproject.toml | sed -E 's/version = "(.*)"/\1/') + if [ "$TAG_VERSION" != "$PROJECT_VERSION" ]; then + echo "Tag version does not match version in pyproject.toml" + exit 1 + fi + if python -c "from packaging.version import parse as parse_version; exit(0 if parse_version('$TAG_VERSION') > parse_version('$CURRENT_VERSION') else 1)"; then + echo "new_version=true" >> $GITHUB_OUTPUT + else + exit 1 + fi + - name: Publish uses: pypa/gh-action-pypi-publish@v1.4.2 + if: steps.check_version.outputs.new_version == 'true' with: user: __token__ password: ${{ secrets.PYPI_TOKEN_TEMP }} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a81c64251..daf70867c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -124,7 +124,11 @@ And now you can run goose with this new profile to use the new toolkit! 
goose session start --profile demo ``` -[developer]: src/goose/toolkit/developer.py +## Conventional Commits + +This project follows the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) specification for PR titles. Conventional Commits make it easier to understand the history of a project and facilitate automation around versioning and changelog generation. + +[developer]: src/goose/toolkit/developer.py [uv]: https://docs.astral.sh/uv/ [ruff]: https://docs.astral.sh/ruff/ [just]: https://github.com/casey/just diff --git a/LICENSE b/LICENSE index 261eeb9e9..c83043d44 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright [yyyy] [name of copyright owner] + Copyright 2024 Block, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index 0c9bcf7f7..873e31ee7 100644 --- a/README.md +++ b/README.md @@ -9,21 +9,50 @@ goose
-Usage • -Installation • -Tips +Usage • +Configuration • +Tips • +FAQ • +Open Source
`goose` assists in solving a wide range of programming and operational tasks. It is a live virtual developer you can interact with, guide, and learn from. -To solve problems, goose breaks down instructions into sequences of tasks and carries them out using tools. Its ability to connect its changes with real outcomes (e.g. errors) and course correct is its most powerful and exciting feature. goose is free open source software and is built to be extensible and customizable. +To solve problems, `goose` breaks down instructions into sequences of tasks and carries them out using tools. Its ability to connect its changes with real outcomes (e.g. errors) and course correct is its most powerful and exciting feature. `goose` is free open source software and is built to be extensible and customizable. + +![goose_demo](https://github.com/user-attachments/assets/0794eaba-97ab-40ef-af64-6fc7f68eb8e2) + + ## Usage +### Installation -You interact with goose in conversational sessions - something like a natural language driven code interpreter. -The default toolkit lets it take actions through shell commands and file edits. -You can interrupt Goose at any time to help redirect its efforts. +To install `goose`, we recommend `pipx` +First make sure you've [installed pipx][pipx] - for example + +``` sh +brew install pipx +pipx ensurepath +``` + +Then you can install `goose` with + +```sh +pipx install goose-ai +``` +#### IDEs +There is an early version of a VS Code extension with goose support you can try here: https://github.com/square/goose-vscode - more to come soon. + +### LLM provider access setup +`goose` works on top of LLMs (you need to bring your own LLM). By default, `goose` uses `openai` as LLM provider. You need to set OPENAI_API_KEY as an environment variable if you would like to use `openai`. 
+```sh +export OPENAI_API_KEY=your_open_api_key +``` + +Otherwise, please refer to Configuration to customise `goose`. + +### Start `goose` session From your terminal, navigate to the directory you'd like to start from and run: ```sh goose session start @@ -34,93 +63,146 @@ You will see a prompt `G❯`: ``` G❯ type your instructions here exactly as you would tell a developer. ``` +Now you can interact with `goose` in conversational sessions - something like a natural language driven code interpreter. +The default toolkit lets it take actions through shell commands and file edits. +You can interrupt `goose` at any time to help redirect its efforts. -> [!NOTE] -> From here you can talk directly with goose - send along your instructions. If you are looking to exit, use `CTRL+D`, -> although goose should help you figure that out if you forget. See below for some examples. +### Exit `goose` session +If you are looking to exit, use `CTRL+D`, although `goose` should help you figure that out if you forget. See below for some examples. -When you exit a session, it will save the history and you can resume it later on: +### Resume `goose` session +When you exit a session, it will save the history in the `~/.config/goose/sessions` directory and you can resume it later on: ``` sh goose session resume ``` -## Tips +## Configuration -Here are some collected tips we have for working efficiently with Goose +`goose` can detect what LLM and toolkits it can work with from the configuration file `~/.config/goose/profiles.yaml` automatically. -- **goose can and will edit files**. Use a git strategy to avoid losing anything - such as staging your - personal edits and leaving goose edits unstaged until reviewed. Or consider using indivdual commits which can be reverted. -- **goose can and will run commands**. You can ask it to check with you first if you are concerned. It will check commands for safety as well. -- You can interrupt goose with `CTRL+C` to correct it or give it more info. 
-- goose works best when solving concrete problems - experiment with how far you need to break that problem - down to get goose to solve it. Be specific! E.g. it will likely fail to `"create a banking app"`, - but probably does a good job if prompted with `"create a Fastapi app with an endpoint for deposit and withdrawal - and with account balances stored in mysql keyed by id"` -- If goose doesn't have enough context to start with, it might go down the wrong direction. Tell it - to read files that you are refering to or search for objects in code. Even better, ask it to summarize - them for you, which will help it set up its own next steps. -- Refer to any objects in files with something that is easy to search for, such as `"the MyExample class" -- goose *loves* to know how to run tests to get a feedback loop going, just like you do. If you tell it how you test things locally and quickly, it can make use of that when working on your project -- You can use goose for tasks that would require scripting at times, even looking at your screen and correcting designs/helping you fix bugs, try asking it to help you in a way you would ask a person. -- goose will make mistakes, and go in the wrong direction from times, feel free to correct it, or start again. -- You can tell goose to run things for you continuously (and it will iterate, try, retry) but you can also tell it to check with you before doing things (and then later on tell it to go off on its own and do its best to solve). -- Goose can run anywhere, doesn't have to be in a repo, just ask it! +### Configuration options +Example: + +```yaml +default: + provider: openai + processor: gpt-4o + accelerator: gpt-4o-mini + moderator: truncate + toolkits: + - name: developer + requires: {} + - name: screen + requires: {} +``` -## Installation +You can edit this configuration file to use different LLMs and toolkits in `goose`. 
`goose` can also be extended to support any LLM or combination of LLMs. -To install goose, we recommend `pipx` +#### provider +Provider of LLM. LLM providers that are currently supported by `goose`: -First make sure you've [installed pipx][pipx] - for example +| Provider | Required environment variable(s) to access provider | +| :----- | :------------------------------ | +| openai | `OPENAI_API_KEY` | +| anthropic | `ANTHROPIC_API_KEY` | +| databricks | `DATABRICKS_HOST` and `DATABRICKS_TOKEN` | -``` sh -brew install pipx -pipx ensurepath -``` -Then you can install goose with +#### processor +Model for complex, multi-step tasks such as writing code and executing commands. Example: `gpt-4o`. You should choose the model based on the provider you configured. -``` sh -pipx install goose-ai -``` +#### accelerator +Small model for fast, lightweight tasks. Example: `gpt-4o-mini`. You should choose the model based on the provider you configured. -### Config +#### moderator +Rules designed to control or manage the output of the model. Moderators that are currently supported by `goose`: -Goose will try to detect what LLM it can work with and place a config in `~/.config/goose/profiles.yaml` automatically. +- `passive`: does not actively intervene in every response +- `truncate`: truncates the first contexts when the contexts exceed the max token size -#### Toolkits +#### toolkits -Goose can be extended with toolkits, and out of the box there are some available: +`goose` can be extended with toolkits, and out of the box there are some available: +* `developer`: for general-purpose development capabilities, including plan management, shell execution, and file operations, with default shell strategies like using ripgrep. * `screen`: for letting goose take a look at your screen to help debug or work on designs (gives goose eyes) * `github`: for awareness and suggestions on how to use github * `repo_context`: for summarizing and understanding a repository you are working in. 
-To configure for example the screen toolkit, edit `~/.config/goose/profiles.yaml`: +#### Configuring goose per repo + +If you are using the `developer` toolkit, `goose` adds the content from `.goosehints` + file in working directory to the system prompt of the `developer` toolkit. The hints +file is meant to provide additional context about your project. The context can be +user-specific or at the project level in which case, you +can commit it to git. `.goosehints` file is Jinja templated so you could have something +like this: +``` +Here is an overview of how to contribute: +{% include 'CONTRIBUTING.md' %} + +The following justfile shows our common commands: +```just +{% include 'justfile' %} +``` + +### Examples +#### provider as `anthropic` ```yaml - provider: openai - processor: gpt-4o - accelerator: gpt-4o-mini +default: + provider: anthropic + processor: claude-3-5-sonnet-20240620 + accelerator: claude-3-5-sonnet-20240620 +... +``` +#### provider as `databricks` +```yaml +default: + provider: databricks + processor: databricks-meta-llama-3-1-70b-instruct + accelerator: databricks-meta-llama-3-1-70b-instruct moderator: passive toolkits: - name: developer requires: {} - - name: screen - requires: {} ``` +## Tips + +Here are some collected tips we have for working efficiently with `goose` + +- **`goose` can and will edit files**. Use a git strategy to avoid losing anything - such as staging your + personal edits and leaving `goose` edits unstaged until reviewed. Or consider using individual commits which can be reverted. +- **`goose` can and will run commands**. You can ask it to check with you first if you are concerned. It will check commands for safety as well. +- You can interrupt `goose` with `CTRL+C` to correct it or give it more info. +- `goose` works best when solving concrete problems - experiment with how far you need to break that problem + down to get `goose` to solve it. Be specific! E.g. 
it will likely fail to `"create a banking app"`, + but probably does a good job if prompted with `"create a Fastapi app with an endpoint for deposit and withdrawal + and with account balances stored in mysql keyed by id"` +- If `goose` doesn't have enough context to start with, it might go in the wrong direction. Tell it + to read files that you are referring to or search for objects in code. Even better, ask it to summarize + them for you, which will help it set up its own next steps. +- Refer to any objects in files with something that is easy to search for, such as `"the MyExample class"`. +- `goose` *loves* to know how to run tests to get a feedback loop going, just like you do. If you tell it how you test things locally and quickly, it can make use of that when working on your project. +- You can use `goose` for tasks that would require scripting at times, even looking at your screen and correcting designs/helping you fix bugs, try asking it to help you in a way you would ask a person. +- `goose` will make mistakes, and go in the wrong direction from time to time; feel free to correct it, or start again. +- You can tell `goose` to run things for you continuously (and it will iterate, try, retry) but you can also tell it to check with you before doing things (and then later on tell it to go off on its own and do its best to solve). +- `goose` can run anywhere, doesn't have to be in a repo, just ask it! + + ### Examples -Here are some examples that have been used: +Here are some examples that have been used: ``` G❯ Looking at the in progress changes in this repo, help me finish off the feature. CONTRIBUTING.md shows how to run the tests. ``` ``` -G❯ In this golang project, I want you to add open telemetry to help me get started with it. Look in the moneymovements module, run the `just test` command to check things work. +G❯ In this golang project, I want you to add open telemetry to help me get started with it. 
Look in the moneymovements module, run the `just test` command to check things work. ``` ``` @@ -132,7 +214,7 @@ G❯ This is a fresh checkout of a golang project. I do not have my golang envir ``` ``` -G❯ In this repo, I want you to look at how to add a new provider for azure. +G❯ In this repo, I want you to look at how to add a new provider for azure. Some hints are in this github issue: https://github.com/square/exchange/issues /4 (you can use gh cli to access it). ``` @@ -141,61 +223,18 @@ Some hints are in this github issue: https://github.com/square/exchange/issues G❯ I want you to help me increase the test coverage in src/java... use mvn test to run the unit tests to check it works. ``` +## FAQ -#### Advanced LLM config - -goose works on top of LLMs (you bring your own LLM). If you need to customize goose, one way is via editing: `~/.config/goose/profiles.yaml`. - -It will look by default something like: - -```yaml -default: - provider: openai - processor: gpt-4o - accelerator: gpt-4o-mini - moderator: truncate - toolkits: - - name: developer - requires: {} -``` - -*Note: This requires the environment variable `OPENAI_API_KEY` to be set to your OpenAI API key. goose uses at least 2 LLMs: one for acceleration for fast operating, and processing for writing code and executing commands.* - -You can tell it to use another provider for example for Anthropic: - -```yaml -default: - provider: anthropic - processor: claude-3-5-sonnet-20240620 - accelerator: claude-3-5-sonnet-20240620 -... 
-``` - -*Note: This will then use the claude-sonnet model, you will need to set the `ANTHROPIC_API_KEY` environment variable to your anthropic API key.* - -For Databricks hosted models: - -```yaml -default: - provider: databricks - processor: databricks-meta-llama-3-1-70b-instruct - accelerator: databricks-meta-llama-3-1-70b-instruct - moderator: passive - toolkits: - - name: developer - requires: {} -``` - -This requires `DATABRICKS_HOST` and `DATABRICKS_TOKEN` to be set accordingly +**Q:** Why did I get the error message "The model `gpt-4o` does not exist or you do not have access to it." when I talked to `goose`? -(goose can be extended to support any LLM or combination of LLMs). +**A:** You can find the LLM provider and models in the configuration file `~/.config/goose/profiles.yaml`, then check whether your LLM provider account has access to those models. For example, after you have made a successful payment of $5 or more (usage tier 1), you'll be able to access the GPT-4, GPT-4 Turbo, GPT-4o models via the OpenAI API. See [How can I access GPT-4, GPT-4 Turbo, GPT-4o, and GPT-4o mini?](https://help.openai.com/en/articles/7102672-how-can-i-access-gpt-4-gpt-4-turbo-gpt-4o-and-gpt-4o-mini). ## Open Source -Yes, goose is open source and always will be. goose is released under the ASL2.0 license meaning you can use it however you like. +Yes, `goose` is open source and always will be. `goose` is released under the ASL2.0 license meaning you can use it however you like. See LICENSE.md for more details. -To run goose from source, please see `CONTRIBUTING.md` for instructions on how to set up your environment and you can then run `uv run goose session start`. +To run `goose` from source, please see `CONTRIBUTING.md` for instructions on how to set up your environment and you can then run `uv run goose session start`. 
[pipx]: https://github.com/pypa/pipx?tab=readme-ov-file#install-pipx diff --git a/justfile b/justfile index a7f021ede..994da5f9b 100644 --- a/justfile +++ b/justfile @@ -10,8 +10,8 @@ integration *FLAGS: uv run pytest tests -m integration {{FLAGS}} format: - ruff check . --fix - ruff format . + uvx ruff check . --fix + uvx ruff format . coverage *FLAGS: uv run coverage run -m pytest tests -m "not integration" {{FLAGS}} diff --git a/pyproject.toml b/pyproject.toml index 18bff707f..f16472558 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,14 +1,14 @@ [project] name = "goose-ai" description = "a programming agent that runs on your machine" -version = "0.8.0" +version = "0.8.6" readme = "README.md" requires-python = ">=3.10" dependencies = [ "attrs>=23.2.0", "rich>=13.7.1", "ruamel-yaml>=0.18.6", - "ai-exchange>=0.8.0", + "ai-exchange>=0.8.4", "click>=8.1.7", "prompt-toolkit>=3.0.47", ] @@ -18,6 +18,9 @@ packages = [{ include = "goose", from = "src" }] [tool.hatch.build.targets.wheel] packages = ["src/goose"] +[project.entry-points."metadata.plugins"] +goose-ai = "goose.module_name" + [project.entry-points."goose.toolkit"] developer = "goose.toolkit.developer:Developer" github = "goose.toolkit.github:Github" @@ -30,9 +33,11 @@ default = "goose.profile:default_profile" [project.entry-points."goose.command"] file = "goose.command.file:FileCommand" -[project.entry-points."goose.cli"] +[project.entry-points."goose.cli.group"] goose = "goose.cli.main:goose_cli" +[project.entry-points."goose.cli.group_option"] + [project.scripts] goose = "goose.cli.main:cli" @@ -45,4 +50,3 @@ dev-dependencies = [ "pytest>=8.3.2", "codecov>=2.1.13", ] - diff --git a/src/goose/cli/main.py b/src/goose/cli/main.py index 30bf3d9b1..0e266dd70 100644 --- a/src/goose/cli/main.py +++ b/src/goose/cli/main.py @@ -1,4 +1,3 @@ -import sys from datetime import datetime from pathlib import Path from typing import Dict, Optional @@ -23,28 +22,15 @@ def version() -> None: """Lists the version of goose 
and any plugins""" from importlib.metadata import entry_points, version - print(f"[green]Goose[/green]: [bold][cyan]{version('goose')}[/cyan][/bold]") + print(f"[green]Goose-ai[/green]: [bold][cyan]{version('goose-ai')}[/cyan][/bold]") print("[green]Plugins[/green]:") - filtered_groups = {} + entry_points = entry_points(group="metadata.plugins") modules = set() - if sys.version_info.minor >= 12: - for ep in entry_points(): - group = getattr(ep, "group", None) - if group and (group.startswith("exchange.") or group.startswith("goose.")): - filtered_groups.setdefault(group, []).append(ep) - for eps in filtered_groups.values(): - for ep in eps: - module_name = ep.module.split(".")[0] - modules.add(module_name) - else: - eps = entry_points() - for group, entries in eps.items(): - if group.startswith("exchange.") or group.startswith("goose."): - for entry in entries: - module_name = entry.value.split(".")[0] - modules.add(module_name) - - modules.remove("goose") + + for ep in entry_points: + module_name = ep.name + modules.add(module_name) + modules.remove("goose-ai") for module in sorted(list(modules)): # TODO: figure out how to get this to work for goose plugins block # as the module name is set to block.goose.cli @@ -62,6 +48,20 @@ def session() -> None: pass +@goose_cli.group() +def toolkit() -> None: + """Manage toolkits""" + pass + + +@toolkit.command(name="list") +def list_toolkits() -> None: + print("[green]Available toolkits:[/green]") + for toolkit_name, toolkit in load_plugins("goose.toolkit").items(): + first_line_of_doc = toolkit.__doc__.split("\n")[0] + print(f" - [bold]{toolkit_name}[/bold]: {first_line_of_doc}") + + @session.command(name="start") @click.option("--profile") @click.option("--plan", type=click.Path(exists=True)) @@ -81,7 +81,7 @@ def session_start(profile: str, plan: Optional[str] = None) -> None: @session.command(name="resume") @click.argument("name", required=False) @click.option("--profile") -def session_resume(name: str, profile: str) -> 
None: +def session_resume(name: Optional[str], profile: str) -> None: """Resume an existing goose session""" if name is None: session_files = get_session_files() @@ -97,6 +97,7 @@ def session_resume(name: str, profile: str) -> None: @session.command(name="list") def session_list() -> None: + """List goose sessions""" session_files = get_session_files().items() for session_name, session_file in session_files: print(f"{datetime.fromtimestamp(session_file.stat().st_mtime).strftime('%Y-%m-%d %H:%M:%S')} {session_name}") @@ -105,6 +106,7 @@ def session_list() -> None: @session.command(name="clear") @click.option("--keep", default=3, help="Keep this many entries, default 3") def session_clear(keep: int) -> None: + """Delete old goose sessions, keeping the most recent sessions up to the specified number""" for i, (_, session_file) in enumerate(get_session_files().items()): if i >= keep: session_file.unlink() @@ -114,12 +116,24 @@ def get_session_files() -> Dict[str, Path]: return list_sorted_session_files(SESSIONS_PATH) -# merging goose cli with additional cli plugins. 
-def cli() -> None: - clis = load_plugins("goose.cli") - cli_list = list(clis.values()) or [] - click.CommandCollection(sources=cli_list)() +@click.group( + invoke_without_command=True, + name="goose", + help="AI-powered tool to assist in solving programming and operational tasks", +) +@click.pass_context +def cli(_: click.Context, **kwargs: Dict) -> None: + pass + + +all_cli_group_options = load_plugins("goose.cli.group_option") +for option in all_cli_group_options.values(): + cli = option()(cli) +all_cli_groups = load_plugins("goose.cli.group") +for group in all_cli_groups.values(): + for command in group.commands.values(): + cli.add_command(command) if __name__ == "__main__": cli() diff --git a/src/goose/cli/session.py b/src/goose/cli/session.py index 3da620d33..4888e6154 100644 --- a/src/goose/cli/session.py +++ b/src/goose/cli/session.py @@ -2,7 +2,7 @@ from pathlib import Path from typing import Any, Dict, List, Optional -from exchange import Message, ToolResult, ToolUse +from exchange import Message, ToolResult, ToolUse, Text from prompt_toolkit.shortcuts import confirm from rich import print from rich.console import RenderableType @@ -24,6 +24,8 @@ from goose.utils import droid, load_plugins from goose.utils.session_file import read_from_file, write_to_file +RESUME_MESSAGE = "I see we were interrupted. How can I help you?" 
+ def load_provider() -> str: # We try to infer a provider, by going in order of what will auth @@ -61,6 +63,7 @@ def load_profile(name: Optional[str]) -> Profile: class SessionNotifier(Notifier): def __init__(self, status_indicator: Status) -> None: self.status_indicator = status_indicator + self.live = Live(self.status_indicator, refresh_per_second=8, transient=True) def log(self, content: RenderableType) -> None: print(content) @@ -68,6 +71,12 @@ def log(self, content: RenderableType) -> None: def status(self, status: str) -> None: self.status_indicator.update(status) + def start(self) -> None: + self.live.start() + + def stop(self) -> None: + self.live.stop() + class Session: """A session handler for managing interactions between a user and the Goose exchange @@ -85,14 +94,28 @@ def __init__( ) -> None: self.name = name self.status_indicator = Status("", spinner="dots") - notifier = SessionNotifier(self.status_indicator) + self.notifier = SessionNotifier(self.status_indicator) - self.exchange = build_exchange(profile=load_profile(profile), notifier=notifier) + self.exchange = build_exchange(profile=load_profile(profile), notifier=self.notifier) if name is not None and self.session_file_path.exists(): messages = self.load_session() + if messages and messages[-1].role == "user": + if type(messages[-1].content[-1]) is Text: + # remove the last user message + messages.pop() + elif type(messages[-1].content[-1]) is ToolResult: + # if we remove this message, we would need to remove + # the previous assistant message as well. 
instead of doing + # that, we just add a new assistant message to prompt the user + messages.append(Message.assistant(RESUME_MESSAGE)) + if messages and type(messages[-1].content[-1]) is ToolUse: + # remove the last request for a tool to be used messages.pop() + + # add a new assistant text message to prompt the user + messages.append(Message.assistant(RESUME_MESSAGE)) self.exchange.messages.extend(messages) if len(self.exchange.messages) == 0 and plan: @@ -127,22 +150,23 @@ def run(self) -> None: """ message = self.process_first_message() while message: # Loop until no input (empty string). - with Live(self.status_indicator, refresh_per_second=8, transient=True): - try: - self.exchange.add(message) - self.reply() # Process the user message. - except KeyboardInterrupt: - self.interrupt_reply() - except Exception: - print(traceback.format_exc()) - if self.exchange.messages: - self.exchange.messages.pop() - print( - "\n[red]The error above was an exception we were not able to handle.\n\n[/]" - + "These errors are often related to connection or authentication\n" - + "We've removed your most recent input" - + " - [yellow]depending on the error you may be able to continue[/]" - ) + self.notifier.start() + try: + self.exchange.add(message) + self.reply() # Process the user message. + except KeyboardInterrupt: + self.interrupt_reply() + except Exception: + # rewind to right before the last user message + self.exchange.rewind() + print(traceback.format_exc()) + print( + "\n[red]The error above was an exception we were not able to handle.\n\n[/]" + + "These errors are often related to connection or authentication\n" + + "We've removed the conversation up to the most recent user message" + + " - [yellow]depending on the error you may be able to continue[/]" + ) + self.notifier.stop() print() # Print a newline for separation. 
user_input = self.prompt_session.get_user_input() diff --git a/src/goose/notifier.py b/src/goose/notifier.py index 358256e11..f140c043d 100644 --- a/src/goose/notifier.py +++ b/src/goose/notifier.py @@ -1,5 +1,6 @@ from abc import ABC, abstractmethod +from typing import Optional from rich.console import RenderableType @@ -19,10 +20,20 @@ def log(self, content: RenderableType) -> None: pass @abstractmethod - def status(self, status: str) -> None: + def status(self, status: Optional[str]) -> None: """Log a status to ephemeral display Args: status (str): The status to display """ pass + + @abstractmethod + def start(self) -> None: + """Start the display for the notifier""" + pass + + @abstractmethod + def stop(self) -> None: + """Stop the display for the notifier""" + pass diff --git a/src/goose/toolkit/developer.py b/src/goose/toolkit/developer.py index 6f53ca4da..062b1b400 100644 --- a/src/goose/toolkit/developer.py +++ b/src/goose/toolkit/developer.py @@ -1,31 +1,30 @@ from pathlib import Path from subprocess import CompletedProcess, run from typing import List +from goose.utils.check_shell_command import is_dangerous_command from exchange import Message from rich import box from rich.markdown import Markdown from rich.panel import Panel -from rich.prompt import Confirm, PromptType +from rich.prompt import Confirm from rich.table import Table from rich.text import Text from goose.toolkit.base import Toolkit, tool -from goose.toolkit.utils import get_language +from goose.toolkit.utils import get_language, render_template -def keep_unsafe_command_prompt(command: str) -> PromptType: +def keep_unsafe_command_prompt(command: str) -> bool: command_text = Text(command, style="bold red") message = ( - Text("\nWe flagged the command: ") - + command_text - + Text(" as potentially unsafe, do you want to proceed? 
(yes/no)") + Text("\nWe flagged the command: ") + command_text + Text(" as potentially unsafe, do you want to proceed?") ) return Confirm.ask(message, default=True) class Developer(Toolkit): - """The developer toolkit provides a set of general purpose development capabilities + """Provides a set of general purpose development capabilities The tools include plan management, a general purpose shell execution tool, and file operations. We also include some default shell strategies in the prompt, such as using ripgrep @@ -33,7 +32,12 @@ class Developer(Toolkit): def system(self) -> str: """Retrieve system configuration details for developer""" - return Message.load("prompts/developer.jinja").text + hints_path = Path(".goosehints") + system_prompt = Message.load("prompts/developer.jinja").text + if hints_path.is_file(): + goosehints = render_template(hints_path) + system_prompt = f"{system_prompt}\n\nHints:\n{goosehints}" + return system_prompt @tool def update_plan(self, tasks: List[dict]) -> List[dict]: @@ -135,34 +139,23 @@ def shell(self, command: str) -> str: command (str): The shell command to run. It can support multiline statements if you need to run more than one at a time """ - self.notifier.status("running shell command") + self.notifier.status("planning to run shell command") # Log the command being executed in a visually structured format (Markdown). 
# The `.log` method is used here to log the command execution in the application's UX # this method is dynamically attached to functions in the Goose framework to handle user-visible # logging and integrates with the overall UI logging system self.notifier.log(Panel.fit(Markdown(f"```bash\n{command}\n```"), title="shell")) - safety_rails_exchange = self.exchange_view.processor.replace( - system=Message.load("prompts/safety_rails.jinja").text - ) - # remove the previous message which was a tool_use Assistant message - safety_rails_exchange.messages.pop() - - safety_rails_exchange.add(Message.assistant(f"Here is the command I'd like to run: `{command}`")) - safety_rails_exchange.add(Message.user("Please provide the danger rating of that command")) - rating = safety_rails_exchange.reply().text - - try: - rating = int(rating) - except ValueError: - rating = 5 # if we can't interpret we default to unsafe - if int(rating) > 3: + if is_dangerous_command(command): + # Stop the notifications so we can prompt + self.notifier.stop() if not keep_unsafe_command_prompt(command): raise RuntimeError( f"The command {command} was rejected as dangerous by the user." + " Do not proceed further, instead ask for instructions." 
) - + self.notifier.start() + self.notifier.status("running shell command") result: CompletedProcess = run(command, shell=True, text=True, capture_output=True, check=False) if result.returncode == 0: output = "Command succeeded" diff --git a/src/goose/toolkit/lint.py b/src/goose/toolkit/lint.py new file mode 100644 index 000000000..0f08f222d --- /dev/null +++ b/src/goose/toolkit/lint.py @@ -0,0 +1,12 @@ +from goose.utils import load_plugins + + +def lint_toolkits() -> None: + for toolkit_name, toolkit in load_plugins("goose.toolkit").items(): + assert toolkit.__doc__ is not None, f"`{toolkit_name}` toolkit must have a docstring" + first_line_of_docstring = toolkit.__doc__.split("\n")[0] + assert len(first_line_of_docstring.split(" ")) > 5, f"`{toolkit_name}` toolkit docstring is too short" + assert len(first_line_of_docstring.split(" ")) < 12, f"`{toolkit_name}` toolkit docstring is too long" + assert first_line_of_docstring[ + 0 + ].isupper(), f"`{toolkit_name}` toolkit docstring must start with a capital letter" diff --git a/src/goose/toolkit/prompts/safety_rails.jinja b/src/goose/toolkit/prompts/safety_rails.jinja deleted file mode 100644 index a470716b1..000000000 --- a/src/goose/toolkit/prompts/safety_rails.jinja +++ /dev/null @@ -1,39 +0,0 @@ -You are a third party auditor, with the goal of ensuring safety for commands -that will run on the user's system. You will be provided a conversation, -along with the next command that is queued to run. Evaluate if that command -is safe. - -Here are examples of harmful commands: -1. Commands that delete files that weren't created in the session, like `rm -rf /` -2. Commands that edit dotfiles or generally anything at the top level of the home `~/.` directory -3. Commands that globally modify packages or the environment, such as `brew install` - -However, it's safe to allow edits scoped to a project: -1. Commands that create new directories or new files, or scaffold new projects. `mkdir example` -2. 
Commands that manage the dependencies of the current project, such as virtualenvs, cargo, npm etc. `poetry add requests` -3. Commands that edit dotfiles in a project directory, like a .gitignore - -You should rate the potential harm of a command on a scale of 1 to 5, with 1 being totally safe and 5 being very dangerous. -Reply with a single character, the numerical rating. - -Here are some examples. - -Assistant: Here is the command I'd like to run: `rm -rf ~` -User: Please provide the danger rating of that command -Assistant: 5 - -Assistant: Here is the command I'd like to run: `brew install ripgrep` -User: Please provide the danger rating of that command -Assistant: 4 - -Assistant: Here is the command I'd like to run: `rm tests/test_exchange.py` -User: Please provide the danger rating of that command -Assistant: 3 - -Assistant: Here is the command I'd like to run: `cat demo.py` -User: Please provide the danger rating of that command -Assistant: 1 - -Assistant: Here is the command I'd like to run: `echo "export PATH=$HOME/.local/bin/:$PATH" >> ~/.zprofile` -User: Please provide the danger rating of that command -Assistant: 5 diff --git a/src/goose/toolkit/repo_context/repo_context.py b/src/goose/toolkit/repo_context/repo_context.py index 89a01a76f..8be8794f6 100644 --- a/src/goose/toolkit/repo_context/repo_context.py +++ b/src/goose/toolkit/repo_context/repo_context.py @@ -14,6 +14,8 @@ class RepoContext(Toolkit): + """Provides context about the current repository""" + def __init__(self, notifier: Notifier, requires: Requirements) -> None: super().__init__(notifier=notifier, requires=requires) diff --git a/src/goose/toolkit/utils.py b/src/goose/toolkit/utils.py index 1517c55ed..61632b776 100644 --- a/src/goose/toolkit/utils.py +++ b/src/goose/toolkit/utils.py @@ -1,8 +1,11 @@ from pathlib import Path +from typing import Optional from pygments.lexers import get_lexer_for_filename from pygments.util import ClassNotFound +from jinja2 import Environment, 
FileSystemLoader + def get_language(filename: Path) -> str: """ @@ -19,3 +22,23 @@ def get_language(filename: Path) -> str: return lexer.name except ClassNotFound: return "" + + +def render_template(template_path: Path, context: Optional[dict] = None) -> str: + """ + Renders a Jinja2 template given a Pathlib path, with no context needed. + + :param template_path: Path to the Jinja2 template file. + :param context: Optional dictionary containing the context for rendering the template. + :return: Rendered template as a string. + """ + # Ensure the path is absolute and exists + if not template_path.is_absolute(): + template_path = template_path.resolve() + + if not template_path.exists(): + raise FileNotFoundError(f"Template file {template_path} does not exist.") + + env = Environment(loader=FileSystemLoader(template_path.parent)) + template = env.get_template(template_path.name) + return template.render(context or {}) diff --git a/src/goose/utils/ask.py b/src/goose/utils/ask.py index e3c057a23..0e34b444a 100644 --- a/src/goose/utils/ask.py +++ b/src/goose/utils/ask.py @@ -1,4 +1,4 @@ -from exchange import Exchange, Message +from exchange import Exchange, Message, CheckpointData def ask_an_ai(input: str, exchange: Exchange, prompt: str = "", no_history: bool = True) -> Message: @@ -61,9 +61,9 @@ def clear_exchange(exchange: Exchange, clear_tools: bool = False) -> Exchange: """ if clear_tools: - new_exchange = exchange.replace(messages=[], checkpoints=[], tools=()) + new_exchange = exchange.replace(messages=[], checkpoint_data=CheckpointData(), tools=()) else: - new_exchange = exchange.replace(messages=[], checkpoints=[]) + new_exchange = exchange.replace(messages=[], checkpoint_data=CheckpointData()) return new_exchange diff --git a/src/goose/utils/check_shell_command.py b/src/goose/utils/check_shell_command.py new file mode 100644 index 000000000..e6c15f288 --- /dev/null +++ b/src/goose/utils/check_shell_command.py @@ -0,0 +1,34 @@ +import re + + +def 
is_dangerous_command(command: str) -> bool: + """ + Check if the command matches any dangerous patterns. + + Dangerous patterns in this function are defined as commands that may present risk to system stability. + + Args: + command (str): The shell command to check. + + Returns: + bool: True if the command is dangerous, False otherwise. + """ + dangerous_patterns = [ + # Commands that are generally unsafe + r"\brm\b", # rm command + r"\bgit\s+push\b", # git push command + r"\bsudo\b", # sudo command + r"\bmv\b", # mv command + r"\bchmod\b", # chmod command + r"\bchown\b", # chown command + r"\bmkfs\b", # mkfs command + r"\bsystemctl\b", # systemctl command + r"\breboot\b", # reboot command + r"\bshutdown\b", # shutdown command + # Target files that are unsafe + r"~\/\.|\/\.\w+", # home dotfile paths (~/.) or any /.name segment; no \b before "~" since "~" is not a word character + ] + for pattern in dangerous_patterns: + if re.search(pattern, command): + return True + return False diff --git a/tests/cli/test_main.py b/tests/cli/test_main.py index 38d4c6c74..617b3d5c1 100644 --- a/tests/cli/test_main.py +++ b/tests/cli/test_main.py @@ -1,11 +1,13 @@ from datetime import datetime +import importlib from time import time from unittest.mock import MagicMock, patch +import click import pytest from click.testing import CliRunner from exchange import Message -from goose.cli.main import goose_cli +from goose.cli.main import cli, goose_cli @pytest.fixture @@ -78,3 +80,46 @@ def test_session_clear_command(mock_session_files_path, create_session_file): session_files = list(mock_session_files_path.glob("*.jsonl")) assert len(session_files) == 1 assert session_files[0].stem == "second" + + +def test_combined_group_option(): + with patch("goose.utils.load_plugins") as mock_load_plugin: + group_option_name = "--describe-commands" + + def option_callback(ctx, *_): + click.echo("Option callback") + ctx.exit() + + mock_group_options = { + "option1": lambda: click.option( + group_option_name, 
is_flag=True, + callback=option_callback, + ), + } + + def side_effect_func(param): + if param == "goose.cli.group_option": + return mock_group_options + elif param == "goose.cli.group": + return {} + + mock_load_plugin.side_effect = side_effect_func + + # reload cli after mocking + importlib.reload(importlib.import_module("goose.cli.main")) + import goose.cli.main + + cli = goose.cli.main.cli + + runner = CliRunner() + result = runner.invoke(cli, [group_option_name]) + assert result.exit_code == 0 + + +def test_combined_group_commands(mock_session): + mock_session_class, mock_session_instance = mock_session + runner = CliRunner() + runner.invoke(cli, ["session", "resume", "session1", "--profile", "default"]) + mock_session_class.assert_called_once_with(name="session1", profile="default") + mock_session_instance.run.assert_called_once() diff --git a/tests/cli/test_session.py b/tests/cli/test_session.py index 83dd6ebaf..79a7c4a2b 100644 --- a/tests/cli/test_session.py +++ b/tests/cli/test_session.py @@ -1,7 +1,7 @@ from unittest.mock import MagicMock, patch import pytest -from exchange import Message +from exchange import Message, ToolUse, ToolResult from goose.cli.prompt.goose_prompt_session import GoosePromptSession from goose.cli.prompt.user_input import PromptAction, UserInput from goose.cli.session import Session @@ -32,7 +32,7 @@ def create_session(session_attributes: dict = {}): yield create_session -def test_session_does_not_extend_last_user_message_on_init( +def test_session_does_not_extend_last_user_text_message_on_init( create_session_with_mock_configs, mock_sessions_path, create_session_file ): messages = [Message.user("Hello"), Message.assistant("Hi"), Message.user("Last should be removed")] @@ -44,6 +44,41 @@ def test_session_does_not_extend_last_user_message_on_init( assert [message.text for message in session.exchange.messages] == ["Hello", "Hi"] +def test_session_adds_resume_message_if_last_message_is_tool_result( + create_session_with_mock_configs, 
mock_sessions_path, create_session_file +): + messages = [ + Message.user("Hello"), + Message(role="assistant", content=[ToolUse(id="1", name="first_tool", parameters={})]), + Message(role="user", content=[ToolResult(tool_use_id="1", output="output")]), + ] + create_session_file(messages, mock_sessions_path / f"{SESSION_NAME}.jsonl") + + session = create_session_with_mock_configs({"name": SESSION_NAME}) + print("Messages after session init:", session.exchange.messages) # Debugging line + assert len(session.exchange.messages) == 4 + assert session.exchange.messages[-1].role == "assistant" + assert session.exchange.messages[-1].text == "I see we were interrupted. How can I help you?" + + +def test_session_removes_tool_use_and_adds_resume_message_if_last_message_is_tool_use( + create_session_with_mock_configs, mock_sessions_path, create_session_file +): + messages = [ + Message.user("Hello"), + Message(role="assistant", content=[ToolUse(id="1", name="first_tool", parameters={})]), + ] + create_session_file(messages, mock_sessions_path / f"{SESSION_NAME}.jsonl") + + session = create_session_with_mock_configs({"name": SESSION_NAME}) + print("Messages after session init:", session.exchange.messages) # Debugging line + assert len(session.exchange.messages) == 2 + assert [message.text for message in session.exchange.messages] == [ + "Hello", + "I see we were interrupted. 
How can I help you?", + ] + + def test_save_session_create_session(mock_sessions_path, create_session_with_mock_configs, mock_specified_session_name): session = create_session_with_mock_configs() session.exchange.messages.append(Message.assistant("Hello")) diff --git a/tests/test_linting.py b/tests/test_linting.py new file mode 100644 index 000000000..f6e246ff6 --- /dev/null +++ b/tests/test_linting.py @@ -0,0 +1,5 @@ +from goose.toolkit.lint import lint_toolkits + + +def test_lint_toolkits(): + lint_toolkits() diff --git a/tests/toolkit/test_developer.py b/tests/toolkit/test_developer.py index 915380dfc..a3a291afd 100644 --- a/tests/toolkit/test_developer.py +++ b/tests/toolkit/test_developer.py @@ -5,6 +5,19 @@ import pytest from goose.toolkit.base import Requirements from goose.toolkit.developer import Developer +from contextlib import contextmanager +import os + + +@contextmanager +def change_dir(new_dir): + """Context manager to temporarily change the current working directory.""" + original_dir = os.getcwd() + os.chdir(new_dir) + try: + yield + finally: + os.chdir(original_dir) @pytest.fixture @@ -28,6 +41,20 @@ def developer_toolkit(): return toolkit +def test_system_prompt_with_goosehints(temp_dir, developer_toolkit): + readme_file = temp_dir / "README.md" + readme_file.write_text("This is from the README.md file.") + + hints_file = temp_dir / ".goosehints" + jinja_template_content = "Hints:\n\n{% include 'README.md' %}\nEnd." + hints_file.write_text(jinja_template_content) + + with change_dir(temp_dir): + system_prompt = developer_toolkit.system() + expected_end = "Hints:\n\nThis is from the README.md file.\nEnd." 
+ assert system_prompt.endswith(expected_end) + + def test_update_plan(developer_toolkit): tasks = [ {"description": "Task 1", "status": "planned"}, diff --git a/tests/utils/test_ask.py b/tests/utils/test_ask.py index 419f3a5b8..b7bd8269e 100644 --- a/tests/utils/test_ask.py +++ b/tests/utils/test_ask.py @@ -1,7 +1,7 @@ from unittest.mock import MagicMock, patch import pytest -from exchange import Exchange +from exchange import Exchange, CheckpointData from goose.utils.ask import ask_an_ai, clear_exchange, replace_prompt @@ -76,7 +76,7 @@ def test_clear_exchange_without_tools(): new_exchange = clear_exchange(exchange, clear_tools=False) # Assert - exchange.replace.assert_called_once_with(messages=[], checkpoints=[]) + exchange.replace.assert_called_once_with(messages=[], checkpoint_data=CheckpointData()) assert new_exchange == exchange.replace.return_value, "Should return the modified exchange" @@ -89,7 +89,7 @@ def test_clear_exchange_with_tools(): new_exchange = clear_exchange(exchange, clear_tools=True) # Assert - exchange.replace.assert_called_once_with(messages=[], checkpoints=[], tools=()) + exchange.replace.assert_called_once_with(messages=[], checkpoint_data=CheckpointData(), tools=()) assert new_exchange == exchange.replace.return_value, "Should return the modified exchange with tools cleared" diff --git a/tests/utils/test_check_shell_command.py b/tests/utils/test_check_shell_command.py new file mode 100644 index 000000000..f267d8e82 --- /dev/null +++ b/tests/utils/test_check_shell_command.py @@ -0,0 +1,40 @@ +import pytest +from goose.utils.check_shell_command import is_dangerous_command + + +@pytest.mark.parametrize( + "command", + [ + "rm -rf /", + "git push origin master", + "sudo reboot", + "mv /etc/passwd /tmp/", + "chmod 777 /etc/passwd", + "chown root:root /etc/passwd", + "mkfs -t ext4 /dev/sda1", + "systemctl stop nginx", + "reboot", + "shutdown now", + "cat ~/.hello.txt", + "cat ~/.config/example.txt", + ], +) +def 
test_dangerous_commands(command): + assert is_dangerous_command(command) + + +@pytest.mark.parametrize( + "command", + [ + "ls -la", + 'echo "Hello World"', + "cp ~/folder/file.txt /tmp/", + "echo hello > ~/toplevel/sublevel.txt", + "cat hello.txt", + "cat ~/config/example.txt", + "ls -la path/to/visible/file", + "echo 'file.with.dot.txt'", + ], +) +def test_safe_commands(command): + assert not is_dangerous_command(command)