From c72b938069616a4f69f8ebd404f76a081cebaee3 Mon Sep 17 00:00:00 2001
From: Joshua Aresty
Date: Mon, 15 Jul 2024 14:27:32 -0700
Subject: [PATCH 01/10] Add support for clipboard processing from images

- This adds support for image processing
- requires the use of gpt-4o
---
 GPT/gpt.py          |  3 ++-
 lib/modelHelpers.py | 17 ++++++++++++++---
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/GPT/gpt.py b/GPT/gpt.py
index d32235af..18d11f80 100644
--- a/GPT/gpt.py
+++ b/GPT/gpt.py
@@ -47,6 +47,7 @@ def gpt_query(prompt: str, content: str) -> str:
     url = settings.get("user.model_endpoint")
     headers, data = generate_payload(prompt, content)
+    print(headers, json.dumps(data))
 
     response = requests.post(url, headers=headers, data=json.dumps(data))
 
     match response.status_code:
@@ -244,7 +245,7 @@ def gpt_get_source_text(spoken_text: str) -> str:
     """Get the source text that is will have the prompt applied to it"""
     match spoken_text:
         case "clipboard":
-            return clip.text()
+            return "clip"
         case "gptResponse":
             if GPTState.last_response == "":
                 raise Exception(

diff --git a/lib/modelHelpers.py b/lib/modelHelpers.py
index 449e2e4d..f7c02b39 100644
--- a/lib/modelHelpers.py
+++ b/lib/modelHelpers.py
@@ -53,6 +53,19 @@ def generate_payload(
         "Authorization": f"Bearer {TOKEN}",
     }
 
+    message = {"type": "text", "text": content}
+    if content == "clip":
+        clipped_image = clip.image()
+        if clipped_image:
+            data = clipped_image.encode().data()
+            base64_image = base64.b64encode(data).decode("utf-8")
+            message = {
+                "type": "image_url",
+                "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
+            }
+        else:
+            message = {"type": "text", "text": clip.text()}
+
     data = {
         "messages": [
             {
@@ -60,15 +73,13 @@ def generate_payload(
                 "role": "system",
                 "content": settings.get("user.model_system_prompt")
                 + additional_context,
             },
-            {"role": "user", "content": f"{prompt}:\n{content}"},
+            {"role": "user", "content": [{"type": "text", "text": prompt}, message]},
         ],
        "max_tokens": 2024,
         "temperature": settings.get("user.model_temperature"),
         "n": 1,
-        "stop": None,
         "model": settings.get("user.openai_model"),
     }
-
     if tools is not None:
         data["tools"] = tools
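
The payload shape patch 01 builds follows the OpenAI chat-completions convention for image input: the user message's `content` becomes a list of typed parts, and the image travels inline as a base64 `data:` URL. Below is a minimal standalone sketch of that shape — the `image.jpg` path, the prompt strings, and the `gpt-4o` model name are illustrative stand-ins for what Talon supplies at runtime:

```python
# Sketch of the multimodal request body; reading bytes from a file here
# stands in for Talon's clip.image().encode().data().
import base64
import json

with open("image.jpg", "rb") as f:  # hypothetical image file
    base64_image = base64.b64encode(f.read()).decode("utf-8")

data = {
    "model": "gpt-4o",
    "max_tokens": 2024,
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image"},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                },
            ],
        },
    ],
}

print(json.dumps(data, indent=2))  # what gets POSTed to the chat/completions endpoint
```

Text-only models such as gpt-3.5-turbo reject `image_url` content parts, which is why the commit message notes that gpt-4o is required.
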
From 26b177363afd4a9e0617bb9863ee9578a93841da Mon Sep 17 00:00:00 2001
From: Joshua Aresty
Date: Mon, 15 Jul 2024 14:28:55 -0700
Subject: [PATCH 02/10] Remove stray logging code

---
 GPT/gpt.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/GPT/gpt.py b/GPT/gpt.py
index 18d11f80..7b53c2f7 100644
--- a/GPT/gpt.py
+++ b/GPT/gpt.py
@@ -47,7 +47,6 @@ def gpt_query(prompt: str, content: str) -> str:
     url = settings.get("user.model_endpoint")
     headers, data = generate_payload(prompt, content)
-    print(headers, json.dumps(data))
 
     response = requests.post(url, headers=headers, data=json.dumps(data))
 
     match response.status_code:

From 1320f7f5c11c480ad878cdebba936b027a5db767 Mon Sep 17 00:00:00 2001
From: Joshua Aresty
Date: Mon, 15 Jul 2024 17:10:21 -0700
Subject: [PATCH 03/10] Fixed support for passing clipboard

---
 GPT/gpt.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/GPT/gpt.py b/GPT/gpt.py
index 7b53c2f7..363f9c64 100644
--- a/GPT/gpt.py
+++ b/GPT/gpt.py
@@ -165,6 +165,8 @@ def gpt_apply_prompt(
         prompt = """Generate text that satisfies the question or request given in the input."""
     # If the user is just moving the source to the destination, we don't need to apply a query
     elif prompt == "pass":
+        if text_to_process == "clip":
+            return clip.text()
         return text_to_process
 
     return gpt_query(prompt, text_to_process)

From 2d962dac9f6ce6442842ffbd912d54eedd9c30b9 Mon Sep 17 00:00:00 2001
From: Joshua Aresty
Date: Mon, 15 Jul 2024 17:51:15 -0700
Subject: [PATCH 04/10] Update model blend clip to use new clip source

- This allows it to pull from images as well
---
 GPT/beta-commands/beta-gpt.talon | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/GPT/beta-commands/beta-gpt.talon b/GPT/beta-commands/beta-gpt.talon
index 0c033530..eb344142 100644
--- a/GPT/beta-commands/beta-gpt.talon
+++ b/GPT/beta-commands/beta-gpt.talon
@@ -7,9 +7,8 @@ model find <user.text>: user.gpt_find_talon_commands(user.text)
 
 # Using the context of the text on the clipboard, update the selected text
 model blend clip:
-    clipboard_text = clip.text()
     destination_text = edit.selected_text()
-    result = user.gpt_blend(clipboard_text, destination_text)
+    result = user.gpt_blend("clip", destination_text)
     user.gpt_insert_response(result, "")
 
 # Pass the raw text of a prompt to a destination without actually calling GPT with it

From 3c543810916156a95984f170956dc4dbe38a50cf Mon Sep 17 00:00:00 2001
From: Joshua Aresty
Date: Thu, 18 Jul 2024 17:58:20 -0700
Subject: [PATCH 05/10] Respond to pull request feedback

- Make image the only clipboard special case
---
 GPT/beta-commands/beta-gpt.talon | 2 +-
 GPT/gpt.py                       | 7 ++++---
 lib/modelHelpers.py              | 4 +---
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/GPT/beta-commands/beta-gpt.talon b/GPT/beta-commands/beta-gpt.talon
index eb344142..bc8f7244 100644
--- a/GPT/beta-commands/beta-gpt.talon
+++ b/GPT/beta-commands/beta-gpt.talon
@@ -8,7 +8,7 @@ model find <user.text>: user.gpt_find_talon_commands(user.text)
 # Using the context of the text on the clipboard, update the selected text
 model blend clip:
     destination_text = edit.selected_text()
-    result = user.gpt_blend("clip", destination_text)
+    result = user.gpt_blend(user.gpt_get_source_text("clipboard"), destination_text)
     user.gpt_insert_response(result, "")
 
 # Pass the raw text of a prompt to a destination without actually calling GPT with it

diff --git a/GPT/gpt.py b/GPT/gpt.py
index 363f9c64..1fa647d3 100644
--- a/GPT/gpt.py
+++ b/GPT/gpt.py
@@ -165,8 +165,6 @@ def gpt_apply_prompt(
         prompt = """Generate text that satisfies the question or request given in the input."""
     # If the user is just moving the source to the destination, we don't need to apply a query
     elif prompt == "pass":
-        if text_to_process == "clip":
-            return clip.text()
         return text_to_process
 
     return gpt_query(prompt, text_to_process)
@@ -246,7 +244,10 @@ def gpt_get_source_text(spoken_text: str) -> str:
     """Get the source text that is will have the prompt applied to it"""
     match spoken_text:
         case "clipboard":
-            return "clip"
+            clipboard_text = clip.text()
+            if clipboard_text is None:
+                return "image"
+            return clipboard_text
         case "gptResponse":
             if GPTState.last_response == "":
                 raise Exception(

diff --git a/lib/modelHelpers.py b/lib/modelHelpers.py
index f7c02b39..3294b027 100644
--- a/lib/modelHelpers.py
+++ b/lib/modelHelpers.py
@@ -54,7 +54,7 @@ def generate_payload(
     }
 
     message = {"type": "text", "text": content}
-    if content == "clip":
+    if content == "image":
         clipped_image = clip.image()
         if clipped_image:
             data = clipped_image.encode().data()
@@ -63,8 +63,6 @@ def generate_payload(
                 "type": "image_url",
                 "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
             }
-        else:
-            message = {"type": "text", "text": clip.text()}
 
     data = {
         "messages": [
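
After patch 05 the hand-off is two-stage: `gpt_get_source_text` returns either literal clipboard text or the sentinel string `"image"`, and `generate_payload` later expands that sentinel into an `image_url` part. A rough pure-Python model of the flow — `get_source_text`, `build_user_message`, and `IMAGE_SENTINEL` are illustrative stand-ins, not names from the patch:

```python
from typing import Optional

IMAGE_SENTINEL = "image"  # the marker string the two stages agree on

def get_source_text(clip_text: Optional[str], has_image: bool) -> Optional[str]:
    """Stage 1: prefer clipboard text; otherwise signal an image via the sentinel."""
    if clip_text is not None:
        return clip_text
    return IMAGE_SENTINEL if has_image else None

def build_user_message(content: str, image_b64: Optional[str]) -> dict:
    """Stage 2: expand the sentinel into an image_url part, else send plain text."""
    if content == IMAGE_SENTINEL and image_b64 is not None:
        return {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
        }
    return {"type": "text", "text": content}
```

The sketch also makes the main caveat visible: clipboard text that happens to equal the sentinel is indistinguishable from an image request. Patch 08 below narrows that window.
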
From c355aa217bb7a1e901a8fa2c56faa93e60ff1282 Mon Sep 17 00:00:00 2001
From: Colton Loftus <70598503+C-Loftus@users.noreply.github.com>
Date: Thu, 18 Jul 2024 21:14:24 -0400
Subject: [PATCH 06/10] Better error handling. Move to gpt-4o-mini as default

---
 GPT/gpt.py                      | 10 +++++++---
 GPT/readme.md                   |  2 +-
 lib/talonSettings.py            |  2 +-
 readme.md                       |  2 +-
 talon-ai-settings.talon.example |  3 +--
 5 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/GPT/gpt.py b/GPT/gpt.py
index 1fa647d3..38b339d8 100644
--- a/GPT/gpt.py
+++ b/GPT/gpt.py
@@ -246,7 +246,11 @@ def gpt_get_source_text(spoken_text: str) -> str:
         case "clipboard":
             clipboard_text = clip.text()
             if clipboard_text is None:
-                return "image"
+                if clip.image():
+                    return "image"
+                else:
+                    notify("GPT Failure: User applied a prompt to the phrase clipboard, but there was no clipboard text or image stored")
+                    return
             return clipboard_text
         case "gptResponse":
             if GPTState.last_response == "":
@@ -261,7 +265,7 @@ def gpt_get_source_text(spoken_text: str) -> str:
                 actions.user.clear_last_phrase()
                 return last_output
             else:
-                notify("No text to reformat")
-                raise Exception("No text to reformat")
+                notify("GPT Failure: User applied a prompt to the phrase last Talon Dictation, but there was no text to reformat")
+                raise Exception("GPT Failure: User applied a prompt to the phrase last Talon Dictation, but there was no text to reformat")
         case "this" | _:
             return actions.edit.selected_text()

diff --git a/GPT/readme.md b/GPT/readme.md
index efec426f..ff99360f 100644
--- a/GPT/readme.md
+++ b/GPT/readme.md
@@ -32,7 +32,7 @@ If you wish to change any configuration settings, copy the [example configuratio
 
 | Setting                  | Default                                        | Notes                                                                              |
 | ------------------------ | ---------------------------------------------- | ---------------------------------------------------------------------------------- |
-| user.openai_model        | `"gpt-3.5-turbo"`                              | The model to use for the queries. NOTE: To access gpt-4 you may need prior API use |
+| user.openai_model        | `"gpt-4o-mini"`                                | The model to use for the queries. NOTE: To access certain models you may need prior API use |
 | user.model_temperature   | `0.6`                                          | Higher temperatures will make the model more creative and less accurate           |
 | user.model_endpoint      | `"https://api.openai.com/v1/chat/completions"` | Any OpenAI compatible endpoint address can be used (Azure, local llamafiles, etc)  |
 | user.model_shell_default | `"bash"`                                       | The default shell for `model shell` commands                                       |

diff --git a/lib/talonSettings.py b/lib/talonSettings.py
index 42c9d74b..789216a1 100644
--- a/lib/talonSettings.py
+++ b/lib/talonSettings.py
@@ -23,7 +23,7 @@ def modelPrompt(matched_prompt) -> str:
 
 
 mod.setting(
-    "openai_model", type=Literal["gpt-3.5-turbo", "gpt-4"], default="gpt-3.5-turbo"
+    "openai_model", type=Literal["gpt-3.5-turbo", "gpt-4", "gpt-4o-mini"], default="gpt-4o-mini"
 )
 
 mod.setting(

diff --git a/readme.md b/readme.md
index 1a489b0a..6a030af8 100644
--- a/readme.md
+++ b/readme.md
@@ -12,7 +12,7 @@ This functionality is especially helpful for users who:
 **Prompts and extends the following tools:**
 
 - Github Copilot
-- OpenAI API (GPT-3.5/GPT-4) for text generation and processing
+- OpenAI API (with any GPT model) for text generation and processing
   - Any OpenAI compatible model endpoint can be used (Azure, local llamafiles, etc)
 - OpenAI API for image generation and vision

diff --git a/talon-ai-settings.talon.example b/talon-ai-settings.talon.example
index 63c04ea6..fe7817c6 100644
--- a/talon-ai-settings.talon.example
+++ b/talon-ai-settings.talon.example
@@ -9,8 +9,7 @@ settings():
 
     # user.model_system_prompt = "You are an assistant helping an office worker to be more productive."
 
-    # Change to 'gpt-4' for GPT-4
-    # NOTE, you may not have access to GPT-4 yet: https://help.openai.com/en/articles/7102672-how-can-i-access-gpt-4
+    # Change to 'gpt-4' or the model of your choice
    # user.openai_model = 'gpt-3.5-turbo'
 
     # Only uncomment the line below if you want experimental behavior to parse Talon files
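
One detail worth noting in the new error handling: the clipboard branch notifies and exits with a bare `return` (so `gpt_get_source_text` yields `None` despite its `-> str` annotation), while the dictation branch notifies and then raises. A hypothetical helper that would make the two branches uniform — `fail` and `notify_user` are sketches, not part of the patch:

```python
from typing import NoReturn

def notify_user(message: str) -> None:
    # Stand-in for the repo's desktop-notification helper.
    print(f"[notify] {message}")

def fail(message: str) -> NoReturn:
    """Surface the problem to the voice user, then abort the command chain."""
    notify_user(message)
    raise Exception(message)
```
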
From e7eac2d5a19714143460f831eb8dc5f11bdf4c98 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 19 Jul 2024 01:14:35 +0000
Subject: [PATCH 07/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 GPT/gpt.py           | 12 +++++++++---
 GPT/readme.md        | 14 +++++++-------
 lib/talonSettings.py |  4 +++-
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/GPT/gpt.py b/GPT/gpt.py
index 38b339d8..35526480 100644
--- a/GPT/gpt.py
+++ b/GPT/gpt.py
@@ -249,7 +249,9 @@ def gpt_get_source_text(spoken_text: str) -> str:
                 if clip.image():
                     return "image"
                 else:
-                    notify("GPT Failure: User applied a prompt to the phrase clipboard, but there was no clipboard text or image stored")
+                    notify(
+                        "GPT Failure: User applied a prompt to the phrase clipboard, but there was no clipboard text or image stored"
+                    )
                     return
             return clipboard_text
         case "gptResponse":
@@ -265,7 +267,11 @@ def gpt_get_source_text(spoken_text: str) -> str:
                 actions.user.clear_last_phrase()
                 return last_output
             else:
-                notify("GPT Failure: User applied a prompt to the phrase last Talon Dictation, but there was no text to reformat")
-                raise Exception("GPT Failure: User applied a prompt to the phrase last Talon Dictation, but there was no text to reformat")
+                notify(
+                    "GPT Failure: User applied a prompt to the phrase last Talon Dictation, but there was no text to reformat"
+                )
+                raise Exception(
+                    "GPT Failure: User applied a prompt to the phrase last Talon Dictation, but there was no text to reformat"
+                )
         case "this" | _:
             return actions.edit.selected_text()

diff --git a/GPT/readme.md b/GPT/readme.md
index ff99360f..27bc03c1 100644
--- a/GPT/readme.md
+++ b/GPT/readme.md
@@ -30,10 +30,10 @@ To add additional prompts, copy the [Talon list for custom prompts](lists/custom
 
 If you wish to change any configuration settings, copy the [example configuration file](../talon-ai-settings.talon.example) into your user directory and modify settings that you want to change.
 
-| Setting                  | Default                                        | Notes                                                                              |
-| ------------------------ | ---------------------------------------------- | ---------------------------------------------------------------------------------- |
-| user.openai_model        | `"gpt-4o-mini"`                                | The model to use for the queries. NOTE: To access certain models you may need prior API use |
-| user.model_temperature   | `0.6`                                          | Higher temperatures will make the model more creative and less accurate           |
-| user.model_endpoint      | `"https://api.openai.com/v1/chat/completions"` | Any OpenAI compatible endpoint address can be used (Azure, local llamafiles, etc)  |
-| user.model_shell_default | `"bash"`                                       | The default shell for `model shell` commands                                       |
-| user.model_system_prompt | `"You are an assistant helping an office worker to be more productive. Output just the response to the request and no additional content. Do not generate any markdown formatting such as backticks for programming languages unless it is explicitly requested."` | The meta-prompt for how to respond to all prompts |
+| Setting                  | Default                                        | Notes                                                                                        |
+| ------------------------ | ---------------------------------------------- | -------------------------------------------------------------------------------------------- |
+| user.openai_model        | `"gpt-4o-mini"`                                | The model to use for the queries. NOTE: To access certain models you may need prior API use  |
+| user.model_temperature   | `0.6`                                          | Higher temperatures will make the model more creative and less accurate                      |
+| user.model_endpoint      | `"https://api.openai.com/v1/chat/completions"` | Any OpenAI compatible endpoint address can be used (Azure, local llamafiles, etc)            |
+| user.model_shell_default | `"bash"`                                       | The default shell for `model shell` commands                                                 |
+| user.model_system_prompt | `"You are an assistant helping an office worker to be more productive. Output just the response to the request and no additional content. Do not generate any markdown formatting such as backticks for programming languages unless it is explicitly requested."` | The meta-prompt for how to respond to all prompts                                            |

diff --git a/lib/talonSettings.py b/lib/talonSettings.py
index 789216a1..a57b95d7 100644
--- a/lib/talonSettings.py
+++ b/lib/talonSettings.py
@@ -23,7 +23,9 @@ def modelPrompt(matched_prompt) -> str:
 
 
 mod.setting(
-    "openai_model", type=Literal["gpt-3.5-turbo", "gpt-4", "gpt-4o-mini"], default="gpt-4o-mini"
+    "openai_model",
+    type=Literal["gpt-3.5-turbo", "gpt-4", "gpt-4o-mini"],
+    default="gpt-4o-mini",
 )
 
 mod.setting(
"this" | _: return actions.edit.selected_text() diff --git a/GPT/readme.md b/GPT/readme.md index ff99360f..27bc03c1 100644 --- a/GPT/readme.md +++ b/GPT/readme.md @@ -30,10 +30,10 @@ To add additional prompts, copy the [Talon list for custom prompts](lists/custom If you wish to change any configuration settings, copy the [example configuration file](../talon-ai-settings.talon.example) into your user directory and modify settings that you want to change. -| Setting | Default | Notes | -| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------- | -| user.openai_model | `"gpt-4o-mini"` | The model to use for the queries. NOTE: To access certain models you may need prior API use | -| user.model_temperature | `0.6` | Higher temperatures will make the model more creative and less accurate | -| user.model_endpoint | `"https://api.openai.com/v1/chat/completions"` | Any OpenAI compatible endpoint address can be used (Azure, local llamafiles, etc) | -| user.model_shell_default | `"bash"` | The default shell for `model shell` commands | -| user.model_system_prompt | `"You are an assistant helping an office worker to be more productive. Output just the response to the request and no additional content. Do not generate any markdown formatting such as backticks for programming languages unless it is explicitly requested."` | The meta-prompt for how to respond to all prompts | +| Setting | Default | Notes | +| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------- | +| user.openai_model | `"gpt-4o-mini"` | The model to use for the queries. NOTE: To access certain models you may need prior API use | +| user.model_temperature | `0.6` | Higher temperatures will make the model more creative and less accurate | +| user.model_endpoint | `"https://api.openai.com/v1/chat/completions"` | Any OpenAI compatible endpoint address can be used (Azure, local llamafiles, etc) | +| user.model_shell_default | `"bash"` | The default shell for `model shell` commands | +| user.model_system_prompt | `"You are an assistant helping an office worker to be more productive. Output just the response to the request and no additional content. 
From cb65c322bb1d6536881719261af7b965f489632f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 19 Jul 2024 01:32:30 +0000
Subject: [PATCH 09/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 lib/a11yHelpers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/a11yHelpers.py b/lib/a11yHelpers.py
index f5c5fe7d..080790f4 100644
--- a/lib/a11yHelpers.py
+++ b/lib/a11yHelpers.py
@@ -1 +1 @@
-from talon import
\ No newline at end of file
+from talon import

From b0a9712342e4141a8b124833b04d77bc6b3263ea Mon Sep 17 00:00:00 2001
From: Colton Loftus <70598503+C-Loftus@users.noreply.github.com>
Date: Thu, 18 Jul 2024 21:32:35 -0400
Subject: [PATCH 10/10] Delete lib/a11yHelpers.py

---
 lib/a11yHelpers.py | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 lib/a11yHelpers.py

diff --git a/lib/a11yHelpers.py b/lib/a11yHelpers.py
deleted file mode 100644
index 080790f4..00000000
--- a/lib/a11yHelpers.py
+++ /dev/null
@@ -1 +0,0 @@
-from talon import