Add support for clipboard processing from images #97

Merged: 10 commits, Jul 19, 2024
GPT/beta-commands/beta-gpt.talon: 1 addition & 2 deletions
@@ -7,9 +7,8 @@ model find <user.text>: user.gpt_find_talon_commands(user.text)

 # Using the context of the text on the clipboard, update the selected text
 model blend clip:
-    clipboard_text = clip.text()
     destination_text = edit.selected_text()
-    result = user.gpt_blend(clipboard_text, destination_text)
+    result = user.gpt_blend(user.gpt_get_source_text("clipboard"), destination_text)
     user.gpt_insert_response(result, "")

 # Pass the raw text of a prompt to a destination without actually calling GPT with it
GPT/gpt.py: 16 additions & 3 deletions
@@ -244,7 +244,16 @@ def gpt_get_source_text(spoken_text: str) -> str:
"""Get the source text that is will have the prompt applied to it"""
match spoken_text:
case "clipboard":
return clip.text()
clipboard_text = clip.text()
if clipboard_text is None:
if clip.image():
return "__IMAGE__"
else:
notify(
"GPT Failure: User applied a prompt to the phrase clipboard, but there was no clipboard text or image stored"
)
return
return clipboard_text
case "gptResponse":
if GPTState.last_response == "":
raise Exception(
@@ -258,7 +267,11 @@ def gpt_get_source_text(spoken_text: str) -> str:
                 actions.user.clear_last_phrase()
                 return last_output
             else:
-                notify("No text to reformat")
-                raise Exception("No text to reformat")
+                notify(
+                    "GPT Failure: User applied a prompt to the phrase last Talon Dictation, but there was no text to reformat"
+                )
+                raise Exception(
+                    "GPT Failure: User applied a prompt to the phrase last Talon Dictation, but there was no text to reformat"
+                )
         case "this" | _:
             return actions.edit.selected_text()
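As a minimal sketch of the contract this hunk introduces (not part of the diff): `gpt_get_source_text("clipboard")` now returns the literal string `"__IMAGE__"` when the clipboard holds an image but no text, and `generate_payload` in `lib/modelHelpers.py` is expected to swap that sentinel for an `image_url` message. The helper below is hypothetical and only illustrates how a caller might branch on the returned value.

```python
# Hypothetical illustration of the sentinel contract assumed by this PR (not in the diff).
IMAGE_SENTINEL = "__IMAGE__"


def describe_source(source_text: str | None) -> str:
    """Hypothetical helper showing how a caller might branch on the returned value."""
    if source_text is None:
        # gpt_get_source_text already notified the user and returned None
        return "nothing usable on the clipboard"
    if source_text == IMAGE_SENTINEL:
        return "clipboard image: sent to the model as an image_url message"
    return f"clipboard text ({len(source_text)} characters): sent as a plain text message"
```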
GPT/readme.md: 7 additions & 7 deletions
@@ -30,10 +30,10 @@ To add additional prompts, copy the [Talon list for custom prompts](lists/custom

If you wish to change any configuration settings, copy the [example configuration file](../talon-ai-settings.talon.example) into your user directory and modify settings that you want to change.

-| Setting | Default | Notes |
-| ------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------ |
-| user.openai_model | `"gpt-3.5-turbo"` | The model to use for the queries. NOTE: To access gpt-4 you may need prior API use |
-| user.model_temperature | `0.6` | Higher temperatures will make the model more creative and less accurate |
-| user.model_endpoint | `"https://api.openai.com/v1/chat/completions"` | Any OpenAI compatible endpoint address can be used (Azure, local llamafiles, etc) |
-| user.model_shell_default | `"bash"` | The default shell for `model shell` commands |
-| user.model_system_prompt | `"You are an assistant helping an office worker to be more productive. Output just the response to the request and no additional content. Do not generate any markdown formatting such as backticks for programming languages unless it is explicitly requested."` | The meta-prompt for how to respond to all prompts |
+| Setting | Default | Notes |
+| ------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------ |
+| user.openai_model | `"gpt-4o-mini"` | The model to use for the queries. NOTE: To access certain models you may need prior API use |
+| user.model_temperature | `0.6` | Higher temperatures will make the model more creative and less accurate |
+| user.model_endpoint | `"https://api.openai.com/v1/chat/completions"` | Any OpenAI compatible endpoint address can be used (Azure, local llamafiles, etc) |
+| user.model_shell_default | `"bash"` | The default shell for `model shell` commands |
+| user.model_system_prompt | `"You are an assistant helping an office worker to be more productive. Output just the response to the request and no additional content. Do not generate any markdown formatting such as backticks for programming languages unless it is explicitly requested."` | The meta-prompt for how to respond to all prompts |
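As a rough sketch of how overriding these settings might look in a user `.talon` file, assuming the defaults above and using illustrative values only (the full template is `talon-ai-settings.talon.example`):

```talon
settings():
    # Illustrative values only; any model name accepted by your endpoint works
    user.openai_model = 'gpt-4o-mini'
    user.model_temperature = 0.3
```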
lib/modelHelpers.py: 12 additions & 3 deletions
@@ -53,22 +53,31 @@ def generate_payload(
"Authorization": f"Bearer {TOKEN}",
}

message = {"type": "text", "text": content}
if content == "__IMAGE__":
clipped_image = clip.image()
if clipped_image:
data = clipped_image.encode().data()
base64_image = base64.b64encode(data).decode("utf-8")
message = {
"type": "image_url",
"image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
}

data = {
"messages": [
{
"role": "system",
"content": settings.get("user.model_system_prompt")
+ additional_context,
},
{"role": "user", "content": f"{prompt}:\n{content}"},
{"role": "user", "content": [{"type": "text", "text": prompt}, message]},
],
"max_tokens": 2024,
"temperature": settings.get("user.model_temperature"),
"n": 1,
"stop": None,
Collaborator (author): This was causing an exception when running with an image prompt but not with text. I didn't notice any behavioral difference, however, so I removed it.

Owner: Good w me if it works with you

"model": settings.get("user.openai_model"),
}

if tools is not None:
data["tools"] = tools

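For reference, a rough sketch of the request body `generate_payload` builds when the clipboard holds an image, following the OpenAI chat completions message format this change targets. It is not produced by the diff itself: the prompt text, base64 data, and setting values below are placeholders, and the `stop` field is absent per the comment thread above.

```python
# Placeholder payload illustrating the multimodal message shape (text part + image_url part).
example_payload = {
    "messages": [
        {
            "role": "system",
            "content": "You are an assistant helping an office worker to be more productive.",
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "describe this image"},
                {
                    "type": "image_url",
                    # Truncated base64-encoded image taken from clip.image()
                    "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."},
                },
            ],
        },
    ],
    "max_tokens": 2024,
    "temperature": 0.6,
    "n": 1,
    # "stop": None is no longer included; it raised an exception with image prompts.
    "model": "gpt-4o-mini",
}
```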
lib/talonSettings.py: 3 additions & 1 deletion
@@ -23,7 +23,9 @@ def modelPrompt(matched_prompt) -> str:


 mod.setting(
-    "openai_model", type=Literal["gpt-3.5-turbo", "gpt-4"], default="gpt-3.5-turbo"
+    "openai_model",
+    type=Literal["gpt-3.5-turbo", "gpt-4", "gpt-4o-mini"],
+    default="gpt-4o-mini",
 )

 mod.setting(
readme.md: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ This functionality is especially helpful for users who:
 **Prompts and extends the following tools:**

 - Github Copilot
-- OpenAI API (GPT-3.5/GPT-4) for text generation and processing
+- OpenAI API (with any GPT model) for text generation and processing
 - Any OpenAI compatible model endpoint can be used (Azure, local llamafiles, etc)
 - OpenAI API for image generation and vision

talon-ai-settings.talon.example: 1 addition & 2 deletions
@@ -9,8 +9,7 @@ settings():

     # user.model_system_prompt = "You are an assistant helping an office worker to be more productive."

-    # Change to 'gpt-4' for GPT-4
-    # NOTE, you may not have access to GPT-4 yet: https://help.openai.com/en/articles/7102672-how-can-i-access-gpt-4
+    # Change to 'gpt-4' or the model of your choice
     # user.openai_model = 'gpt-3.5-turbo'

     # Only uncomment the line below if you want experimental behavior to parse Talon files