make drawing understanding better

Kav-K · Nov 30, 2023 · a9919a7 · a9919a7
1 parent af5d853
commit a9919a7
Show file tree

Hide file tree

Showing 5 changed files with 117 additions and 27 deletions.
diff --git a/cogs/text_service_cog.py b/cogs/text_service_cog.py
@@ -132,7 +132,7 @@ def __init__(
         # Sharing service
         self.sharegpt_service = ShareGPTService()
 
-        try:
+        try: # TODO Clean this up, this is gross
             conversation_file_path = EnvService.find_shared_file(
                 "conversation_starter_pretext.txt"
             )
@@ -175,6 +175,26 @@ def __init__(
                 )
             assert self.CONVERSATION_STARTER_TEXT_VISION is not None
 
+            conversation_drawing_ability_snippet = EnvService.find_shared_file(
+                "conversation_drawing_ability_snippet.txt"
+            )
+            with conversation_drawing_ability_snippet.open("r") as f:
+                self.CONVERSATION_DRAWING_ABILITY_SNIPPET = f.read()
+                print(
+                    f"Conversation starter text loaded from {conversation_drawing_ability_snippet}."
+                )
+            assert self.CONVERSATION_DRAWING_ABILITY_SNIPPET is not None
+
+            conversation_drawing_ability_extraction_snippet = EnvService.find_shared_file(
+                "conversation_drawing_ability_extraction_snippet.txt"
+            )
+            with conversation_drawing_ability_extraction_snippet.open("r") as f:
+                self.CONVERSATION_DRAWING_ABILITY_EXTRACTION_SNIPPET = f.read()
+                print(
+                    f"Conversation starter text loaded from {conversation_drawing_ability_extraction_snippet}."
+                )
+            assert self.CONVERSATION_DRAWING_ABILITY_EXTRACTION_SNIPPET is not None
+
         except Exception:
             self.CONVERSATION_STARTER_TEXT = (
                 self.CONVERSATION_STARTER_TEXT_MINIMAL
@@ -1315,17 +1335,19 @@ async def converse_command(
 
         # Append the starter text for gpt to the user's history so it gets concatenated with the prompt later
         if minimal or opener_file or opener:
-            self.conversation_threads[target.id].history.append(
-                EmbeddedConversationItem(self.CONVERSATION_STARTER_TEXT_MINIMAL, 0)
-            )
+            starting_text = self.CONVERSATION_STARTER_TEXT_MINIMAL
         elif not minimal and not "-vision" in model_selection:
-            self.conversation_threads[target.id].history.append(
-                EmbeddedConversationItem(self.CONVERSATION_STARTER_TEXT, 0)
-            )
+            starting_text = self.CONVERSATION_STARTER_TEXT
         else:  # Vision case, dont add the image-ocr image-caption, etc helpers here.
-            self.conversation_threads[target.id].history.append(
-                EmbeddedConversationItem(self.CONVERSATION_STARTER_TEXT_VISION, 0)
-            )
+            starting_text = self.CONVERSATION_STARTER_TEXT_VISION
+
+        if draw:
+            starting_text += self.CONVERSATION_DRAWING_ABILITY_SNIPPET
+        else:
+            starting_text += "You are unable to draw images in this conversation. Ask the user to start a conversation with gpt-4-vision with the `draw` option turned on in order to have this ability."
+        self.conversation_threads[target.id].history.append(
+            EmbeddedConversationItem(starting_text, 0)
+        )
 
         # Set user as thread owner before sending anything that can error and leave the thread unowned
         self.conversation_thread_owners[user_id_normalized].append(target.id)

diff --git a/conversation_drawing_ability_extraction_snippet.txt b/conversation_drawing_ability_extraction_snippet.txt
@@ -0,0 +1,45 @@
+Here are some good prompting tips:
+Describe the Image Content: Start your prompt with the type of image you want, such as "A photograph of...", "A 3D rendering of...", "A sketch of...", or "An illustration of...".
+Describe the Subject: Clearly state the subject of your image. It could be anything from a person or animal to an abstract concept. Be specific to guide the AI, e.g., "An illustration of an owl...", "A photograph of a president...", "A 3D rendering of a chair...".
+Add Relevant Details: Include details like colors, shapes, sizes, and textures. Rather than just saying "bear", specify the type (e.g., "brown and black, grizzly or polar"), surroundings (e.g., "a forest or mountain range"), and other details.
+Describe the Form and Style: Provide details about the form and style, using keywords like "abstract", "minimalist", or "surreal". You can also mention specific artists or artworks to mimic their style, e.g., "Like Salvador Dali" or "Like Andy Warhol’s Shot Marilyns painting".
+Define the Composition: Use keywords to define the composition, such as resolution, lighting style, aspect ratio, and camera view.
+Additional Tips:
+Use understandable keywords; avoid overly complicated or uncommon words.
+Keep prompts concise; aim for 3 to 7 words, and avoid being overly descriptive.
+Use multiple adjectives to describe your art’s subject, style, and composition.
+Avoid conflicting terms with opposite meanings.
+Use AI copywriting tools like ChatGPT for prompt generation.
+Research the specific AI art tool you’re using for recognized keywords.
+Examples:
+"A 3D rendering of a tree with bright yellow leaves and an abstract style."
+"An illustration of a mountain in the style of Impressionism with a wide aspect ratio."
+"A photograph of a steampunk alien taken from a low-angle viewpoint."
+"A sketch of a raccoon in bright colors and minimalist composition."
+
+You will be given a set of conversation items and you will determine whether the intent of the user(s) is to draw/create a picture or not. If the intent is to
+draw a picture, extract a prompt for the image to draw for use in systems like DALL-E. Respond with JSON after you determine intent to draw or not, in this format:
+
+{
+    "intent_to_draw": true/false,
+    "prompt": "prompt to draw",
+    "amount": 1
+}
+
+For example, you determined intent to draw a cat sitting on a chair:
+{
+    "intent_to_draw": true,
+    "prompt": "A cat sitting on a chair",
+    "amount": 1
+
+}
+For example, you determined no intent:
+{
+    "intent_to_draw": false,
+    "prompt": "",
+    "amount": 1
+}
+Make sure you use double quotes around all keys and string values. Be sure to OMIT trailing commas.
+As you can see, the default amount should always be one, but a user can draw up to 4 images. Be hesitant to draw more than 3 images.
+Only signify an intent to draw when the user has explicitly asked you to draw. Sometimes there may be situations where the user is asking you to brainstorm a prompt
+but not necessarily draw it; if you are unsure, ask the user explicitly. Ensure your JSON strictly conforms to the format above, and only output the raw JSON with no other text.
diff --git a/conversation_drawing_ability_snippet.txt b/conversation_drawing_ability_snippet.txt
@@ -0,0 +1,20 @@
+You are able to draw images in this conversation. Only draw when EXPLICITLY asked to do so; otherwise, work on a prompt with the user and, if you're discussing drawing in the first place, ask them if they'd like to draw.
+Images that you draw will automatically be sent in chat to the user, so you don't need to name the file or provide the file yourself. You will already have knowledge of what is drawn and will simply describe it.
+Here are good prompting tips:
+Describe the Image Content: Start your prompt with the type of image you want, such as "A photograph of...", "A 3D rendering of...", "A sketch of...", or "An illustration of...".
+Describe the Subject: Clearly state the subject of your image. It could be anything from a person or animal to an abstract concept. Be specific to guide the AI, e.g., "An illustration of an owl...", "A photograph of a president...", "A 3D rendering of a chair...".
+Add Relevant Details: Include details like colors, shapes, sizes, and textures. Rather than just saying "bear", specify the type (e.g., "brown and black, grizzly or polar"), surroundings (e.g., "a forest or mountain range"), and other details.
+Describe the Form and Style: Provide details about the form and style, using keywords like "abstract", "minimalist", or "surreal". You can also mention specific artists or artworks to mimic their style, e.g., "Like Salvador Dali" or "Like Andy Warhol’s Shot Marilyns painting".
+Define the Composition: Use keywords to define the composition, such as resolution, lighting style, aspect ratio, and camera view.
+Additional Tips:
+Use understandable keywords; avoid overly complicated or uncommon words.
+Keep prompts concise; aim for 3 to 7 words, and avoid being overly descriptive.
+Use multiple adjectives to describe your art’s subject, style, and composition.
+Avoid conflicting terms with opposite meanings.
+Use AI copywriting tools like ChatGPT for prompt generation.
+Research the specific AI art tool you’re using for recognized keywords.
+Examples:
+"A 3D rendering of a tree with bright yellow leaves and an abstract style."
+"An illustration of a mountain in the style of Impressionism with a wide aspect ratio."
+"A photograph of a steampunk alien taken from a low-angle viewpoint."
+"A sketch of a raccoon in bright colors and minimalist composition."
diff --git a/gpt3discord.py b/gpt3discord.py
@@ -34,7 +34,7 @@
 from models.openai_model import Model
 
 
-__version__ = "12.3.5"
+__version__ = "12.3.6"
 
 
 PID_FILE = Path("bot.pid")

diff --git a/services/text_service.py b/services/text_service.py
@@ -353,22 +353,6 @@ async def encapsulated_send(
                 usage_message = None
 
             if is_chatgpt_conversation:
-                if is_drawable:
-                    converser_cog.conversation_threads[ctx.channel.id].history[
-                        0
-                    ].text += (
-                        "\nYou are able to draw images in this conversation. Only draw when EXPLICITLY asked to "
-                        "do so, otherwise, work on a prompt with the user and ask them if they'd like to draw, "
-                        "if you're discussing drawing in the first place. Images that you draw will "
-                        "automatically be sent in chat to the user so you don't need to name the file or "
-                        "provide the file yourself, you will already have knowledge of what is drawn and will "
-                        "simply describe it."
-                    )
-                else:
-                    converser_cog.conversation_threads[ctx.channel.id].history[
-                        0
-                    ].text += "\nYou are unable to draw images in this conversation. Ask the user to start a conversation with gpt-4-vision with the `draw` option turned on in order to have this ability."
-
                 _prompt_with_history = converser_cog.conversation_threads[
                     ctx.channel.id
                 ].history
@@ -902,6 +886,25 @@ async def process_conversation_message(
             ):
                 print("Checking for if the user asked to draw")
                 draw_check_prompt = """
+                Here are some good prompting tips:
+                Describe the Image Content: Start your prompt with the type of image you want, such as "A photograph of...", "A 3D rendering of...", "A sketch of...", or "An illustration of...".
+                Describe the Subject: Clearly state the subject of your image. It could be anything from a person or animal to an abstract concept. Be specific to guide the AI, e.g., "An illustration of an owl...", "A photograph of a president...", "A 3D rendering of a chair...".
+                Add Relevant Details: Include details like colors, shapes, sizes, and textures. Rather than just saying "bear", specify the type (e.g., "brown and black, grizzly or polar"), surroundings (e.g., "a forest or mountain range"), and other details.
+                Describe the Form and Style: Provide details about the form and style, using keywords like "abstract", "minimalist", or "surreal". You can also mention specific artists or artworks to mimic their style, e.g., "Like Salvador Dali" or "Like Andy Warhol’s Shot Marilyns painting".
+                Define the Composition: Use keywords to define the composition, such as resolution, lighting style, aspect ratio, and camera view.
+                Additional Tips:
+                Use understandable keywords; avoid overly complicated or uncommon words.
+                Keep prompts concise; aim for 3 to 7 words, but avoid being overly descriptive.
+                Use multiple adjectives to describe your art’s subject, style, and composition.
+                Avoid conflicting terms with opposite meanings.
+                Use AI copywriting tools like ChatGPT for prompt generation.
+                Research the specific AI art tool you’re using for recognized keywords.
+                Examples:
+                "A 3D rendering of a tree with bright yellow leaves and an abstract style."
+                "An illustration of a mountain in the style of Impressionism with a wide aspect ratio."
+                "A photograph of a steampunk alien taken from a low-angle viewpoint."
+                "A sketch of a raccoon in bright colors and minimalist composition."       
+                
                 You will be given a set of conversation items and you will determine if the intent of the user(s) are to draw/create a picture or not, if the intent is to
                 draw a picture, extract a prompt for the image to draw for use in systems like DALL-E. Respond with JSON after you determine intent to draw or not. In this format: