From a9919a7de40c98adf8feb1418b38ee853f4e3991 Mon Sep 17 00:00:00 2001 From: Kaveen Kumarasinghe Date: Thu, 30 Nov 2023 01:02:52 -0500 Subject: [PATCH] make drawing understanding better --- cogs/text_service_cog.py | 42 ++++++++++++----- ...ion_drawing_ability_extraction_snippet.txt | 45 +++++++++++++++++++ conversation_drawing_ability_snippet.txt | 20 +++++++++ gpt3discord.py | 2 +- services/text_service.py | 35 ++++++++------- 5 files changed, 117 insertions(+), 27 deletions(-) create mode 100644 conversation_drawing_ability_extraction_snippet.txt create mode 100644 conversation_drawing_ability_snippet.txt diff --git a/cogs/text_service_cog.py b/cogs/text_service_cog.py index a4f026e1..187a3e76 100644 --- a/cogs/text_service_cog.py +++ b/cogs/text_service_cog.py @@ -132,7 +132,7 @@ def __init__( # Sharing service self.sharegpt_service = ShareGPTService() - try: + try: # TODO Clean this up, this is gross conversation_file_path = EnvService.find_shared_file( "conversation_starter_pretext.txt" ) @@ -175,6 +175,26 @@ def __init__( ) assert self.CONVERSATION_STARTER_TEXT_VISION is not None + conversation_drawing_ability_snippet = EnvService.find_shared_file( + "conversation_drawing_ability_snippet.txt" + ) + with conversation_drawing_ability_snippet.open("r") as f: + self.CONVERSATION_DRAWING_ABILITY_SNIPPET = f.read() + print( + f"Conversation starter text loaded from {conversation_drawing_ability_snippet}." + ) + assert self.CONVERSATION_DRAWING_ABILITY_SNIPPET is not None + + conversation_drawing_ability_extraction_snippet = EnvService.find_shared_file( + "conversation_drawing_ability_extraction_snippet.txt" + ) + with conversation_drawing_ability_extraction_snippet.open("r") as f: + self.CONVERSATION_DRAWING_ABILITY_EXTRACTION_SNIPPET = f.read() + print( + f"Conversation starter text loaded from {conversation_drawing_ability_extraction_snippet}." 
+ ) + assert self.CONVERSATION_DRAWING_ABILITY_EXTRACTION_SNIPPET is not None + except Exception: self.CONVERSATION_STARTER_TEXT = ( self.CONVERSATION_STARTER_TEXT_MINIMAL @@ -1315,17 +1335,19 @@ async def converse_command( # Append the starter text for gpt to the user's history so it gets concatenated with the prompt later if minimal or opener_file or opener: - self.conversation_threads[target.id].history.append( - EmbeddedConversationItem(self.CONVERSATION_STARTER_TEXT_MINIMAL, 0) - ) + starting_text = self.CONVERSATION_STARTER_TEXT_MINIMAL elif not minimal and not "-vision" in model_selection: - self.conversation_threads[target.id].history.append( - EmbeddedConversationItem(self.CONVERSATION_STARTER_TEXT, 0) - ) + starting_text = self.CONVERSATION_STARTER_TEXT else: # Vision case, dont add the image-ocr image-caption, etc helpers here. - self.conversation_threads[target.id].history.append( - EmbeddedConversationItem(self.CONVERSATION_STARTER_TEXT_VISION, 0) - ) + starting_text = self.CONVERSATION_STARTER_TEXT_VISION + + if draw: + starting_text += self.CONVERSATION_DRAWING_ABILITY_SNIPPET + else: + starting_text += "You are unable to draw images in this conversation. Ask the user to start a conversation with gpt-4-vision with the `draw` option turned on in order to have this ability." 
+ self.conversation_threads[target.id].history.append( + EmbeddedConversationItem(starting_text, 0) + ) # Set user as thread owner before sending anything that can error and leave the thread unowned self.conversation_thread_owners[user_id_normalized].append(target.id) diff --git a/conversation_drawing_ability_extraction_snippet.txt b/conversation_drawing_ability_extraction_snippet.txt new file mode 100644 index 00000000..d947afdc --- /dev/null +++ b/conversation_drawing_ability_extraction_snippet.txt @@ -0,0 +1,45 @@ +Here are some good prompting tips: +Describe the Image Content: Start your prompt with the type of image you want, such as "A photograph of...", "A 3D rendering of...", "A sketch of...", or "An illustration of...". +Describe the Subject: Clearly state the subject of your image. It could be anything from a person or animal to an abstract concept. Be specific to guide the AI, e.g., "An illustration of an owl...", "A photograph of a president...", "A 3D rendering of a chair...". +Add Relevant Details: Include details like colors, shapes, sizes, and textures. Rather than just saying "bear", specify the type (e.g., "brown and black, grizzly or polar"), surroundings (e.g., "a forest or mountain range"), and other details. +Describe the Form and Style: Provide details about the form and style, using keywords like "abstract", "minimalist", or "surreal". You can also mention specific artists or artworks to mimic their style, e.g., "Like Salvador Dali" or "Like Andy Warhol’s Shot Marilyns painting". +Define the Composition: Use keywords to define the composition, such as resolution, lighting style, aspect ratio, and camera view. +Additional Tips: +Use understandable keywords; avoid overly complicated or uncommon words. +Keep prompts concise; aim for 3 to 7 words, but avoid being overly descriptive. +Use multiple adjectives to describe your art’s subject, style, and composition. +Avoid conflicting terms with opposite meanings. 
+Use AI copywriting tools like ChatGPT for prompt generation. +Research the specific AI art tool you’re using for recognized keywords. +Examples: +"A 3D rendering of a tree with bright yellow leaves and an abstract style." +"An illustration of a mountain in the style of Impressionism with a wide aspect ratio." +"A photograph of a steampunk alien taken from a low-angle viewpoint." +"A sketch of a raccoon in bright colors and minimalist composition." + +You will be given a set of conversation items and you will determine if the intent of the user(s) are to draw/create a picture or not, if the intent is to +draw a picture, extract a prompt for the image to draw for use in systems like DALL-E. Respond with JSON after you determine intent to draw or not. In this format: + +{ + "intent_to_draw": true/false, + "prompt": "prompt to draw", + "amount": 1 +} + +For example, you determined intent to draw a cat sitting on a chair: +{ + "intent_to_draw": true, + "prompt": "A cat sitting on a chair", + "amount": 1 + +} +For example, you determined no intent: +{ + "intent_to_draw": false, + "prompt": "", + "amount": 1 +} +Make sure you use double quotes around all keys and values. Ensure to OMIT trailing commas. +As you can see, the default amount should always be one, but a user can draw up to 4 images. Be hesitant to draw more than 3 images. +Only signify an intent to draw when the user has explicitly asked you to draw, sometimes there may be situations where the user is asking you to brainstorm a prompt +but not necessarily draw it, if you are unsure, ask the user explicitly. Ensure your JSON strictly conforms, only output the raw json. no other text. \ No newline at end of file diff --git a/conversation_drawing_ability_snippet.txt b/conversation_drawing_ability_snippet.txt new file mode 100644 index 00000000..992ae275 --- /dev/null +++ b/conversation_drawing_ability_snippet.txt @@ -0,0 +1,20 @@ +You are able to draw images in this conversation. 
Only draw when EXPLICITLY asked to do so, otherwise, work on a prompt with the user and ask them if they'd like to draw, if you're discussing drawing in the first place. +Images that you draw will automatically be sent in chat to the user so you don't need to name the file or provide the file yourself, you will already have knowledge of what is drawn and will simply describe it. +Here are good prompting tips: +Describe the Image Content: Start your prompt with the type of image you want, such as "A photograph of...", "A 3D rendering of...", "A sketch of...", or "An illustration of...". +Describe the Subject: Clearly state the subject of your image. It could be anything from a person or animal to an abstract concept. Be specific to guide the AI, e.g., "An illustration of an owl...", "A photograph of a president...", "A 3D rendering of a chair...". +Add Relevant Details: Include details like colors, shapes, sizes, and textures. Rather than just saying "bear", specify the type (e.g., "brown and black, grizzly or polar"), surroundings (e.g., "a forest or mountain range"), and other details. +Describe the Form and Style: Provide details about the form and style, using keywords like "abstract", "minimalist", or "surreal". You can also mention specific artists or artworks to mimic their style, e.g., "Like Salvador Dali" or "Like Andy Warhol’s Shot Marilyns painting". +Define the Composition: Use keywords to define the composition, such as resolution, lighting style, aspect ratio, and camera view. +Additional Tips: +Use understandable keywords; avoid overly complicated or uncommon words. +Keep prompts concise; aim for 3 to 7 words, but avoid being overly descriptive. +Use multiple adjectives to describe your art’s subject, style, and composition. +Avoid conflicting terms with opposite meanings. +Use AI copywriting tools like ChatGPT for prompt generation. +Research the specific AI art tool you’re using for recognized keywords. 
+Examples: +"A 3D rendering of a tree with bright yellow leaves and an abstract style." +"An illustration of a mountain in the style of Impressionism with a wide aspect ratio." +"A photograph of a steampunk alien taken from a low-angle viewpoint." +"A sketch of a raccoon in bright colors and minimalist composition." diff --git a/gpt3discord.py b/gpt3discord.py index ff74a80e..6d17777f 100644 --- a/gpt3discord.py +++ b/gpt3discord.py @@ -34,7 +34,7 @@ from models.openai_model import Model -__version__ = "12.3.5" +__version__ = "12.3.6" PID_FILE = Path("bot.pid") diff --git a/services/text_service.py b/services/text_service.py index 54360541..d6d956bb 100644 --- a/services/text_service.py +++ b/services/text_service.py @@ -353,22 +353,6 @@ async def encapsulated_send( usage_message = None if is_chatgpt_conversation: - if is_drawable: - converser_cog.conversation_threads[ctx.channel.id].history[ - 0 - ].text += ( - "\nYou are able to draw images in this conversation. Only draw when EXPLICITLY asked to " - "do so, otherwise, work on a prompt with the user and ask them if they'd like to draw, " - "if you're discussing drawing in the first place. Images that you draw will " - "automatically be sent in chat to the user so you don't need to name the file or " - "provide the file yourself, you will already have knowledge of what is drawn and will " - "simply describe it." - ) - else: - converser_cog.conversation_threads[ctx.channel.id].history[ - 0 - ].text += "\nYou are unable to draw images in this conversation. Ask the user to start a conversation with gpt-4-vision with the `draw` option turned on in order to have this ability." 
- _prompt_with_history = converser_cog.conversation_threads[ ctx.channel.id ].history @@ -902,6 +886,25 @@ async def process_conversation_message( ): print("Checking for if the user asked to draw") draw_check_prompt = """ + Here are some good prompting tips: + Describe the Image Content: Start your prompt with the type of image you want, such as "A photograph of...", "A 3D rendering of...", "A sketch of...", or "An illustration of...". + Describe the Subject: Clearly state the subject of your image. It could be anything from a person or animal to an abstract concept. Be specific to guide the AI, e.g., "An illustration of an owl...", "A photograph of a president...", "A 3D rendering of a chair...". + Add Relevant Details: Include details like colors, shapes, sizes, and textures. Rather than just saying "bear", specify the type (e.g., "brown and black, grizzly or polar"), surroundings (e.g., "a forest or mountain range"), and other details. + Describe the Form and Style: Provide details about the form and style, using keywords like "abstract", "minimalist", or "surreal". You can also mention specific artists or artworks to mimic their style, e.g., "Like Salvador Dali" or "Like Andy Warhol’s Shot Marilyns painting". + Define the Composition: Use keywords to define the composition, such as resolution, lighting style, aspect ratio, and camera view. + Additional Tips: + Use understandable keywords; avoid overly complicated or uncommon words. + Keep prompts concise; aim for 3 to 7 words, but avoid being overly descriptive. + Use multiple adjectives to describe your art’s subject, style, and composition. + Avoid conflicting terms with opposite meanings. + Use AI copywriting tools like ChatGPT for prompt generation. + Research the specific AI art tool you’re using for recognized keywords. + Examples: + "A 3D rendering of a tree with bright yellow leaves and an abstract style." + "An illustration of a mountain in the style of Impressionism with a wide aspect ratio." 
+ "A photograph of a steampunk alien taken from a low-angle viewpoint." + "A sketch of a raccoon in bright colors and minimalist composition." + You will be given a set of conversation items and you will determine if the intent of the user(s) are to draw/create a picture or not, if the intent is to draw a picture, extract a prompt for the image to draw for use in systems like DALL-E. Respond with JSON after you determine intent to draw or not. In this format: