diff --git a/conversation_starter_pretext_minimal.txt b/conversation_starter_pretext_minimal.txt index 8196c7b1..c8901951 100644 --- a/conversation_starter_pretext_minimal.txt +++ b/conversation_starter_pretext_minimal.txt @@ -12,4 +12,4 @@ Human: Image Info-Caption: a landscape with a river and trees\nImage Info-QA: ye : This is a landscape with a river and trees, it is indeed cartoony! <|endofstatement|> ... - and will be given to you in an actual conversation. \ No newline at end of file +You speak in a fun, casual, and friendly tone, you're not overly inquisitive, you don't worry about formalities and speak as if you are speaking with a friend or peer. \ No newline at end of file diff --git a/conversation_starter_pretext_vision.txt b/conversation_starter_pretext_vision.txt index 338bdf60..59771937 100644 --- a/conversation_starter_pretext_vision.txt +++ b/conversation_starter_pretext_vision.txt @@ -24,4 +24,6 @@ Human: I'm making a discord bot <|endofstatement|> : that's pretty hype, I've never made one of those before, what part are you on right now? <|endofstatement|> ... +You are able to draw (generate) images. When the user asks you to draw something, let them know enthusiastically that you can do so, and work on a prompt with them. The rest will be handled automatically and you will see images that you've drawn appear in your conversation history. + You speak in a fun, casual, and friendly tone, you're not overly inquisitive, you don't worry about formalities and speak as if you are speaking with a friend or peer. 
diff --git a/gpt3discord.py b/gpt3discord.py index d68a9c06..b914d1c4 100644 --- a/gpt3discord.py +++ b/gpt3discord.py @@ -34,7 +34,7 @@ from models.openai_model import Model -__version__ = "12.2.9" +__version__ = "12.3.0" PID_FILE = Path("bot.pid") diff --git a/models/openai_model.py b/models/openai_model.py index 42bce127..89ca7514 100644 --- a/models/openai_model.py +++ b/models/openai_model.py @@ -1,5 +1,6 @@ import asyncio import functools +import json import math import os import re @@ -1001,7 +1002,7 @@ async def send_chatgpt_chat_request( messages.append( { "role": role, - "name": username_clean if role == "user" else bot_name, + "name": username_clean if role == "user" else bot_name_clean, "content": text, } ) @@ -1011,7 +1012,7 @@ async def send_chatgpt_chat_request( messages.append( { "role": role, - "name": username_clean if role == "user" else bot_name, + "name": username_clean if role == "user" else bot_name_clean, "content": [ {"type": "text", "text": text}, ], @@ -1027,7 +1028,7 @@ async def send_chatgpt_chat_request( messages.append( { "role": role, - "name": username_clean if role == "user" else bot_name, + "name": username_clean if role == "user" else bot_name_clean, "content": [ {"type": "text", "text": text}, ], @@ -1084,6 +1085,19 @@ async def send_chatgpt_chat_request( ) print(f"Response -> {response}") + # Temporary until we can ensure json response via the API, for some reason upstream pydantic complains when + # we pass response_format in the request.. 
+ if respond_json: + response_text = response["choices"][0]["message"]["content"].strip() + response_text = response_text.replace("```json", "") + response_text = response_text.replace("```", "") + try: + response_text = json.loads(response_text) + return response_text + except Exception: + raise ValueError("Could not decode JSON response from the API") + + return response @backoff.on_exception( diff --git a/services/text_service.py b/services/text_service.py index 98f50148..1b8b69de 100644 --- a/services/text_service.py +++ b/services/text_service.py @@ -29,6 +29,33 @@ class TextService: def __init__(self): pass + @staticmethod + async def trigger_thinking(message: discord.Message, is_drawing=None): + thinking_embed = discord.Embed( + title=f"🤖💬 Thinking..." if not is_drawing else f"🤖🎨 Drawing...", + color=0x808080, + ) + + thinking_embed.set_footer(text="This may take a few seconds.") + try: + thinking_message = await message.reply(embed=thinking_embed) + except: + thinking_message = None + + try: + await message.channel.trigger_typing() + except Exception: + thinking_message = None + + return thinking_message + + @staticmethod + async def stop_thinking(thinking_message: discord.Message): + try: + await thinking_message.delete() + except: + pass + @staticmethod async def encapsulated_send( converser_cog, @@ -859,28 +886,36 @@ async def process_conversation_message( { "intent_to_draw": true/false, - "prompt": "prompt to draw" + "prompt": "prompt to draw", + "amount": 1 } For example, you determined intent to draw a cat sitting on a chair: { "intent_to_draw": true, - "prompt": "A cat sitting on a chair" + "prompt": "A cat sitting on a chair", + "amount": 1 + } For example, you determined no intent: { "intent_to_draw": false, - "prompt": "" + "prompt": "", + "amount": 1 } + Make sure you use double quotes around all keys and values. Ensure to OMIT trailing commas. + As you can see, the default amount should always be one, but a user can draw up to 4 images. 
Be hesitant to draw more than 3 images. Only signify an intent to draw when the user has explicitly asked you to draw, sometimes there may be situations where the user is asking you to brainstorm a prompt - but not neccessarily draw it, if you are unsure, ask the user explicitly. + but not neccessarily draw it, if you are unsure, ask the user explicitly. Ensure your JSON strictly confirms, only output the raw json. no other text. """ last_messages = converser_cog.conversation_threads[ message.channel.id ].history[-6:] # Get the last 6 messages to determine context on whether we should draw last_messages = last_messages[1:] try: - response = await converser_cog.model.send_chatgpt_chat_request( + thinking_message = await TextService.trigger_thinking(message) + + response_json = await converser_cog.model.send_chatgpt_chat_request( last_messages, "gpt-4-vision-preview", temp_override=0, @@ -889,46 +924,48 @@ async def process_conversation_message( system_prompt_override=draw_check_prompt, respond_json=True, ) - response_text = response["choices"][0]["message"]["content"].strip() - response_text = response_text.replace("```json", "") - response_text = response_text.replace("```", "") + await TextService.stop_thinking(thinking_message) # This validation is only until we figure out what's wrong with the json response mode for vision. 
- response_json = json.loads(response_text) if response_json["intent_to_draw"]: - thinking_embed = discord.Embed( - title=f"🤖💬 Drawing...", - color=0x808080, - ) + thinking_message = await TextService.trigger_thinking(message,is_drawing=True) - thinking_embed.set_footer(text="This may take a few seconds.") - try: - thinking_message = await message.reply(embed=thinking_embed) - except: - pass links = await converser_cog.model.send_image_request_within_conversation( response_json["prompt"], quality="hd", image_size="1024x1024", style="vivid", + num_images=response_json["amount"], ) - try: - thinking_message = await thinking_message.delete() - except: - pass + await TextService.stop_thinking(thinking_message) + image_markdowns = [] for num, link in enumerate(links): - await message.reply(f"[image{num}]({link})") + image_markdowns.append(f"[image{num}]({link})") + await message.reply(" ".join(image_markdowns)) converser_cog.conversation_threads[ message.channel.id ].history.append( EmbeddedConversationItem( - f"\n{BOT_NAME}: [I have just drawn the following images for the user, briefly describe the image and acknowledge that you've drawn it] <|endofstatement|>\n", + f"\nYou have just generated images for the user, notify the user about what you've drawn\n", 0, image_urls=links, ) ) except: + try: + await message.reply("I encountered an error while trying to draw..") + await thinking_message.delete() + converser_cog.conversation_threads[ + message.channel.id + ].history.append( + EmbeddedConversationItem( + f"\nYou just tried to generate an image but the generation failed. 
Notify the user of this now.>\n", + 0, + ) + ) + except: + pass traceback.print_exc() # Send the request to the model @@ -960,21 +997,7 @@ async def process_conversation_message( ) # Send an embed that tells the user that the bot is thinking - thinking_embed = discord.Embed( - title=f"🤖💬 Thinking...", - color=0x808080, - ) - - thinking_embed.set_footer(text="This may take a few seconds.") - try: - thinking_message = await message.reply(embed=thinking_embed) - except: - pass - - try: - await message.channel.trigger_typing() - except Exception: - pass + thinking_message = await TextService.trigger_thinking(message) converser_cog.full_conversation_history[message.channel.id].append(prompt) if not converser_cog.pinecone_service: @@ -991,7 +1014,7 @@ async def process_conversation_message( ) # Delete the thinking embed - await thinking_message.delete() + await TextService.stop_thinking(thinking_message) return True