diff --git a/cogs/text_service_cog.py b/cogs/text_service_cog.py
index fa5151be..b2b67d45 100644
--- a/cogs/text_service_cog.py
+++ b/cogs/text_service_cog.py
@@ -87,7 +87,7 @@ def __init__(
         DEBUG_GUILD,
         DEBUG_CHANNEL,
         data_path: Path,
-        pinecone_service,
+        qdrant_service,
         pickle_queue,
     ):
         super().__init__()
@@ -126,8 +126,10 @@ def __init__(
         self.instructions = defaultdict(list)
         self.summarize = self.model.summarize_conversations
 
         # Pinecone data
-        self.pinecone_service = pinecone_service
+        # self.pinecone_service = pinecone_service
+
+        self.qdrant_service = qdrant_service
 
         # Sharing service
         self.sharegpt_service = ShareGPTService()
@@ -1380,7 +1383,7 @@ async def converse_command(
         )
         if target.id in self.conversation_threads:
             self.awaiting_responses.append(user_id_normalized)
-            if not self.pinecone_service:
+            if not self.qdrant_service:
                 self.conversation_threads[target.id].history.append(
                     EmbeddedConversationItem(
                         f"\n{ctx.author.display_name}: {opener} <|endofstatement|>\n",
@@ -1404,7 +1407,7 @@ async def converse_command(
             (
                 opener
                 if target.id not in self.conversation_threads
-                or self.pinecone_service
+                or self.qdrant_service
                 else "".join(
                     [
                         item.text
diff --git a/gpt3discord.py b/gpt3discord.py
index af81e187..1b83be9b 100644
--- a/gpt3discord.py
+++ b/gpt3discord.py
@@ -33,6 +33,9 @@
 
 from models.openai_model import Model
 
+from qdrant_client import QdrantClient
+from services.qdrant_service import QdrantService
+
 __version__ = "12.3.8"
 
 
@@ -70,6 +73,23 @@
 pinecone_service = PineconeService(pinecone.Index(PINECONE_INDEX))
 print("Got the pinecone service")
 
+# Optional Qdrant settings; os.getenv simply returns None when a variable is unset
+QDRANT_TOKEN = os.getenv("QDRANT_TOKEN")
+QDRANT_URL = os.getenv("QDRANT_CLUSTER_URL")
+
+qdrant_service = None
+if QDRANT_TOKEN and QDRANT_URL:
+    qdrant_client = QdrantClient(
+        url=QDRANT_URL,
+        api_key=QDRANT_TOKEN,
+    )
+    qdrant_service = QdrantService(qdrant_client)
+    # Check if the conversation-embeddings collection already exists, else make one
+    collections = qdrant_client.get_collections().collections
+    if not any(collection.name == "conversation-embeddings" for collection in collections):
+        qdrant_service.make_collection()
+    print("Got the Qdrant service")
+
 #
 # Message queueing for the debug service, defer debug messages to be sent later so we don't hit rate limits.
 #
@@ -143,7 +166,7 @@ async def main():
             debug_guild,
             debug_channel,
             data_path,
-            pinecone_service=pinecone_service,
+            qdrant_service=qdrant_service,
             pickle_queue=pickle_queue,
         )
     )
diff --git a/requirements.txt b/requirements.txt
index 67495019..4cc71a4e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -35,3 +35,4 @@ replicate==0.15.4
 tiktoken==0.4.0
 pydantic==2.4.2
 e2b==0.10.6
+qdrant-client
diff --git a/sample.env b/sample.env
index 69959ef0..8b0f0fed 100644
--- a/sample.env
+++ b/sample.env
@@ -13,6 +13,8 @@ OPENAI_TOKEN = ""
 DISCORD_TOKEN = ""
 ## PINECONE_TOKEN = "" # pinecone token, if you have it enabled. See readme
 ## PINECONE_REGION = "" # add your region here if it's not us-west1-gcp
+QDRANT_TOKEN = ""
+QDRANT_CLUSTER_URL = ""
 ## GOOGLE_SEARCH_API_KEY = "" # allows internet searches and chats
 ## GOOGLE_SEARCH_ENGINE_ID = "" # allows internet searches and chats
 ## DEEPL_TOKEN = "" # allows human language translations from DeepL API
@@ -25,10 +27,10 @@ DISCORD_TOKEN = ""
 ### DISCORD SERVER/CHANNEL CONFIGURATION
 ################################################################################
 
-DEBUG_GUILD = "974519864045756446" # discord_server_id for debug messages
-DEBUG_CHANNEL = "977697652147892304" # discord_chanel_id where the messages will be posted
-ALLOWED_GUILDS = "971268468148166697,971268468148166697" # server ids where the bot should add its commands
-MODERATIONS_ALERT_CHANNEL = "977697652147892304" # Moderations Service alert channel, this is where moderation alerts will be sent as a default if enabled
+DEBUG_GUILD = "" # discord_server_id for debug messages
+DEBUG_CHANNEL = "" # discord_channel_id where the messages will be posted
+ALLOWED_GUILDS = "" # server ids where the bot should add its commands
+MODERATIONS_ALERT_CHANNEL = "" # Moderations Service alert channel; moderation alerts are sent here by default if enabled
 
 ################################################################################
 ### ROLE PERMISSIONS
diff --git a/services/qdrant_service.py b/services/qdrant_service.py
new file mode 100644
index 00000000..55ca87b2
--- /dev/null
+++ b/services/qdrant_service.py
@@ -0,0 +1,80 @@
+from qdrant_client import QdrantClient
+from qdrant_client.models import Distance, VectorParams
+from qdrant_client.http import models
+import uuid
+
+
+class QdrantService:
+    def __init__(self, client: QdrantClient):
+        self.client = client
+
+    def make_collection(self):
+        # One collection for all conversations; 1536 is the dimensionality of
+        # OpenAI's text-embedding-ada-002 vectors
+        self.client.create_collection(
+            collection_name="conversation-embeddings",
+            vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
+        )
+
+    def upsert_basic(self, text, embedding, conversation_id: int, timestamp):
+        self.client.upsert(
+            collection_name="conversation-embeddings",
+            points=[
+                models.PointStruct(
+                    # Can't upsert the text as the ID like Pinecone; use a random
+                    # UUID instead and store the text as a payload field
+                    id=str(uuid.uuid4()),
+                    payload={
+                        "text": text,
+                        "conversation_id": conversation_id,
+                        "timestamp": timestamp,
+                    },
+                    vector=embedding,
+                )
+            ],
+        )
+
+    async def upsert_conversation_embedding(
+        self, model, conversation_id: int, text, timestamp, custom_api_key=None
+    ):
+        first_embedding = None
+        if len(text) > 500:
+            # Split the text into 500-character chunks
+            chunks = [text[i : i + 500] for i in range(0, len(text), 500)]
+            for chunk in chunks:
+                # Create an embedding for each chunk
+                embedding = await model.send_embedding_request(
+                    chunk, custom_api_key=custom_api_key
+                )
+                if not first_embedding:
+                    first_embedding = embedding
+                self.upsert_basic(chunk, embedding, conversation_id, timestamp)
+            return first_embedding
+        embedding = await model.send_embedding_request(
+            text, custom_api_key=custom_api_key
+        )
+        self.upsert_basic(text, embedding, conversation_id, timestamp)
+        return embedding
+
+    def get_n_similar(self, conversation_id: int, embedding, n=10):
+        # Restrict the vector search to points from this conversation only
+        response = self.client.search(
+            collection_name="conversation-embeddings",
+            query_filter=models.Filter(
+                must=[
+                    models.FieldCondition(
+                        key="conversation_id",
+                        match=models.MatchValue(
+                            value=conversation_id,
+                        ),
+                    )
+                ]
+            ),
+            query_vector=embedding,
+            with_payload=["text", "timestamp"],
+            limit=n,
+        )
+        relevant_phrases = [
+            (match.payload["text"], match.payload["timestamp"])
+            for match in response
+        ]
+        return relevant_phrases
diff --git a/services/text_service.py b/services/text_service.py
index ff1c2319..3b72044d 100644
--- a/services/text_service.py
+++ b/services/text_service.py
@@ -121,9 +121,9 @@ async def encapsulated_send(
             ctx.author.display_name if not user else user.display_name
         )
 
-        # Pinecone is enabled, we will create embeddings for this conversation.
+        # Qdrant is enabled, so we will create embeddings for this conversation.
         if (
-            converser_cog.pinecone_service
+            converser_cog.qdrant_service
             and ctx.channel.id in converser_cog.conversation_threads
         ):
             for item in converser_cog.conversation_threads[ctx.channel.id].history:
@@ -168,7 +168,7 @@ async def encapsulated_send(
                     converser_cog.redo_users[ctx.author.id].prompt = new_prompt
                 else:
                     # Create and upsert the embedding for the conversation id, prompt, timestamp
-                    await converser_cog.pinecone_service.upsert_conversation_embedding(
+                    await converser_cog.qdrant_service.upsert_conversation_embedding(
                         converser_cog.model,
                         conversation_id,
                         new_prompt,
@@ -181,8 +181,8 @@ async def encapsulated_send(
                 prompt_less_author, custom_api_key=custom_api_key
             )  # Use the version of the prompt without the author's name for better clarity on retrieval.
 
-            # Now, build the new prompt by getting the X most similar with pinecone
-            similar_prompts = converser_cog.pinecone_service.get_n_similar(
+            # Now, build the new prompt by getting the X most similar with Qdrant
+            similar_prompts = converser_cog.qdrant_service.get_n_similar(
                 conversation_id,
                 embedding_prompt_less_author,
                 n=converser_cog.model.num_conversation_lookback,
@@ -228,8 +228,8 @@
             # remove duplicates from prompt_with_history and set the conversation history
             _prompt_with_history = list(dict.fromkeys(_prompt_with_history))
 
-            # Sort the prompt_with_history by increasing timestamp if pinecone is enabled
-            if converser_cog.pinecone_service:
+            # Sort the prompt_with_history by increasing timestamp if Qdrant is enabled
+            if converser_cog.qdrant_service:
                 _prompt_with_history.sort(key=lambda x: x.timestamp)
 
             # Remove the last two entries after sort, this is from the end of the list as prompt(redo), answer, prompt(original), leaving only prompt(original) and further history
@@ -256,13 +256,13 @@
             tokens = converser_cog.usage_service.count_tokens(new_prompt)
 
-        # No pinecone, we do conversation summarization for long term memory instead
+        # No Qdrant, so we do conversation summarization for long-term memory instead
         elif (
            id in converser_cog.conversation_threads
            and tokens > converser_cog.model.summarize_threshold
            and not from_ask_command
            and not from_edit_command
-            and not converser_cog.pinecone_service
+            and not converser_cog.qdrant_service
             # This should only happen if we are not doing summarizations.
         ):
 
             # We don't need to worry about the differences between interactions and messages in this block,
@@ -443,7 +443,7 @@
         if (
             ctx.channel.id in converser_cog.conversation_threads
             and not from_ask_command
-            and not converser_cog.pinecone_service
+            and not converser_cog.qdrant_service
         ):
             if not redo_request:
                 converser_cog.conversation_threads[ctx.channel.id].history.append(
@@ -461,7 +461,7 @@
             ctx.channel.id in converser_cog.conversation_threads
             and not from_ask_command
             and not from_edit_command
-            and converser_cog.pinecone_service
+            and converser_cog.qdrant_service
         ):
             conversation_id = ctx.channel.id
 
@@ -486,7 +486,7 @@
 
             # Create and upsert the embedding for the conversation id, prompt, timestamp
             embedding = (
-                await converser_cog.pinecone_service.upsert_conversation_embedding(
+                await converser_cog.qdrant_service.upsert_conversation_embedding(
                     converser_cog.model,
                     conversation_id,
                     response_text,
@@ -574,7 +574,7 @@
                 ),
             )
             converser_cog.redo_users[ctx.author.id] = RedoUser(
-                prompt=new_prompt if not converser_cog.pinecone_service else prompt,
+                prompt=new_prompt if not converser_cog.qdrant_service else prompt,
                 instruction=instruction,
                 ctx=ctx,
                 message=ctx,
@@ -859,7 +859,7 @@ async def process_conversation_message(
             converser_cog.awaiting_responses.append(message.author.id)
             converser_cog.awaiting_thread_responses.append(message.channel.id)
 
-            if not converser_cog.pinecone_service:
+            if not converser_cog.qdrant_service:
                 converser_cog.conversation_threads[
                     message.channel.id
                 ].history.append(
@@ -877,7 +877,7 @@
             # TODO: This should be encapsulated better into some other service or function so we're not cluttering this text service file, this text service file is gross right now..
             if (
                 "-vision" in model
-                and not converser_cog.pinecone_service
+                and not converser_cog.qdrant_service
                 and converser_cog.conversation_threads[message.channel.id].drawable
             ):
                 print("Checking for if the user asked to draw")
@@ -997,7 +997,7 @@
         # Send the request to the model
         # If conversing, the prompt to send is the history, otherwise, it's just the prompt
         if (
-            converser_cog.pinecone_service
+            converser_cog.qdrant_service
             or message.channel.id not in converser_cog.conversation_threads
         ):
             primary_prompt = prompt
@@ -1026,7 +1026,7 @@
             thinking_message = await TextService.trigger_thinking(message)
         converser_cog.full_conversation_history[message.channel.id].append(prompt)
 
-        if not converser_cog.pinecone_service:
+        if not converser_cog.qdrant_service:
             primary_prompt += BOT_NAME
 
         await TextService.encapsulated_send(
@@ -1083,8 +1083,8 @@ async def process_conversation_edit(converser_cog, after, original_message):
                 ]
             )
 
-            pinecone_dont_reinsert = None
-            if not converser_cog.pinecone_service:
+            qdrant_dont_reinsert = None
+            if not converser_cog.qdrant_service:
                 converser_cog.conversation_threads[
                     after.channel.id
                 ].history.append(
@@ -1118,7 +1118,7 @@
                 edited_request=True,
             )
 
-            if not converser_cog.pinecone_service:
+            if not converser_cog.qdrant_service:
                 converser_cog.redo_users[after.author.id].prompt = edited_content
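
A minimal smoke-test sketch of the new service, for trying it outside the bot. It assumes the repo root is on the import path, substitutes a hypothetical FakeModel for the bot's OpenAI Model (only the send_embedding_request coroutine is mimicked, with random vectors), and uses qdrant-client's ":memory:" local mode in place of a hosted cluster:

import asyncio
import random
import time

from qdrant_client import QdrantClient

from services.qdrant_service import QdrantService


class FakeModel:
    # Hypothetical stand-in for the bot's OpenAI Model: returns random
    # 1536-dimensional vectors instead of calling the embeddings API
    async def send_embedding_request(self, text, custom_api_key=None):
        return [random.random() for _ in range(1536)]


async def demo():
    # ":memory:" runs qdrant-client's in-process local mode, handy for a
    # smoke test; the bot itself connects with url=QDRANT_CLUSTER_URL and
    # api_key=QDRANT_TOKEN
    service = QdrantService(QdrantClient(":memory:"))
    service.make_collection()

    model, conversation_id = FakeModel(), 1234
    await service.upsert_conversation_embedding(
        model, conversation_id, "Hello there <|endofstatement|>", time.time()
    )

    # Retrieve the most similar stored phrases for a new query embedding
    query = await model.send_embedding_request("Hello")
    print(service.get_n_similar(conversation_id, query, n=5))


asyncio.run(demo())

Unlike the Pinecone service, which upserted the text itself as the vector ID, the point IDs here are throwaway UUIDs; get_n_similar reads the text and timestamp back out of the payload, so the IDs never need to carry meaning.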