Kashifb/qdrant migration #472

Open · wants to merge 2 commits into base: main
11 changes: 7 additions & 4 deletions cogs/text_service_cog.py
@@ -87,7 +87,7 @@ def __init__(
         DEBUG_GUILD,
         DEBUG_CHANNEL,
         data_path: Path,
-        pinecone_service,
+        qdrant_service,
         pickle_queue,
     ):
         super().__init__()
@@ -126,8 +126,11 @@ def __init__(
         self.instructions = defaultdict(list)
         self.summarize = self.model.summarize_conversations
 
+
         # Pinecone data
-        self.pinecone_service = pinecone_service
+        # self.pinecone_service = pinecone_service
 
+        self.qdrant_service = qdrant_service
+
         # Sharing service
         self.sharegpt_service = ShareGPTService()
@@ -1380,7 +1383,7 @@ async def converse_command(
             )
             if target.id in self.conversation_threads:
                 self.awaiting_responses.append(user_id_normalized)
-                if not self.pinecone_service:
+                if not self.qdrant_service:
                     self.conversation_threads[target.id].history.append(
                         EmbeddedConversationItem(
                             f"\n{ctx.author.display_name}: {opener} <|endofstatement|>\n",
@@ -1404,7 +1407,7 @@ async def converse_command(
                 (
                     opener
                     if target.id not in self.conversation_threads
-                    or self.pinecone_service
+                    or self.qdrant_service
                     else "".join(
                         [
                             item.text
25 changes: 24 additions & 1 deletion gpt3discord.py
@@ -33,6 +33,9 @@
 
 from models.openai_model import Model
 
+from qdrant_client import QdrantClient
+from services.qdrant_service import QdrantService
+
 
 __version__ = "12.3.8"
 
@@ -70,6 +73,26 @@
     pinecone_service = PineconeService(pinecone.Index(PINECONE_INDEX))
     print("Got the pinecone service")
 
+try:
+    QDRANT_TOKEN = os.getenv("QDRANT_TOKEN")
+    QDRANT_URL = os.getenv("QDRANT_CLUSTER_URL")
+except Exception:
+    QDRANT_TOKEN = None
+    QDRANT_URL = None
+
+qdrant_service = None
+if QDRANT_TOKEN and QDRANT_URL:
+    qdrant_client = QdrantClient(
+        url=QDRANT_URL,
+        api_key=QDRANT_TOKEN,
+    )
+    qdrant_service = QdrantService(qdrant_client)
+    # Check if the conversation-embeddings collection already exists, else make one
+    collections = qdrant_client.get_collections().collections
+    if not any(collection.name == "conversation-embeddings" for collection in collections):
+        qdrant_service.make_collection()
+    print("Got the Qdrant service")
+
 #
 # Message queueing for the debug service, defer debug messages to be sent later so we don't hit rate limits.
 #
@@ -143,7 +166,7 @@ async def main():
                 debug_guild,
                 debug_channel,
                 data_path,
-                pinecone_service=pinecone_service,
+                qdrant_service=qdrant_service,
                 pickle_queue=pickle_queue,
             )
         )
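
A possible simplification of the existence check above, assuming qdrant-client >= 1.7 (which added QdrantClient.collection_exists); the explicit get_collections() scan in the diff also works on older clients:

# Sketch only, not part of the diff: requires qdrant-client >= 1.7.
if not qdrant_client.collection_exists("conversation-embeddings"):
    qdrant_service.make_collection()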
1 change: 1 addition & 0 deletions requirements.txt
@@ -35,3 +35,4 @@ replicate==0.15.4
 tiktoken==0.4.0
 pydantic==2.4.2
 e2b==0.10.6
+qdrant-client
10 changes: 6 additions & 4 deletions sample.env
@@ -13,6 +13,8 @@ OPENAI_TOKEN = "<openai_api_token>"
 DISCORD_TOKEN = "<discord_bot_token>"
 ## PINECONE_TOKEN = "<pinecone_token>" # pinecone token, if you have it enabled. See readme
 ## PINECONE_REGION = "<pinecone_region>" # add your region here if it's not us-west1-gcp
+QDRANT_TOKEN = "<qdrant_cluster_token>"
+QDRANT_CLUSTER_URL = "<qdrant_cluster_url>"
 ## GOOGLE_SEARCH_API_KEY = "<google_api_key>" # allows internet searches and chats
 ## GOOGLE_SEARCH_ENGINE_ID = "<google_engine_id>" # allows internet searches and chats
 ## DEEPL_TOKEN = "<deepl_token>" # allows human language translations from DeepL API
@@ -25,10 +27,10 @@ DISCORD_TOKEN = "<discord_bot_token>"
 ### DISCORD SERVER/CHANNEL CONFIGURATION
 ################################################################################
 
-DEBUG_GUILD = "974519864045756446" # discord_server_id for debug messages
-DEBUG_CHANNEL = "977697652147892304" # discord_chanel_id where the messages will be posted
-ALLOWED_GUILDS = "971268468148166697,971268468148166697" # server ids where the bot should add its commands
-MODERATIONS_ALERT_CHANNEL = "977697652147892304" # Moderations Service alert channel, this is where moderation alerts will be sent as a default if enabled
+DEBUG_GUILD = "<discord_server_id>" # discord_server_id for debug messages
+DEBUG_CHANNEL = "<discord_channel_id>" # discord_channel_id where the messages will be posted
+ALLOWED_GUILDS = "<discord_server_id>" # server ids where the bot should add its commands
+MODERATIONS_ALERT_CHANNEL = "<discord_channel_id>" # Moderations Service alert channel, this is where moderation alerts will be sent as a default if enabled
74 changes: 74 additions & 0 deletions services/qdrant_service.py
@@ -0,0 +1,74 @@
import uuid

from qdrant_client import QdrantClient
from qdrant_client.http import models
from qdrant_client.models import Distance, VectorParams


class QdrantService:
    def __init__(self, client: QdrantClient):
        self.client = client

    def make_collection(self):
        self.client.create_collection(
            collection_name="conversation-embeddings",
            vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
        )

    def upsert_basic(self, text, embedding, conversation_id: int, timestamp):
        self.client.upsert(
            collection_name="conversation-embeddings",
            points=[
                models.PointStruct(
                    # Can't upsert the text as the ID like pinecone; use a random
                    # UUID instead and add the text as a payload
                    id=str(uuid.uuid4()),
                    payload={
                        "text": text,
                        "conversation_id": conversation_id,
                        "timestamp": timestamp,
                    },
                    vector=embedding,
                )
            ],
        )

    async def upsert_conversation_embedding(
        self, model, conversation_id: int, text, timestamp, custom_api_key=None
    ):
        first_embedding = None
        if len(text) > 500:
            # Split the text into 500-character chunks
            chunks = [text[i : i + 500] for i in range(0, len(text), 500)]
            for chunk in chunks:
                # Create an embedding for the split chunk
                embedding = await model.send_embedding_request(
                    chunk, custom_api_key=custom_api_key
                )
                if not first_embedding:
                    first_embedding = embedding
                self.upsert_basic(chunk, embedding, conversation_id, timestamp)
            return first_embedding
        embedding = await model.send_embedding_request(
            text, custom_api_key=custom_api_key
        )
        self.upsert_basic(text, embedding, conversation_id, timestamp)
        return embedding

    def get_n_similar(self, conversation_id: int, embedding, n=10):
        response = self.client.search(
            collection_name="conversation-embeddings",
            query_filter=models.Filter(
                must=[
                    models.FieldCondition(
                        key="conversation_id",
                        match=models.MatchValue(value=conversation_id),
                    )
                ]
            ),
            query_vector=embedding,
            with_payload=["text", "timestamp"],
            limit=n,
        )
        relevant_phrases = [
            (match.payload["text"], match.payload["timestamp"])
            for match in response
        ]
        return relevant_phrases
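For review purposes, a minimal sketch of how the new service can be exercised end to end, using qdrant-client's local in-memory mode so no cluster is needed; the constant vector stands in for a real 1536-dimension OpenAI embedding, and the conversation id stands in for a Discord channel id:

# Sketch only, not part of the diff.
from qdrant_client import QdrantClient

from services.qdrant_service import QdrantService

client = QdrantClient(":memory:")  # local mode, nothing to deploy
service = QdrantService(client)
service.make_collection()

# Upsert two phrases under the same conversation id
vec = [0.1] * 1536  # stand-in for a real embedding
service.upsert_basic("Hello there <|endofstatement|>", vec, 1234, 1)
service.upsert_basic("General Kenobi <|endofstatement|>", vec, 1234, 2)

# Retrieve similar phrases for that conversation only; points stored under
# other conversation ids are excluded by the payload filter
print(service.get_n_similar(1234, vec, n=5))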
40 changes: 20 additions & 20 deletions services/text_service.py
@@ -121,9 +121,9 @@ async def encapsulated_send(
             ctx.author.display_name if not user else user.display_name
         )
 
-        # Pinecone is enabled, we will create embeddings for this conversation.
+        # Qdrant is enabled, we will create embeddings for this conversation.
         if (
-            converser_cog.pinecone_service
+            converser_cog.qdrant_service
             and ctx.channel.id in converser_cog.conversation_threads
         ):
             for item in converser_cog.conversation_threads[ctx.channel.id].history:
@@ -168,7 +168,7 @@ async def encapsulated_send(
                     converser_cog.redo_users[ctx.author.id].prompt = new_prompt
                 else:
                     # Create and upsert the embedding for the conversation id, prompt, timestamp
-                    await converser_cog.pinecone_service.upsert_conversation_embedding(
+                    await converser_cog.qdrant_service.upsert_conversation_embedding(
                         converser_cog.model,
                         conversation_id,
                         new_prompt,
@@ -181,8 +181,8 @@ async def encapsulated_send(
                 prompt_less_author, custom_api_key=custom_api_key
             )  # Use the version of the prompt without the author's name for better clarity on retrieval.
 
-            # Now, build the new prompt by getting the X most similar with pinecone
-            similar_prompts = converser_cog.pinecone_service.get_n_similar(
+            # Now, build the new prompt by getting the X most similar with Qdrant
+            similar_prompts = converser_cog.qdrant_service.get_n_similar(
                 conversation_id,
                 embedding_prompt_less_author,
                 n=converser_cog.model.num_conversation_lookback,
@@ -228,8 +228,8 @@ async def encapsulated_send(
             # remove duplicates from prompt_with_history and set the conversation history
             _prompt_with_history = list(dict.fromkeys(_prompt_with_history))
 
-            # Sort the prompt_with_history by increasing timestamp if pinecone is enabled
-            if converser_cog.pinecone_service:
+            # Sort the prompt_with_history by increasing timestamp if Qdrant is enabled
+            if converser_cog.qdrant_service:
                 _prompt_with_history.sort(key=lambda x: x.timestamp)
 
             # Remove the last two entries after sort, this is from the end of the list as prompt(redo), answer, prompt(original), leaving only prompt(original) and further history
@@ -256,13 +256,13 @@ async def encapsulated_send(
 
         tokens = converser_cog.usage_service.count_tokens(new_prompt)
 
-        # No pinecone, we do conversation summarization for long term memory instead
+        # No Qdrant, we do conversation summarization for long term memory instead
         elif (
             id in converser_cog.conversation_threads
             and tokens > converser_cog.model.summarize_threshold
             and not from_ask_command
             and not from_edit_command
-            and not converser_cog.pinecone_service
+            and not converser_cog.qdrant_service
             # This should only happen if we are not doing summarizations.
         ):
             # We don't need to worry about the differences between interactions and messages in this block,
@@ -443,7 +443,7 @@ async def encapsulated_send(
             if (
                 ctx.channel.id in converser_cog.conversation_threads
                 and not from_ask_command
-                and not converser_cog.pinecone_service
+                and not converser_cog.qdrant_service
             ):
                 if not redo_request:
                     converser_cog.conversation_threads[ctx.channel.id].history.append(
@@ -461,7 +461,7 @@ async def encapsulated_send(
                 ctx.channel.id in converser_cog.conversation_threads
                 and not from_ask_command
                 and not from_edit_command
-                and converser_cog.pinecone_service
+                and converser_cog.qdrant_service
             ):
                 conversation_id = ctx.channel.id
 
@@ -486,7 +486,7 @@ async def encapsulated_send(
 
                 # Create and upsert the embedding for the conversation id, prompt, timestamp
                 embedding = (
-                    await converser_cog.pinecone_service.upsert_conversation_embedding(
+                    await converser_cog.qdrant_service.upsert_conversation_embedding(
                         converser_cog.model,
                         conversation_id,
                         response_text,
@@ -574,7 +574,7 @@ async def encapsulated_send(
                 ),
             )
             converser_cog.redo_users[ctx.author.id] = RedoUser(
-                prompt=new_prompt if not converser_cog.pinecone_service else prompt,
+                prompt=new_prompt if not converser_cog.qdrant_service else prompt,
                 instruction=instruction,
                 ctx=ctx,
                 message=ctx,
@@ -859,7 +859,7 @@ async def process_conversation_message(
                 converser_cog.awaiting_responses.append(message.author.id)
                 converser_cog.awaiting_thread_responses.append(message.channel.id)
 
-                if not converser_cog.pinecone_service:
+                if not converser_cog.qdrant_service:
                     converser_cog.conversation_threads[
                         message.channel.id
                     ].history.append(
@@ -877,7 +877,7 @@ async def process_conversation_message(
             # TODO: This should be encapsulated better into some other service or function so we're not cluttering this text service file, this text service file is gross right now..
             if (
                 "-vision" in model
-                and not converser_cog.pinecone_service
+                and not converser_cog.qdrant_service
                 and converser_cog.conversation_threads[message.channel.id].drawable
             ):
                 print("Checking for if the user asked to draw")
@@ -997,7 +997,7 @@ async def process_conversation_message(
             # Send the request to the model
             # If conversing, the prompt to send is the history, otherwise, it's just the prompt
             if (
-                converser_cog.pinecone_service
+                converser_cog.qdrant_service
                 or message.channel.id not in converser_cog.conversation_threads
             ):
                 primary_prompt = prompt
@@ -1026,7 +1026,7 @@ async def process_conversation_message(
             thinking_message = await TextService.trigger_thinking(message)
         converser_cog.full_conversation_history[message.channel.id].append(prompt)
 
-        if not converser_cog.pinecone_service:
+        if not converser_cog.qdrant_service:
             primary_prompt += BOT_NAME
 
         await TextService.encapsulated_send(
@@ -1083,8 +1083,8 @@ async def process_conversation_edit(converser_cog, after, original_message):
                 ]
             )
 
-            pinecone_dont_reinsert = None
-            if not converser_cog.pinecone_service:
+            qdrant_dont_reinsert = None
+            if not converser_cog.qdrant_service:
                 converser_cog.conversation_threads[
                     after.channel.id
                 ].history.append(
@@ -1118,7 +1118,7 @@ async def process_conversation_edit(converser_cog, after, original_message):
                 edited_request=True,
            )
 
-            if not converser_cog.pinecone_service:
+            if not converser_cog.qdrant_service:
                 converser_cog.redo_users[after.author.id].prompt = edited_content
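
Taken together, the conversation-memory round trip that encapsulated_send now performs through Qdrant looks roughly like this (illustrative sketch, not code from the diff; the `model` object is assumed to expose send_embedding_request as it does elsewhere in the repo):

# Sketch only: the shape of the flow, with error handling and Discord
# plumbing omitted.
async def build_prompt_with_memory(qdrant_service, model, conversation_id, new_prompt, timestamp):
    # Embed and store the new prompt under this conversation id
    embedding = await qdrant_service.upsert_conversation_embedding(
        model, conversation_id, new_prompt, timestamp
    )
    # Pull the most similar prior phrases for this conversation only
    similar = qdrant_service.get_n_similar(conversation_id, embedding, n=10)
    # Rebuild the history in chronological order, as the timestamp sort in
    # encapsulated_send does
    similar.sort(key=lambda pair: pair[1])
    return "".join(text for text, _ in similar) + new_prompt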