Kashifb/qdrant migration #472

Open · wants to merge 2 commits into base: main
11 changes: 7 additions & 4 deletions cogs/text_service_cog.py
@@ -87,7 +87,7 @@ def __init__(
         DEBUG_GUILD,
         DEBUG_CHANNEL,
         data_path: Path,
-        pinecone_service,
+        qdrant_service,
         pickle_queue,
     ):
         super().__init__()
@@ -126,8 +126,11 @@ def __init__(
         self.instructions = defaultdict(list)
         self.summarize = self.model.summarize_conversations
 
+
         # Pinecone data
-        self.pinecone_service = pinecone_service
+        # self.pinecone_service = pinecone_service
 
+        self.qdrant_service = qdrant_service
+
         # Sharing service
         self.sharegpt_service = ShareGPTService()
@@ -1380,7 +1383,7 @@ async def converse_command(
             )
             if target.id in self.conversation_threads:
                 self.awaiting_responses.append(user_id_normalized)
-                if not self.pinecone_service:
+                if not self.qdrant_service:
                     self.conversation_threads[target.id].history.append(
                         EmbeddedConversationItem(
                             f"\n{ctx.author.display_name}: {opener} <|endofstatement|>\n",
@@ -1404,7 +1407,7 @@ async def converse_command(
                 (
                     opener
                     if target.id not in self.conversation_threads
-                    or self.pinecone_service
+                    or self.qdrant_service
                     else "".join(
                         [
                             item.text
25 changes: 24 additions & 1 deletion gpt3discord.py
@@ -33,6 +33,9 @@
 
 from models.openai_model import Model
 
+from qdrant_client import QdrantClient
+from services.qdrant_service import QdrantService
+
 
 __version__ = "12.3.8"
 
@@ -70,6 +73,26 @@
     pinecone_service = PineconeService(pinecone.Index(PINECONE_INDEX))
     print("Got the pinecone service")
 
+try:
+    QDRANT_TOKEN = os.getenv("QDRANT_TOKEN")
+    QDRANT_URL = os.getenv("QDRANT_CLUSTER_URL")
+except Exception:
+    QDRANT_TOKEN = None
+    QDRANT_URL = None
+
+qdrant_service = None
+if QDRANT_TOKEN and QDRANT_URL:
+    qdrant_client = QdrantClient(
+        url=QDRANT_URL,
+        api_key=QDRANT_TOKEN,
+    )
+    qdrant_service = QdrantService(qdrant_client)
+    # Check if the conversation-embeddings collection already exists, else make one
+    collections = qdrant_client.get_collections().collections
+    if not any(collection.name == "conversation-embeddings" for collection in collections):
+        qdrant_service.make_collection()
+    print("Got the Qdrant service")
+
 #
 # Message queueing for the debug service, defer debug messages to be sent later so we don't hit rate limits.
 #
@@ -143,7 +166,7 @@ async def main():
                 debug_guild,
                 debug_channel,
                 data_path,
-                pinecone_service=pinecone_service,
+                qdrant_service=qdrant_service,
                 pickle_queue=pickle_queue,
             )
         )
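
A possible simplification of the existence check above, assuming qdrant-client >= 1.7 (which added QdrantClient.collection_exists); the explicit get_collections() scan in the diff also works on older clients:

# Sketch only, not part of the diff: requires qdrant-client >= 1.7.
if not qdrant_client.collection_exists("conversation-embeddings"):
    qdrant_service.make_collection()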
1 change: 1 addition & 0 deletions requirements.txt
@@ -35,3 +35,4 @@ replicate==0.15.4
 tiktoken==0.4.0
 pydantic==2.4.2
 e2b==0.10.6
+qdrant-client
10 changes: 6 additions & 4 deletions sample.env
@@ -13,6 +13,8 @@ OPENAI_TOKEN = "<openai_api_token>"
 DISCORD_TOKEN = "<discord_bot_token>"
 ## PINECONE_TOKEN = "<pinecone_token>" # pinecone token, if you have it enabled. See readme
 ## PINECONE_REGION = "<pinecone_region>" # add your region here if it's not us-west1-gcp
+QDRANT_TOKEN = "<qdrant_cluster_token>"
+QDRANT_CLUSTER_URL = "<qdrant_cluster_url>"
 ## GOOGLE_SEARCH_API_KEY = "<google_api_key>" # allows internet searches and chats
 ## GOOGLE_SEARCH_ENGINE_ID = "<google_engine_id>" # allows internet searches and chats
 ## DEEPL_TOKEN = "<deepl_token>" # allows human language translations from DeepL API
@@ -25,10 +27,10 @@ DISCORD_TOKEN = "<discord_bot_token>"
 ### DISCORD SERVER/CHANNEL CONFIGURATION
 ################################################################################
 
-DEBUG_GUILD = "974519864045756446" # discord_server_id for debug messages
-DEBUG_CHANNEL = "977697652147892304" # discord_chanel_id where the messages will be posted
-ALLOWED_GUILDS = "971268468148166697,971268468148166697" # server ids where the bot should add its commands
-MODERATIONS_ALERT_CHANNEL = "977697652147892304" # Moderations Service alert channel, this is where moderation alerts will be sent as a default if enabled
+DEBUG_GUILD = "<discord_server_id>" # discord_server_id for debug messages
+DEBUG_CHANNEL = "<discord_channel_id>" # discord_channel_id where the messages will be posted
+ALLOWED_GUILDS = "<discord_server_id>" # server ids where the bot should add its commands
+MODERATIONS_ALERT_CHANNEL = "<discord_channel_id>" # Moderations Service alert channel, this is where moderation alerts will be sent as a default if enabled
74 changes: 74 additions & 0 deletions services/qdrant_service.py
@@ -0,0 +1,74 @@
import uuid

from qdrant_client import QdrantClient
from qdrant_client.http import models
from qdrant_client.models import Distance, VectorParams


class QdrantService:
    def __init__(self, client: QdrantClient):
        self.client = client

    def make_collection(self):
        self.client.create_collection(
            collection_name="conversation-embeddings",
            vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
        )

    def upsert_basic(self, text, embedding, conversation_id: int, timestamp):
        self.client.upsert(
            collection_name="conversation-embeddings",
            points=[
                models.PointStruct(
                    # Can't upsert the text as the ID like pinecone; use a random
                    # UUID instead and add the text as a payload
                    id=str(uuid.uuid4()),
                    payload={
                        "text": text,
                        "conversation_id": conversation_id,
                        "timestamp": timestamp,
                    },
                    vector=embedding,
                )
            ],
        )

    async def upsert_conversation_embedding(
        self, model, conversation_id: int, text, timestamp, custom_api_key=None
    ):
        first_embedding = None
        if len(text) > 500:
            # Split the text into 500-character chunks
            chunks = [text[i : i + 500] for i in range(0, len(text), 500)]
            for chunk in chunks:
                # Create an embedding for the split chunk
                embedding = await model.send_embedding_request(
                    chunk, custom_api_key=custom_api_key
                )
                if not first_embedding:
                    first_embedding = embedding
                self.upsert_basic(chunk, embedding, conversation_id, timestamp)
            return first_embedding
        embedding = await model.send_embedding_request(
            text, custom_api_key=custom_api_key
        )
        self.upsert_basic(text, embedding, conversation_id, timestamp)
        return embedding

    def get_n_similar(self, conversation_id: int, embedding, n=10):
        response = self.client.search(
            collection_name="conversation-embeddings",
            query_filter=models.Filter(
                must=[
                    models.FieldCondition(
                        key="conversation_id",
                        match=models.MatchValue(value=conversation_id),
                    )
                ]
            ),
            query_vector=embedding,
            with_payload=["text", "timestamp"],
            limit=n,
        )
        relevant_phrases = [
            (match.payload["text"], match.payload["timestamp"])
            for match in response
        ]
        return relevant_phrases
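For review purposes, a minimal sketch of how the new service can be exercised end to end, using qdrant-client's local in-memory mode so no cluster is needed; the constant vector stands in for a real 1536-dimension OpenAI embedding, and the conversation id stands in for a Discord channel id:

# Sketch only, not part of the diff.
from qdrant_client import QdrantClient

from services.qdrant_service import QdrantService

client = QdrantClient(":memory:")  # local mode, nothing to deploy
service = QdrantService(client)
service.make_collection()

# Upsert two phrases under the same conversation id
vec = [0.1] * 1536  # stand-in for a real embedding
service.upsert_basic("Hello there <|endofstatement|>", vec, 1234, 1)
service.upsert_basic("General Kenobi <|endofstatement|>", vec, 1234, 2)

# Retrieve similar phrases for that conversation only; points stored under
# other conversation ids are excluded by the payload filter
print(service.get_n_similar(1234, vec, n=5))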
40 changes: 20 additions & 20 deletions services/text_service.py
@@ -121,9 +121,9 @@ async def encapsulated_send(
             ctx.author.display_name if not user else user.display_name
         )
 
-        # Pinecone is enabled, we will create embeddings for this conversation.
+        # Qdrant is enabled, we will create embeddings for this conversation.
         if (
-            converser_cog.pinecone_service
+            converser_cog.qdrant_service
             and ctx.channel.id in converser_cog.conversation_threads
         ):
             for item in converser_cog.conversation_threads[ctx.channel.id].history:
@@ -168,7 +168,7 @@ async def encapsulated_send(
                     converser_cog.redo_users[ctx.author.id].prompt = new_prompt
                 else:
                     # Create and upsert the embedding for the conversation id, prompt, timestamp
-                    await converser_cog.pinecone_service.upsert_conversation_embedding(
+                    await converser_cog.qdrant_service.upsert_conversation_embedding(
                         converser_cog.model,
                         conversation_id,
                         new_prompt,
@@ -181,8 +181,8 @@ async def encapsulated_send(
                 prompt_less_author, custom_api_key=custom_api_key
             )  # Use the version of the prompt without the author's name for better clarity on retrieval.
 
-            # Now, build the new prompt by getting the X most similar with pinecone
-            similar_prompts = converser_cog.pinecone_service.get_n_similar(
+            # Now, build the new prompt by getting the X most similar with Qdrant
+            similar_prompts = converser_cog.qdrant_service.get_n_similar(
                 conversation_id,
                 embedding_prompt_less_author,
                 n=converser_cog.model.num_conversation_lookback,
@@ -228,8 +228,8 @@ async def encapsulated_send(
             # remove duplicates from prompt_with_history and set the conversation history
             _prompt_with_history = list(dict.fromkeys(_prompt_with_history))
 
-            # Sort the prompt_with_history by increasing timestamp if pinecone is enabled
-            if converser_cog.pinecone_service:
+            # Sort the prompt_with_history by increasing timestamp if Qdrant is enabled
+            if converser_cog.qdrant_service:
                 _prompt_with_history.sort(key=lambda x: x.timestamp)
 
             # Remove the last two entries after sort, this is from the end of the list as prompt(redo), answer, prompt(original), leaving only prompt(original) and further history
@@ -256,13 +256,13 @@ async def encapsulated_send(
 
         tokens = converser_cog.usage_service.count_tokens(new_prompt)
 
-        # No pinecone, we do conversation summarization for long term memory instead
+        # No Qdrant, we do conversation summarization for long term memory instead
         elif (
             id in converser_cog.conversation_threads
             and tokens > converser_cog.model.summarize_threshold
             and not from_ask_command
             and not from_edit_command
-            and not converser_cog.pinecone_service
+            and not converser_cog.qdrant_service
             # This should only happen if we are not doing summarizations.
         ):
             # We don't need to worry about the differences between interactions and messages in this block,
@@ -443,7 +443,7 @@ async def encapsulated_send(
             if (
                 ctx.channel.id in converser_cog.conversation_threads
                 and not from_ask_command
-                and not converser_cog.pinecone_service
+                and not converser_cog.qdrant_service
             ):
                 if not redo_request:
                     converser_cog.conversation_threads[ctx.channel.id].history.append(
@@ -461,7 +461,7 @@ async def encapsulated_send(
                 ctx.channel.id in converser_cog.conversation_threads
                 and not from_ask_command
                 and not from_edit_command
-                and converser_cog.pinecone_service
+                and converser_cog.qdrant_service
             ):
                 conversation_id = ctx.channel.id
 
@@ -486,7 +486,7 @@ async def encapsulated_send(
 
                 # Create and upsert the embedding for the conversation id, prompt, timestamp
                 embedding = (
-                    await converser_cog.pinecone_service.upsert_conversation_embedding(
+                    await converser_cog.qdrant_service.upsert_conversation_embedding(
                         converser_cog.model,
                         conversation_id,
                         response_text,
@@ -574,7 +574,7 @@ async def encapsulated_send(
                 ),
             )
             converser_cog.redo_users[ctx.author.id] = RedoUser(
-                prompt=new_prompt if not converser_cog.pinecone_service else prompt,
+                prompt=new_prompt if not converser_cog.qdrant_service else prompt,
                 instruction=instruction,
                 ctx=ctx,
                 message=ctx,
@@ -859,7 +859,7 @@ async def process_conversation_message(
                 converser_cog.awaiting_responses.append(message.author.id)
                 converser_cog.awaiting_thread_responses.append(message.channel.id)
 
-                if not converser_cog.pinecone_service:
+                if not converser_cog.qdrant_service:
                     converser_cog.conversation_threads[
                         message.channel.id
                     ].history.append(
@@ -877,7 +877,7 @@ async def process_conversation_message(
             # TODO: This should be encapsulated better into some other service or function so we're not cluttering this text service file, this text service file is gross right now..
             if (
                 "-vision" in model
-                and not converser_cog.pinecone_service
+                and not converser_cog.qdrant_service
                 and converser_cog.conversation_threads[message.channel.id].drawable
             ):
                 print("Checking for if the user asked to draw")
@@ -997,7 +997,7 @@ async def process_conversation_message(
             # Send the request to the model
             # If conversing, the prompt to send is the history, otherwise, it's just the prompt
             if (
-                converser_cog.pinecone_service
+                converser_cog.qdrant_service
                 or message.channel.id not in converser_cog.conversation_threads
             ):
                 primary_prompt = prompt
@@ -1026,7 +1026,7 @@ async def process_conversation_message(
             thinking_message = await TextService.trigger_thinking(message)
         converser_cog.full_conversation_history[message.channel.id].append(prompt)
 
-        if not converser_cog.pinecone_service:
+        if not converser_cog.qdrant_service:
             primary_prompt += BOT_NAME
 
         await TextService.encapsulated_send(
@@ -1083,8 +1083,8 @@ async def process_conversation_edit(converser_cog, after, original_message):
                 ]
             )
 
-            pinecone_dont_reinsert = None
-            if not converser_cog.pinecone_service:
+            qdrant_dont_reinsert = None
+            if not converser_cog.qdrant_service:
                 converser_cog.conversation_threads[
                     after.channel.id
                 ].history.append(
@@ -1118,7 +1118,7 @@ async def process_conversation_edit(converser_cog, after, original_message):
                 edited_request=True,
            )
 
-            if not converser_cog.pinecone_service:
+            if not converser_cog.qdrant_service:
                 converser_cog.redo_users[after.author.id].prompt = edited_content
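
Taken together, the conversation-memory round trip that encapsulated_send now performs through Qdrant looks roughly like this (illustrative sketch, not code from the diff; the `model` object is assumed to expose send_embedding_request as it does elsewhere in the repo):

# Sketch only: the shape of the flow, with error handling and Discord
# plumbing omitted.
async def build_prompt_with_memory(qdrant_service, model, conversation_id, new_prompt, timestamp):
    # Embed and store the new prompt under this conversation id
    embedding = await qdrant_service.upsert_conversation_embedding(
        model, conversation_id, new_prompt, timestamp
    )
    # Pull the most similar prior phrases for this conversation only
    similar = qdrant_service.get_n_similar(conversation_id, embedding, n=10)
    # Rebuild the history in chronological order, as the timestamp sort in
    # encapsulated_send does
    similar.sort(key=lambda pair: pair[1])
    return "".join(text for text, _ in similar) + new_prompt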