Commit 34f8ce7

v4.1.0

AstraBert committed Jun 12, 2024
1 parent a0ca4b4 commit 34f8ce7
Showing 6 changed files with 176 additions and 20 deletions.
5 changes: 3 additions & 2 deletions README.md
@@ -6,7 +6,7 @@
<img src="https://img.shields.io/github/languages/top/AstraBert/everything-ai" alt="GitHub top language">
<img src="https://img.shields.io/github/commit-activity/t/AstraBert/everything-ai" alt="GitHub commit activity">
<img src="https://img.shields.io/badge/everything_ai-stable-green" alt="Static Badge">
<img src="https://img.shields.io/badge/Release-v3.0.0-purple" alt="Static Badge">
<img src="https://img.shields.io/badge/Release-v4.1.0-purple" alt="Static Badge">
<img src="https://img.shields.io/docker/image-size/astrabert/everything-ai
" alt="Docker image size">
<img src="https://img.shields.io/badge/Supported_platforms-Windows/macOS-brown" alt="Static Badge">
@@ -72,7 +72,8 @@ Choose the task among:
- *autotrain*: fine-tune a model on a specific downstream task with autotrain-advanced, just by specifying your HF username, HF write token and the path to a YAML config file for the training
- *spaces-api-supabase*: use HF Spaces API in combination with Supabase PostgreSQL databases in order to unleash more powerful LLMs and larger RAG-oriented vector databases - **MULTILINGUAL**
- *llama.cpp-and-qdrant*: same as *retrieval-text-generation*, but uses **llama.cpp** as inference engine, so you MUST NOT specify a model - **MULTILINGUAL**
- - *build-your-llm*: Build a customizable chat LLM combining a Qdrant database with your PDFs and the power of Anthropic, OpenAI, Cohere or Groq models: you just need an API key! To build the Qdrant database, you have to pass either a single PDF or several PDFs specified as comma-separated paths, or a directory where all the PDFs of interest are stored (**DO NOT** provide both); you can also specify the language in which the PDFs are written, using [ISO nomenclature](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) - **MULTILINGUAL**
+ - *build-your-llm*: Build a customizable chat LLM combining a Qdrant database with your PDFs and the power of Anthropic, OpenAI, Cohere or Groq models: you just need an API key! To build the Qdrant database, you have to pass either a single PDF or several PDFs specified as comma-separated paths, or a directory where all the PDFs of interest are stored (**DO NOT** provide both); you can also specify the language in which the PDFs are written, using [ISO nomenclature](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) - **MULTILINGUAL**, **LANGFUSE INTEGRATION**
+ - *simply-chatting*: Build a customizable chat LLM with the power of Anthropic, OpenAI, Cohere or Groq models (no RAG pipeline): you just need an API key! - **MULTILINGUAL**, **LANGFUSE INTEGRATION**
- *image-retrieval-search*: search an image database by uploading a folder as the database input. The folder should have the following structure:

```
...
```
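The *build-your-llm* task above pairs a Qdrant vector database built from your PDFs with a hosted LLM. As a rough, self-contained sketch of the retrieval half, assuming `qdrant-client` and `sentence-transformers` are installed and a Qdrant instance is reachable locally; the collection name, payload key and encoder model below are illustrative, not the app's actual schema:

```python
# Hedged sketch of Qdrant-backed semantic search over PDF-derived chunks.
# "pdf_chunks", the payload key "text" and the encoder model are assumptions,
# not necessarily what everything-ai uses internally.
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer

encoder = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model
client = QdrantClient("http://localhost:6333")

def semantic_search(query: str, collection: str = "pdf_chunks", limit: int = 1):
    # Embed the query and return the text payloads of the closest stored chunks
    hits = client.search(
        collection_name=collection,
        query_vector=encoder.encode(query).tolist(),
        limit=limit,
    )
    return [hit.payload["text"] for hit in hits]

# Example: context = semantic_search("What does chapter 2 say about embeddings?")[0]
```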
4 changes: 0 additions & 4 deletions docker/Dockerfile
@@ -7,10 +7,6 @@ WORKDIR /app
# Add the current directory contents into the container at /app
ADD . /app

-RUN pip uninstall -y tokenizers
-
-RUN pip install tokenizers==0.19.1

# Expose the port that the application will run on
EXPOSE 8760

Binary file modified docker/__pycache__/utils.cpython-310.pyc
Binary file not shown.
68 changes: 57 additions & 11 deletions docker/build_your_llm.py
@@ -12,6 +12,8 @@
from utils import *
import os
import subprocess as sp
+import time
+from langfuse.callback import CallbackHandler

argparse = ArgumentParser()

@@ -72,9 +74,18 @@ def get_session_history(session_id):

system_template = "You are a helpful assistant that can rely on this: {context} and on the previous message history as context, and from that you build a context- and history-aware reply to this user input:"

+def build_langfuse_handler(langfuse_host, langfuse_pkey, langfuse_skey):
+    if langfuse_host != "None" and langfuse_pkey != "None" and langfuse_skey != "None":
+        langfuse_handler = CallbackHandler(
+            public_key=langfuse_pkey,
+            secret_key=langfuse_skey,
+            host=langfuse_host
+        )
+        return langfuse_handler, True
+    else:
+        return "No langfuse", False


-def reply(message, history, name, api_key, temperature, max_new_tokens, sessionid):
+def reply(message, history, name, api_key, temperature, max_new_tokens, langfuse_host, langfuse_pkey, langfuse_skey, sessionid):
    global pdfdb
    os.environ[NAME2APIKEY[name]] = api_key
    if name == "Cohere":
@@ -86,6 +97,7 @@ def reply(message, history, name, api_key, temperature, max_new_tokens, sessionid):
         MessagesPlaceholder(variable_name="history"),
         ("human", "{input}")]
    )
+    lf_handler, truth = build_langfuse_handler(langfuse_host, langfuse_pkey, langfuse_skey)
    chain = prompt_template | model
    runnable_with_history = RunnableWithMessageHistory(
        chain,
@@ -97,37 +109,65 @@ def reply(message, history, name, api_key, temperature, max_new_tokens, sessionid):
    if txt.original == "en" and lan.replace("\\","").replace("'","") == "None":
        txt2txt = NeuralSearcher(pdfdb.collection_name, pdfdb.client, pdfdb.encoder)
        results = txt2txt.search(message)
-        response = runnable_with_history.invoke({"context": results[0]["text"], "input": message}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
-        return response.content
+        if not truth:
+            response = runnable_with_history.invoke({"context": results[0]["text"], "input": message}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
+        else:
+            response = runnable_with_history.invoke({"context": results[0]["text"], "input": message}, config={"configurable": {"session_id": sessionid}, "callbacks": [lf_handler]})##CONFIGURE!
+        llm = ''
+        for char in response.content:
+            llm += char
+            time.sleep(0.001)
+            yield llm
    elif txt.original == "en" and lan.replace("\\","").replace("'","") != "None":
        txt2txt = NeuralSearcher(pdfdb.collection_name, pdfdb.client, pdfdb.encoder)
        transl = Translation(message, lan.replace("\\","").replace("'",""))
        message = transl.translatef()
        results = txt2txt.search(message)
        t = Translation(results[0]["text"], txt.original)
        res = t.translatef()
-        response = runnable_with_history.invoke({"context": res, "input": message}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
-        return response.content
+        if not truth:
+            response = runnable_with_history.invoke({"context": res, "input": message}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
+        else:
+            response = runnable_with_history.invoke({"context": res, "input": message}, config={"configurable": {"session_id": sessionid}, "callbacks": [lf_handler]})##CONFIGURE!
+        llm = ''
+        for char in response.content:
+            llm += char
+            time.sleep(0.001)
+            yield llm
    elif txt.original != "en" and lan.replace("\\","").replace("'","") == "None":
        txt2txt = NeuralSearcher(pdfdb.collection_name, pdfdb.client, pdfdb.encoder)
        results = txt2txt.search(message)
        transl = Translation(results[0]["text"], "en")
        translation = transl.translatef()
-        response = runnable_with_history.invoke({"context": translation, "input": message}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
+        if not truth:
+            response = runnable_with_history.invoke({"context": translation, "input": message}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
+        else:
+            response = runnable_with_history.invoke({"context": translation, "input": message}, config={"configurable": {"session_id": sessionid}, "callbacks": [lf_handler]})##CONFIGURE!
        t = Translation(response.content, txt.original)
        res = t.translatef()
-        return res
+        llm = ''
+        for char in res:
+            llm += char
+            time.sleep(0.001)
+            yield llm
    else:
        txt2txt = NeuralSearcher(pdfdb.collection_name, pdfdb.client, pdfdb.encoder)
        transl = Translation(message, lan.replace("\\","").replace("'",""))
        message = transl.translatef()
        results = txt2txt.search(message)
        t = Translation(results[0]["text"], txt.original)
        res = t.translatef()
-        response = runnable_with_history.invoke({"context": res, "input": message}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
+        if not truth:
+            response = runnable_with_history.invoke({"context": res, "input": message}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
+        else:
+            response = runnable_with_history.invoke({"context": res, "input": message}, config={"configurable": {"session_id": sessionid}, "callbacks": [lf_handler]})##CONFIGURE!
        tr = Translation(response.content, txt.original)
        ress = tr.translatef()
-        return ress
+        llm = ''
+        for char in ress:
+            llm += char
+            time.sleep(0.001)
+            yield llm

chat_model = gr.Dropdown(
    [m for m in list(NAME2APIKEY)], label="Chat Model", info="Choose one of the available chat models"
@@ -144,11 +184,17 @@

user_max_new_tokens = gr.Slider(0, 8192, value=1024, label="Max new tokens", info="Select max output tokens (higher number of tokens will result in a longer latency)")

+user_lf_host = gr.Textbox(label="LangFuse Host", info="Provide the LangFuse host URL, or type 'None' if you do not wish to use LangFuse", value="https://cloud.langfuse.com")
+
+user_lf_pkey = gr.Textbox(label="LangFuse Public Key", info="Provide your LangFuse public key, or type 'None' if you do not wish to use LangFuse", value="pk-*************************", type="password")
+
+user_lf_skey = gr.Textbox(label="LangFuse Secret Key", info="Provide your LangFuse secret key, or type 'None' if you do not wish to use LangFuse", value="sk-*************************", type="password")

user_session_id = gr.Textbox(label="Session ID", info="This alphanumeric code links the model's replies to a specific message history, which the model takes into account when replying. Changing it will make your model lose its memory", value="1")

additional_accordion = gr.Accordion(label="Parameters to be set before you start chatting", open=True)

-demo = gr.ChatInterface(fn=reply, additional_inputs=[chat_model, user_api_key, user_temperature, user_max_new_tokens, user_session_id], additional_inputs_accordion=additional_accordion, title="everything-ai-buildyourllm")
+demo = gr.ChatInterface(fn=reply, additional_inputs=[chat_model, user_api_key, user_temperature, user_max_new_tokens, user_lf_host, user_lf_pkey, user_lf_skey, user_session_id], additional_inputs_accordion=additional_accordion, title="everything-ai-buildyourllm")


if __name__ == "__main__":
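The heart of this diff is optional LangFuse tracing: `build_langfuse_handler` returns a `CallbackHandler` plus a flag only when all three fields are filled in, and the handler then rides along in `config["callbacks"]` on each invocation. Below is a minimal standalone sketch of the same pattern, assuming the `langfuse` v2 SDK (which ships `langfuse.callback.CallbackHandler`) and `langchain-openai` are installed; the keys, host and model are placeholders:

```python
# Sketch: trace a LangChain invocation with a LangFuse callback handler,
# mirroring the pattern this diff adds. Keys, host and model are placeholders.
from langfuse.callback import CallbackHandler
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

handler = CallbackHandler(
    public_key="pk-...",  # placeholder public key
    secret_key="sk-...",  # placeholder secret key
    host="https://cloud.langfuse.com",
)

chain = ChatPromptTemplate.from_messages(
    [("system", "You are a helpful assistant."), ("human", "{input}")]
) | ChatOpenAI(model="gpt-4o")

# The handler is passed per call, so the same chain can run traced or
# untraced, which is exactly why the diff branches on `truth`.
response = chain.invoke({"input": "Hello!"}, config={"callbacks": [handler]})
print(response.content)
```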
113 changes: 113 additions & 0 deletions docker/chat_your_llm.py
@@ -0,0 +1,113 @@
from langchain_anthropic import ChatAnthropic
from langchain_cohere import ChatCohere
from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import SQLChatMessageHistory
from utils import Translation
import time
import os
from langfuse.callback import CallbackHandler
import gradio as gr
import subprocess as sp


NAME2CHAT = {"Cohere": ChatCohere, "claude-3-opus-20240229": ChatAnthropic, "claude-3-sonnet-20240229": ChatAnthropic, "claude-3-haiku-20240307": ChatAnthropic, "llama3-8b-8192": ChatGroq, "llama3-70b-8192": ChatGroq, "mixtral-8x7b-32768": ChatGroq, "gemma-7b-it": ChatGroq, "gpt-4o": ChatOpenAI, "gpt-3.5-turbo-0125": ChatOpenAI}
NAME2APIKEY = {"Cohere": "COHERE_API_KEY", "claude-3-opus-20240229": "ANTHROPIC_API_KEY", "claude-3-sonnet-20240229": "ANTHROPIC_API_KEY", "claude-3-haiku-20240307": "ANTHROPIC_API_KEY", "llama3-8b-8192": "GROQ_API_KEY", "llama3-70b-8192": "GROQ_API_KEY", "mixtral-8x7b-32768": "GROQ_API_KEY", "gemma-7b-it": "GROQ_API_KEY", "gpt-4o": "OPENAI_API_KEY", "gpt-3.5-turbo-0125": "OPENAI_API_KEY"}

sp.run("rm -rf memory.db", shell=True)

def build_langfuse_handler(langfuse_host, langfuse_pkey, langfuse_skey):
    if langfuse_host != "None" and langfuse_pkey != "None" and langfuse_skey != "None":
        langfuse_handler = CallbackHandler(
            public_key=langfuse_pkey,
            secret_key=langfuse_skey,
            host=langfuse_host
        )
        return langfuse_handler, True
    else:
        return "No langfuse", False

def get_session_history(session_id):
    return SQLChatMessageHistory(session_id, "sqlite:///chatmemory.db")

def reply(message, history, name, api_key, temperature, max_new_tokens, langfuse_host, langfuse_pkey, langfuse_skey, system_template, sessionid):
    os.environ[NAME2APIKEY[name]] = api_key
    if name == "Cohere":
        model = NAME2CHAT[name](temperature=temperature, max_tokens=max_new_tokens)
    else:
        model = NAME2CHAT[name](model=name, temperature=temperature, max_tokens=max_new_tokens)
    prompt_template = ChatPromptTemplate.from_messages(
        [("system", system_template),
         MessagesPlaceholder(variable_name="history"),
         ("human", "{input}")]
    )
    lf_handler, truth = build_langfuse_handler(langfuse_host, langfuse_pkey, langfuse_skey)
    chain = prompt_template | model
    runnable_with_history = RunnableWithMessageHistory(
        chain,
        get_session_history,
        input_messages_key="input",
        history_messages_key="history",
    )
    txt = Translation(message, "en")
    if txt.original == "en":
        if not truth:
            response = runnable_with_history.invoke({"input": message}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
        else:
            response = runnable_with_history.invoke({"input": message}, config={"configurable": {"session_id": sessionid}, "callbacks": [lf_handler]})
        r = ''
        for c in response.content:
            r += c
            time.sleep(0.001)
            yield r
    else:
        translation = txt.translatef()
        if not truth:
            response = runnable_with_history.invoke({"input": translation}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
        else:
            response = runnable_with_history.invoke({"input": translation}, config={"configurable": {"session_id": sessionid}, "callbacks": [lf_handler]})
        t = Translation(response.content, txt.original)
        res = t.translatef()
        r = ''
        for c in res:
            r += c
            time.sleep(0.001)
            yield r


chat_model = gr.Dropdown(
    [m for m in list(NAME2APIKEY)], label="Chat Model", info="Choose one of the available chat models"
)

user_api_key = gr.Textbox(
    label="API key",
    info="Paste your API key here",
    lines=1,
    type="password",
)

user_temperature = gr.Slider(0, 1, value=0.5, label="Temperature", info="Select model temperature")

user_max_new_tokens = gr.Slider(0, 8192, value=1024, label="Max new tokens", info="Select max output tokens (higher number of tokens will result in a longer latency)")

user_lf_host = gr.Textbox(label="LangFuse Host", info="Provide the LangFuse host URL, or type 'None' if you do not wish to use LangFuse", value="https://cloud.langfuse.com")

user_lf_pkey = gr.Textbox(label="LangFuse Public Key", info="Provide your LangFuse public key, or type 'None' if you do not wish to use LangFuse", value="pk-*************************", type="password")

user_lf_skey = gr.Textbox(label="LangFuse Secret Key", info="Provide your LangFuse secret key, or type 'None' if you do not wish to use LangFuse", value="sk-*************************", type="password")

user_template = gr.Textbox(label="System Template", info="Customize your assistant with your instructions", value="You are a helpful assistant")

user_session_id = gr.Textbox(label="Session ID", info="This alphanumeric code links the model's replies to a specific message history, which the model takes into account when replying. Changing it will make your model lose its memory", value="1")

additional_accordion = gr.Accordion(label="Parameters to be set before you start chatting", open=True)

demo = gr.ChatInterface(fn=reply, additional_inputs=[chat_model, user_api_key, user_temperature, user_max_new_tokens, user_lf_host, user_lf_pkey, user_lf_skey, user_template, user_session_id], additional_inputs_accordion=additional_accordion, title="everything-ai-simplychatting")


if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", share=False)
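Two mechanics in `chat_your_llm.py` are worth isolating: the generator-style `reply` yields a growing prefix of the finished answer, which `gr.ChatInterface` renders as a typing effect, and `SQLChatMessageHistory` persists per-session memory in SQLite. A stripped-down sketch of just those two pieces, with an echo stand-in instead of a real model call:

```python
# Sketch: generator streaming in gr.ChatInterface plus SQLite-backed
# per-session memory. The echo "model" is a stand-in, not the app's LLM.
import time
import gradio as gr
from langchain_community.chat_message_histories import SQLChatMessageHistory

def reply(message, history, session_id):
    store = SQLChatMessageHistory(session_id, "sqlite:///demo_memory.db")
    store.add_user_message(message)
    answer = f"You said: {message}"  # stand-in for a real model call
    store.add_ai_message(answer)
    partial = ""
    for char in answer:  # yielding a growing prefix produces the typing effect
        partial += char
        time.sleep(0.001)
        yield partial

session_box = gr.Textbox(label="Session ID", value="1")
demo = gr.ChatInterface(fn=reply, additional_inputs=[session_box])

if __name__ == "__main__":
    demo.launch()
```

Note that, as in the committed file, the text is generated in full first and then streamed character by character, so the effect is cosmetic rather than token-level streaming from the model.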
