diff --git a/README.md b/README.md
index 647cb61..96a73d4 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
 GitHub top language
 GitHub commit activity
 Static Badge
-Static Badge
+Static Badge
 Docker image size
 Static Badge
@@ -72,7 +72,8 @@ Choose the task among:
 - *autotrain*: fine-tune a model on a specific downstream task with autotrain-advanced, just by specifying you HF username, HF writing token and the path to a yaml config file for the training
 - *spaces-api-supabase*: use HF Spaces API in combination with Supabase PostgreSQL databases in order to unleash more powerful LLMs and larger RAG-oriented vector databases - **MULTILINGUAL**
 - *llama.cpp-and-qdrant*: same as *retrieval-text-generation*, but uses **llama.cpp** as inference engine, so you MUST NOT specify a model - **MULTILINGUAL**
-- *build-your-llm*: Build a customizable chat LLM combining a Qdrant database with your PDFs and the power of Anthropic, OpenAI, Cohere or Groq models: you just need an API key! To build the Qdrant database, have to pass either a pdf/a bunch of pdfs specified as comma-separated paths or a directory where all the pdfs of interest are stored (**DO NOT** provide both); you can also specify the language in which the PDF is written, using [ISO nomenclature](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) - **MULTILINGUAL**
+- *build-your-llm*: Build a customizable chat LLM combining a Qdrant database with your PDFs and the power of Anthropic, OpenAI, Cohere or Groq models: you just need an API key! To build the Qdrant database, you have to pass either a pdf/a bunch of pdfs specified as comma-separated paths or a directory where all the pdfs of interest are stored (**DO NOT** provide both); you can also specify the language in which the PDF is written, using [ISO nomenclature](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) - **MULTILINGUAL**, **LANGFUSE INTEGRATION**
+- *simply-chatting*: Build a customizable chat LLM with the power of Anthropic, OpenAI, Cohere or Groq models (no RAG pipeline): you just need an API key! - **MULTILINGUAL**, **LANGFUSE INTEGRATION**
 - *image-retrieval-search*: search an image database uploading a folder as database input. The folder should have the following structure:
 
 ```
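Both chat-oriented tasks above advertise the optional **LANGFUSE INTEGRATION**: tracing is switched on only when a host and both keys are supplied through the UI, otherwise the chains run without callbacks. A minimal sketch of that pattern, assuming a `langfuse` installation that exposes the LangChain `CallbackHandler` (host and keys below are placeholders, and `chain` stands for the runnables built in the scripts further down):

```python
# Sketch only: attach Langfuse tracing to a LangChain invoke when host/keys
# are provided, otherwise run without callbacks. Values are placeholders.
from langfuse.callback import CallbackHandler

def maybe_langfuse_handler(host: str, public_key: str, secret_key: str):
    """Return a CallbackHandler, or None when any field is left as 'None'."""
    if "None" in (host, public_key, secret_key):
        return None
    return CallbackHandler(public_key=public_key, secret_key=secret_key, host=host)

handler = maybe_langfuse_handler("None", "None", "None")  # pass a real host/pk/sk to enable tracing
config = {"configurable": {"session_id": "1"}}
if handler is not None:
    config["callbacks"] = [handler]
# chain.invoke({"input": "Hello"}, config=config)  # 'chain' is built in the scripts below
```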
diff --git a/docker/Dockerfile b/docker/Dockerfile
index da429cd..98940ec 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -7,10 +7,6 @@
 WORKDIR /app
 
 # Add the current directory contents into the container at /app
 ADD . /app
-RUN pip uninstall -y tokenizers
-
-RUN pip install tokenizers==0.19.1
-
 # Expose the port that the application will run on
 EXPOSE 8760
diff --git a/docker/__pycache__/utils.cpython-310.pyc b/docker/__pycache__/utils.cpython-310.pyc
index 89576f1..7de9507 100644
Binary files a/docker/__pycache__/utils.cpython-310.pyc and b/docker/__pycache__/utils.cpython-310.pyc differ
diff --git a/docker/build_your_llm.py b/docker/build_your_llm.py
index 17aec54..a36103c 100644
--- a/docker/build_your_llm.py
+++ b/docker/build_your_llm.py
@@ -12,6 +12,8 @@
 from utils import *
 import os
 import subprocess as sp
+import time
+from langfuse.callback import CallbackHandler
 
 
 argparse = ArgumentParser()
@@ -72,9 +74,18 @@ def get_session_history(session_id):
 
 
 system_template = "You are an helpful assistant that can rely on this: {context} and on the previous message history as context, and from that you build a context and history-aware reply to this user input:"
 
+def build_langfuse_handler(langfuse_host, langfuse_pkey, langfuse_skey):
+    if langfuse_host!="None" and langfuse_pkey!="None" and langfuse_skey!="None":
+        langfuse_handler = CallbackHandler(
+            public_key=langfuse_pkey,
+            secret_key=langfuse_skey,
+            host=langfuse_host
+        )
+        return langfuse_handler, True
+    else:
+        return "No langfuse", False
-
-def reply(message, history, name, api_key, temperature, max_new_tokens, sessionid):
+def reply(message, history, name, api_key, temperature, max_new_tokens,langfuse_host, langfuse_pkey, langfuse_skey, sessionid):
     global pdfdb
     os.environ[NAME2APIKEY[name]] = api_key
     if name == "Cohere":
@@ -86,6 +97,7 @@ def reply(message, history, name, api_key, temperature, max_new_tokens, sessioni
         MessagesPlaceholder(variable_name="history"),
         ("human", "{input}")]
     )
+    lf_handler, truth = build_langfuse_handler(langfuse_host, langfuse_pkey, langfuse_skey)
     chain = prompt_template | model
     runnable_with_history = RunnableWithMessageHistory(
         chain,
@@ -97,8 +109,15 @@ def reply(message, history, name, api_key, temperature, max_new_tokens, sessioni
     if txt.original == "en" and lan.replace("\\","").replace("'","") == "None":
         txt2txt = NeuralSearcher(pdfdb.collection_name, pdfdb.client, pdfdb.encoder)
         results = txt2txt.search(message)
-        response = runnable_with_history.invoke({"context": results[0]["text"], "input": message}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
-        return response.content
+        if not truth:
+            response = runnable_with_history.invoke({"context": results[0]["text"], "input": message}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
+        else:
+            response = runnable_with_history.invoke({"context": results[0]["text"], "input": message}, config={"configurable": {"session_id": sessionid}, "callbacks": [lf_handler]})##CONFIGURE!
+        llm=''
+        for char in response.content:
+            llm+=char
+            time.sleep(0.001)
+            yield llm
     elif txt.original == "en" and lan.replace("\\","").replace("'","") != "None":
         txt2txt = NeuralSearcher(pdfdb.collection_name, pdfdb.client, pdfdb.encoder)
         transl = Translation(message, lan.replace("\\","").replace("'",""))
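The remaining language branches of `reply` in the hunks below repeat the same two changes: the `invoke` call is duplicated with and without the Langfuse callbacks, and the final `return` becomes a character-by-character `yield` so the Gradio chat renders a typewriter-style stream. A hedged sketch of how those two concerns could be factored into shared helpers (the names here are illustrative, not part of the diff):

```python
import time

def invoke_with_optional_tracing(runnable, payload, session_id, handler=None):
    """Run the chain once, attaching the Langfuse handler only when present."""
    config = {"configurable": {"session_id": session_id}}
    if handler is not None:
        config["callbacks"] = [handler]
    return runnable.invoke(payload, config=config)

def typewriter(text, delay=0.001):
    """Yield progressively longer prefixes of `text` for a streaming effect."""
    shown = ""
    for char in text:
        shown += char
        time.sleep(delay)
        yield shown
```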
@@ -106,17 +125,31 @@ def reply(message, history, name, api_key, temperature, max_new_tokens, sessioni
         results = txt2txt.search(message)
         t = Translation(results[0]["text"], txt.original)
         res = t.translatef()
-        response = runnable_with_history.invoke({"context": res, "input": message}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
-        return response.content
+        if not truth:
+            response = runnable_with_history.invoke({"context": res, "input": message}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
+        else:
+            response = runnable_with_history.invoke({"context": res, "input": message}, config={"configurable": {"session_id": sessionid}, "callbacks": [lf_handler]})##CONFIGURE!
+        llm = ''
+        for char in response.content:
+            llm+=char
+            time.sleep(0.001)
+            yield llm
     elif txt.original != "en" and lan.replace("\\","").replace("'","") == "None":
         txt2txt = NeuralSearcher(pdfdb.collection_name, pdfdb.client, pdfdb.encoder)
         results = txt2txt.search(message)
         transl = Translation(results[0]["text"], "en")
         translation = transl.translatef()
-        response = runnable_with_history.invoke({"context": translation, "input": message}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
+        if not truth:
+            response = runnable_with_history.invoke({"context": translation, "input": message}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
+        else:
+            response = runnable_with_history.invoke({"context": translation, "input": message}, config={"configurable": {"session_id": sessionid}, "callbacks": [lf_handler]})##CONFIGURE!
         t = Translation(response.content, txt.original)
         res = t.translatef()
-        return res
+        llm = ''
+        for char in res:
+            llm+=char
+            time.sleep(0.001)
+            yield llm
     else:
         txt2txt = NeuralSearcher(pdfdb.collection_name, pdfdb.client, pdfdb.encoder)
         transl = Translation(message, lan.replace("\\","").replace("'",""))
@@ -124,10 +157,17 @@ def reply(message, history, name, api_key, temperature, max_new_tokens, sessioni
         results = txt2txt.search(message)
         t = Translation(results[0]["text"], txt.original)
         res = t.translatef()
-        response = runnable_with_history.invoke({"context": res, "input": message}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
+        if not truth:
+            response = runnable_with_history.invoke({"context": res, "input": message}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
+        else:
+            response = runnable_with_history.invoke({"context": res, "input": message}, config={"configurable": {"session_id": sessionid}, "callbacks": [lf_handler]})##CONFIGURE!
         tr = Translation(response.content, txt.original)
         ress = tr.translatef()
-        return ress
+        llm = ''
+        for char in ress:
+            llm+=char
+            time.sleep(0.001)
+            yield llm
 
 chat_model = gr.Dropdown(
     [m for m in list(NAME2APIKEY)], label="Chat Model", info="Choose one of the available chat models"
@@ -144,11 +184,17 @@ def reply(message, history, name, api_key, temperature, max_new_tokens, sessioni
 
 user_max_new_tokens = gr.Slider(0, 8192, value=1024, label="Max new tokens", info="Select max output tokens (higher number of tokens will result in a longer latency)")
 
+user_lf_host = gr.Textbox(label="LangFuse Host",info="Provide LangFuse host URL, or type 'None' if you do not wish to use LangFuse",value="https://cloud.langfuse.com")
+
+user_lf_pkey = gr.Textbox(label="LangFuse Public Key",info="Provide LangFuse Public key, or type 'None' if you do not wish to use LangFuse",value="pk-*************************", type="password")
+
+user_lf_skey = gr.Textbox(label="LangFuse Secret Key",info="Provide LangFuse Secret key, or type 'None' if you do not wish to use LangFuse",value="sk-*************************", type="password")
+
 user_session_id = gr.Textbox(label="Session ID",info="This alphanumeric code will link model reply to a specific message history of which the models will be aware when replying. Changing it will result in the loss of memory for your model",value="1")
 
 additional_accordion = gr.Accordion(label="Parameters to be set before you start chatting", open=True)
 
-demo = gr.ChatInterface(fn=reply, additional_inputs=[chat_model, user_api_key, user_temperature, user_max_new_tokens, user_session_id], additional_inputs_accordion=additional_accordion, title="everything-ai-buildyourllm")
+demo = gr.ChatInterface(fn=reply, additional_inputs=[chat_model, user_api_key, user_temperature, user_max_new_tokens, user_lf_host, user_lf_pkey, user_lf_skey, user_session_id], additional_inputs_accordion=additional_accordion, title="everything-ai-buildyourllm")
 
 
 if __name__=="__main__":
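In both scripts the `reply` signature grows in lockstep with the `additional_inputs` list: gr.ChatInterface forwards those extra widgets positionally after `(message, history)`, and a generator `fn` lets the accumulated string render as a stream in the chat window. A trimmed-down, hypothetical example of the same wiring (the widget and its handling are illustrative, not taken from the diff):

```python
import gradio as gr

# Hypothetical mini version of the wiring above: extra widgets are passed
# positionally after (message, history), so the reply signature must mirror
# the order of additional_inputs; yielding growing strings streams the answer.
def reply(message, history, session_id):
    partial = ""
    for char in f"(session {session_id}) you said: {message}":
        partial += char
        yield partial

user_session_id = gr.Textbox(label="Session ID", value="1")
demo = gr.ChatInterface(fn=reply, additional_inputs=[user_session_id])

if __name__ == "__main__":
    demo.launch()
```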
diff --git a/docker/chat_your_llm.py b/docker/chat_your_llm.py
new file mode 100644
index 0000000..d67d29c
--- /dev/null
+++ b/docker/chat_your_llm.py
@@ -0,0 +1,113 @@
+from langchain_anthropic import ChatAnthropic
+from langchain_cohere import ChatCohere
+from langchain_groq import ChatGroq
+from langchain_openai import ChatOpenAI
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_core.runnables.history import RunnableWithMessageHistory
+from langchain_community.chat_message_histories import SQLChatMessageHistory
+from utils import Translation
+import time
+import os
+from langfuse.callback import CallbackHandler
+import gradio as gr
+import subprocess as sp
+
+
+NAME2CHAT = {"Cohere": ChatCohere, "claude-3-opus-20240229": ChatAnthropic, "claude-3-sonnet-20240229": ChatAnthropic, "claude-3-haiku-20240307": ChatAnthropic, "llama3-8b-8192": ChatGroq, "llama3-70b-8192": ChatGroq, "mixtral-8x7b-32768": ChatGroq, "gemma-7b-it": ChatGroq, "gpt-4o": ChatOpenAI, "gpt-3.5-turbo-0125": ChatOpenAI}
+NAME2APIKEY = {"Cohere": "COHERE_API_KEY", "claude-3-opus-20240229": "ANTHROPIC_API_KEY", "claude-3-sonnet-20240229": "ANTHROPIC_API_KEY", "claude-3-haiku-20240307": "ANTHROPIC_API_KEY", "llama3-8b-8192": "GROQ_API_KEY", "llama3-70b-8192": "GROQ_API_KEY", "mixtral-8x7b-32768": "GROQ_API_KEY", "gemma-7b-it": "GROQ_API_KEY", "gpt-4o": "OPENAI_API_KEY", "gpt-3.5-turbo-0125": "OPENAI_API_KEY"}
+
+sp.run("rm -rf memory.db", shell=True)
+
+def build_langfuse_handler(langfuse_host, langfuse_pkey, langfuse_skey):
+    if langfuse_host!="None" and langfuse_pkey!="None" and langfuse_skey!="None":
+        langfuse_handler = CallbackHandler(
+            public_key=langfuse_pkey,
+            secret_key=langfuse_skey,
+            host=langfuse_host
+        )
+        return langfuse_handler, True
+    else:
+        return "No langfuse", False
+
+def get_session_history(session_id):
+    return SQLChatMessageHistory(session_id, "sqlite:///chatmemory.db")
+
+def reply(message, history, name, api_key, temperature, max_new_tokens,langfuse_host, langfuse_pkey, langfuse_skey, system_template, sessionid):
+    os.environ[NAME2APIKEY[name]] = api_key
+    if name == "Cohere":
+        model = NAME2CHAT[name](temperature=temperature, max_tokens=max_new_tokens)
+    else:
+        model = NAME2CHAT[name](model=name,temperature=temperature, max_tokens=max_new_tokens)
+    prompt_template = ChatPromptTemplate.from_messages(
+        [("system", system_template),
+        MessagesPlaceholder(variable_name="history"),
+        ("human", "{input}")]
+    )
+    lf_handler, truth = build_langfuse_handler(langfuse_host, langfuse_pkey, langfuse_skey)
+    chain = prompt_template | model
+    runnable_with_history = RunnableWithMessageHistory(
+        chain,
+        get_session_history,
+        input_messages_key="input",
+        history_messages_key="history",
+    )
+    txt = Translation(message, "en")
+    if txt.original == "en":
+        if not truth:
+            response = runnable_with_history.invoke({"input": message}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
+        else:
+            response = runnable_with_history.invoke({"input": message}, config={"configurable": {"session_id": sessionid}, "callbacks": [lf_handler]})
+        r = ''
+        for c in response.content:
+            r+=c
+            time.sleep(0.001)
+            yield r
+    else:
+        translation = txt.translatef()
+        if not truth:
+            response = runnable_with_history.invoke({"input": translation}, config={"configurable": {"session_id": sessionid}})##CONFIGURE!
+        else:
+            response = runnable_with_history.invoke({"input": translation}, config={"configurable": {"session_id": sessionid}, "callbacks": [lf_handler]})
+        t = Translation(response.content, txt.original)
+        res = t.translatef()
+        r = ''
+        for c in res:
+            r+=c
+            time.sleep(0.001)
+            yield r
+
+
+chat_model = gr.Dropdown(
+    [m for m in list(NAME2APIKEY)], label="Chat Model", info="Choose one of the available chat models"
+    )
+
+user_api_key = gr.Textbox(
+    label="API key",
+    info="Paste your API key here",
+    lines=1,
+    type="password",
+)
+
+user_temperature = gr.Slider(0, 1, value=0.5, label="Temperature", info="Select model temperature")
+
+user_max_new_tokens = gr.Slider(0, 8192, value=1024, label="Max new tokens", info="Select max output tokens (higher number of tokens will result in a longer latency)")
+
+user_lf_host = gr.Textbox(label="LangFuse Host",info="Provide LangFuse host URL, or type 'None' if you do not wish to use LangFuse",value="https://cloud.langfuse.com")
+
+user_lf_pkey = gr.Textbox(label="LangFuse Public Key",info="Provide LangFuse Public key, or type 'None' if you do not wish to use LangFuse",value="pk-*************************", type="password")
+
+user_lf_skey = gr.Textbox(label="LangFuse Secret Key",info="Provide LangFuse Secret key, or type 'None' if you do not wish to use LangFuse",value="sk-*************************", type="password")
+
+user_template = gr.Textbox(label="System Template",info="Customize your assistant with your instructions",value="You are an helpful assistant")
+
+user_session_id = gr.Textbox(label="Session ID",info="This alphanumeric code will link model reply to a specific message history of which the models will be aware when replying. Changing it will result in the loss of memory for your model",value="1")
+
+additional_accordion = gr.Accordion(label="Parameters to be set before you start chatting", open=True)
+
+demo = gr.ChatInterface(fn=reply, additional_inputs=[chat_model, user_api_key, user_temperature, user_max_new_tokens, user_lf_host, user_lf_pkey, user_lf_skey, user_template, user_session_id], additional_inputs_accordion=additional_accordion, title="everything-ai-simplychatting")
+
+
+if __name__=="__main__":
+    demo.launch(server_name="0.0.0.0", share=False)
+
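The session memory in both chat scripts follows the same LangChain pattern: `RunnableWithMessageHistory` wraps the prompt-plus-model chain and looks up a `SQLChatMessageHistory` keyed by the Gradio "Session ID", so each ID gets its own SQLite-backed transcript and changing the ID starts a fresh conversation. A minimal, self-contained sketch of that pattern (the stand-in "model" and the database file name below are placeholders so no API key is needed):

```python
# Minimal sketch (not the app itself) of the session-memory wiring used above.
from langchain_community.chat_message_histories import SQLChatMessageHistory
from langchain_core.messages import AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnableLambda
from langchain_core.runnables.history import RunnableWithMessageHistory

def get_session_history(session_id):
    # Same call as in the diff; "demo_memory.db" is a throwaway file name.
    return SQLChatMessageHistory(session_id, "sqlite:///demo_memory.db")

prompt = ChatPromptTemplate.from_messages(
    [("system", "You are a helpful assistant"),
     MessagesPlaceholder(variable_name="history"),
     ("human", "{input}")]
)

# Stand-in model: reports how many prior messages the history placeholder held.
fake_model = RunnableLambda(
    lambda value: AIMessage(content=f"{len(value.messages) - 2} prior messages in this session")
)

chat = RunnableWithMessageHistory(
    prompt | fake_model,
    get_session_history,
    input_messages_key="input",
    history_messages_key="history",
)

print(chat.invoke({"input": "hello"}, config={"configurable": {"session_id": "1"}}).content)
print(chat.invoke({"input": "again"}, config={"configurable": {"session_id": "1"}}).content)
```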
diff --git a/docker/select_and_run.py b/docker/select_and_run.py
index 884fb98..ef43d74 100644
--- a/docker/select_and_run.py
+++ b/docker/select_and_run.py
@@ -1,7 +1,7 @@
 import subprocess as sp
 import gradio as gr
 
-TASK_TO_SCRIPT = {"retrieval-text-generation": "retrieval_text_generation.py", "agnostic-text-generation": "agnostic_text_generation.py", "text-summarization": "text_summarization.py", "image-generation": "image_generation.py", "image-generation-pollinations": "image_generation_pollinations.py", "image-classification": "image_classification.py", "image-to-text": "image_to_text.py", "retrieval-image-search": "retrieval_image_search.py", "protein-folding": "protein_folding_with_esm.py", "video-generation": "video_generation.py", "speech-recognition": "speech_recognition.py", "spaces-api-supabase": "spaces_api_supabase.py", "audio-classification": "audio_classification.py", "autotrain": "autotrain_interface.py", "llama.cpp-and-qdrant": "llama_cpp_int.py", "build-your-llm": "build_your_llm.py"}
+TASK_TO_SCRIPT = {"retrieval-text-generation": "retrieval_text_generation.py", "agnostic-text-generation": "agnostic_text_generation.py", "text-summarization": "text_summarization.py", "image-generation": "image_generation.py", "image-generation-pollinations": "image_generation_pollinations.py", "image-classification": "image_classification.py", "image-to-text": "image_to_text.py", "retrieval-image-search": "retrieval_image_search.py", "protein-folding": "protein_folding_with_esm.py", "video-generation": "video_generation.py", "speech-recognition": "speech_recognition.py", "spaces-api-supabase": "spaces_api_supabase.py", "audio-classification": "audio_classification.py", "autotrain": "autotrain_interface.py", "llama.cpp-and-qdrant": "llama_cpp_int.py", "build-your-llm": "build_your_llm.py", "simply-chatting": "chat_your_llm.py"}
 
 
 def build_command(tsk, mod="None", pdff="None", dirs="None", lan="None", imdim="512", gradioclient="None", supabaseurl="None", collectname="None", supenc="all-MiniLM-L6-v2", supdim="384"):
@@ -14,7 +14,7 @@ def build_command(tsk, mod="None", pdff="None", dirs="None", lan="None", imdim="
     elif tsk == "llama.cpp-and-qdrant" or tsk== "build-your-llm":
         sp.run(f"python3 {TASK_TO_SCRIPT[tsk]} -pf '{pdff}' -d '{dirs}' -l '{lan}'", shell=True)
         return f"python3 {TASK_TO_SCRIPT[tsk]} -pf '{pdff}' -d '{dirs}' -l '{lan}'"
-    elif tsk == "image-generation-pollinations" or tsk == "autotrain" or tsk == "protein-folding":
+    elif tsk == "image-generation-pollinations" or tsk == "autotrain" or tsk == "protein-folding" or tsk=="simply-chatting":
         sp.run(f"python3 {TASK_TO_SCRIPT[tsk]}", shell=True)
         return f"python3 {TASK_TO_SCRIPT[tsk]}"
     elif tsk == "spaces-api-supabase":
@@ -34,7 +34,7 @@ def build_command(tsk, mod="None", pdff="None", dirs="None", lan="None", imdim="
         label="Task",
         info="Task you want your assistant to help you with",
         lines=3,
-        value=f"Choose one of the following: {','.join(list(TASK_TO_SCRIPT.keys()))}; if you choose 'image-generation-pollinations' or 'autotrain' or 'protein-folding', you do not need to specify anything else. If you choose 'spaces-api-supabase' you need to specify the Spaces API client, the database URL, the collection name, the Sentence-Transformers encoder used to upload the vectors to the Supabase database and the vectors size (optionally also the language)",
+        value=f"Choose one of the following: {','.join(list(TASK_TO_SCRIPT.keys()))}; if you choose 'image-generation-pollinations' or 'autotrain' or 'protein-folding' or 'simply-chatting', you do not need to specify anything else. If you choose 'spaces-api-supabase' you need to specify the Spaces API client, the database URL, the collection name, the Sentence-Transformers encoder used to upload the vectors to the Supabase database and the vectors size (optionally also the language)",
     ),
     gr.Textbox(
         label="Model",
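For reference, the dispatch logic the last two hunks touch boils down to a lookup table plus an "extra arguments or not" branch. A stripped-down, hypothetical rendering of it (only two tasks kept, no subprocess launch) behaves like this:

```python
# Hypothetical, stripped-down rendering of the dispatch above: the new
# "simply-chatting" task resolves to chat_your_llm.py and needs no extra
# command-line arguments, unlike the RAG-backed build-your-llm task.
TASK_TO_SCRIPT = {
    "build-your-llm": "build_your_llm.py",
    "simply-chatting": "chat_your_llm.py",
}
NO_EXTRA_ARGS = {"image-generation-pollinations", "autotrain", "protein-folding", "simply-chatting"}

def build_command(tsk, pdff="None", dirs="None", lan="None"):
    if tsk in NO_EXTRA_ARGS:
        return f"python3 {TASK_TO_SCRIPT[tsk]}"
    return f"python3 {TASK_TO_SCRIPT[tsk]} -pf '{pdff}' -d '{dirs}' -l '{lan}'"

assert build_command("simply-chatting") == "python3 chat_your_llm.py"
assert build_command("build-your-llm", pdff="doc.pdf").startswith("python3 build_your_llm.py")
```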