[DERCBOT-1173] Indexing improvements & RAG Evaluation on langfuse (#1767)
assouktim authored Jan 8, 2025
1 parent a2a2897 commit f874cd7
Showing 21 changed files with 1,956 additions and 1,548 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -47,3 +47,4 @@ scripts/connector-messenger/ngrok.exe
**/requirements.txt
**/.venv/
**/.python-version
+gen-ai/orchestrator-server/src/main/python/tock-llm-indexing-tools/**/*.json
2,379 changes: 1,219 additions & 1,160 deletions gen-ai/orchestrator-server/src/main/python/server/poetry.lock

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions gen-ai/orchestrator-server/src/main/python/server/pyproject.toml
@@ -8,23 +8,23 @@ packages = [{include = "gen_ai_orchestrator", from = "src"}]

[tool.poetry.dependencies]
python = "^3.10"
-uvicorn = "^0.31.1"
-pydantic-settings="^2.5.2"
-fastapi = "^0.115.0"
-langchain = "^0.3.3"
-langchain-community = "^0.3.2"
-langchain-openai = "^0.2.2"
+uvicorn = "^0.32.0"
+pydantic-settings="^2.6.1"
+fastapi = "^0.115.5"
+langchain = "^0.3.7"
+langchain-community = "^0.3.7"
+langchain-openai = "^0.2.9"
tiktoken = "^0.8.0"
opensearch-py = "^2.7.1"
path = "^17.0.0"
-colorlog = "^6.8.2"
-boto3 = "^1.35.37"
+colorlog = "^6.9.0"
+boto3 = "^1.35.65"
urllib3 = "^2.2.3"
jinja2 = "^3.1.4"
-langfuse = "^2.52.0"
+langfuse = "^2.54.0"
httpx-auth-awssigv4 = "^0.1.4"
langchain-postgres = "^0.0.12"
-google-cloud-secret-manager = "^2.20.2"
+google-cloud-secret-manager = "^2.21.1"
psycopg = {extras = ["binary"], version = "^3.2.3"}


@@ -55,6 +55,8 @@ class _Settings(BaseSettings):
    """Request timeout: set the maximum time (in seconds) for the request to be completed."""
    llm_provider_timeout: int = 30
    llm_provider_max_retries: int = 0
+    """Enable or disable rate limiting for LLM calls."""
+    llm_rate_limits: bool = True
    em_provider_timeout: int = 4

    vector_store_provider: Optional[VectorStoreProvider] = VectorStoreProvider.OPEN_SEARCH
@@ -87,7 +89,8 @@ class _Settings(BaseSettings):
    observability_proxy_server_authorization_header_name: Optional[str] = None

    """GCP"""
-    gcp_project_id: Optional[str] = Field(alias='tock_gcp_project_id', default=None)  # GCP project ID used for GCP Secrets
+    # GCP project ID used for GCP Secrets
+    gcp_project_id: Optional[str] = Field(alias='tock_gcp_project_id', default=None)


application_settings = _Settings()
@@ -29,11 +29,8 @@
from gen_ai_orchestrator.models.llm.azureopenai.azure_openai_llm_setting import (
    AzureOpenAILLMSetting,
)
-from gen_ai_orchestrator.models.security.raw_secret_key.raw_secret_key import (
-    RawSecretKey,
-)
from gen_ai_orchestrator.services.langchain.factories.llm.llm_factory import (
-    LangChainLLMFactory,
+    LangChainLLMFactory, rate_limiter,
)
from gen_ai_orchestrator.services.security.security_service import (
    fetch_secret_key_value,
@@ -55,6 +52,7 @@ def get_language_model(self) -> BaseLanguageModel:
            temperature=self.setting.temperature,
            request_timeout=application_settings.llm_provider_timeout,
            max_retries=application_settings.llm_provider_max_retries,
+            rate_limiter=rate_limiter if application_settings.llm_rate_limits else None
        )

    @openai_exception_handler(provider='AzureOpenAIService')
@@ -19,11 +19,13 @@
from typing import Optional

from langchain.base_language import BaseLanguageModel
+from langchain.callbacks.base import BaseCallbackHandler as LangchainBaseCallbackHandler
+from langchain_core.rate_limiters import BaseRateLimiter, InMemoryRateLimiter
from langchain_core.runnables import RunnableConfig
from langchain_core.runnables.utils import Input, Output
-from langchain.callbacks.base import BaseCallbackHandler as LangchainBaseCallbackHandler
from pydantic import BaseModel

+from gen_ai_orchestrator.configurations.environment.settings import application_settings
from gen_ai_orchestrator.models.llm.llm_setting import BaseLLMSetting

logger = logging.getLogger(__name__)
@@ -76,3 +78,13 @@ async def invoke(self, _input: Input, config: Optional[RunnableConfig] = None) -> Output:
            The output of the runnable.
        """
        return await self.get_language_model().ainvoke(_input, config)


+rate_limiter = InMemoryRateLimiter(
+    # Allow at most one request every 10 seconds.
+    requests_per_second=0.1,
+    # Wake up every 100 ms to check whether a request is allowed.
+    check_every_n_seconds=0.1,
+    # Maximum burst size.
+    max_bucket_size=10,
+)
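For context, a minimal sketch of how such a limiter plugs into a LangChain chat model (the model name is illustrative; the `rate_limiter` field is available on chat models since langchain-core 0.2.24):

```python
from langchain_core.rate_limiters import InMemoryRateLimiter
from langchain_openai import ChatOpenAI  # illustrative; any chat model supporting rate_limiter works

limiter = InMemoryRateLimiter(
    requests_per_second=0.1,    # allow one request every 10 seconds on average
    check_every_n_seconds=0.1,  # poll for an available token every 100 ms
    max_bucket_size=10,         # cap on burst size
)

llm = ChatOpenAI(model="gpt-4o-mini", rate_limiter=limiter)
# Each invoke() now blocks until the limiter grants a token, smoothing out the call rate.
print(llm.invoke("Ping?").content)
```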
@@ -29,11 +29,8 @@
from gen_ai_orchestrator.models.llm.openai.openai_llm_setting import (
    OpenAILLMSetting,
)
-from gen_ai_orchestrator.models.security.raw_secret_key.raw_secret_key import (
-    RawSecretKey,
-)
from gen_ai_orchestrator.services.langchain.factories.llm.llm_factory import (
-    LangChainLLMFactory,
+    LangChainLLMFactory, rate_limiter,
)
from gen_ai_orchestrator.services.security.security_service import (
    fetch_secret_key_value,
@@ -53,6 +50,7 @@ def get_language_model(self) -> BaseLanguageModel:
            temperature=self.setting.temperature,
            request_timeout=application_settings.llm_provider_timeout,
            max_retries=application_settings.llm_provider_max_retries,
+            rate_limiter=rate_limiter if application_settings.llm_rate_limits else None
        )

    @openai_exception_handler(provider='OpenAI')
@@ -66,8 +66,8 @@ def get_vector_store(self, async_mode: Optional[bool] = True) -> OpenSearchVectorSearch:
            timeout=application_settings.vector_store_timeout,
        )

-    def get_vector_store_retriever(self, search_kwargs: dict) -> VectorStoreRetriever:
-        return self.get_vector_store().as_retriever(
+    def get_vector_store_retriever(self, search_kwargs: dict, async_mode: Optional[bool] = True) -> VectorStoreRetriever:
+        return self.get_vector_store(async_mode).as_retriever(
            search_kwargs=search_kwargs
        )

@@ -54,8 +54,8 @@ def get_vector_store(self, async_mode: Optional[bool] = True) -> PGVector:
            async_mode=async_mode
        )

-    def get_vector_store_retriever(self, search_kwargs: dict) -> VectorStoreRetriever:
-        return self.get_vector_store().as_retriever(
+    def get_vector_store_retriever(self, search_kwargs: dict, async_mode: Optional[bool] = True) -> VectorStoreRetriever:
+        return self.get_vector_store(async_mode).as_retriever(
            search_kwargs=search_kwargs
        )

@@ -52,11 +52,12 @@ def get_vector_store(self, async_mode: Optional[bool] = True) -> VectorStore:
        pass

    @abstractmethod
-    def get_vector_store_retriever(self, search_kwargs: dict) -> VectorStoreRetriever:
+    def get_vector_store_retriever(self, search_kwargs: dict, async_mode: Optional[bool] = True) -> VectorStoreRetriever:
        """
        Build the Vector Store and return it as a retriever
        Args:
            search_kwargs: the search filter
+            async_mode: enable/disable async mode for the vector DB client (if supported). Defaults to True.
        :return: A VectorStoreRetriever.
        """
        pass
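As an illustration, a caller can now opt out of the async client when running in a synchronous context (a sketch; `factory` stands for any concrete factory above and the search filter is illustrative):

```python
# Sketch: build a retriever backed by the synchronous vector DB client.
retriever = factory.get_vector_store_retriever(
    search_kwargs={"k": 4},  # illustrative search filter
    async_mode=False,        # use the synchronous client instead of the async one
)
docs = retriever.invoke("How to get started playing guitar?")
```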
@@ -185,12 +185,13 @@ def get_source_content(doc: Document) -> str:
    return doc.page_content


-def create_rag_chain(query: RagQuery) -> ConversationalRetrievalChain:
+def create_rag_chain(query: RagQuery, vector_db_async_mode: Optional[bool] = True) -> ConversationalRetrievalChain:
    """
    Create the RAG chain from RagQuery, using the LLM and Embedding settings specified in the query
    Args:
        query: The RAG query
+        vector_db_async_mode: enable/disable async mode for the vector DB client (if supported). Defaults to True.
    Returns:
        The RAG chain.
    """
@@ -203,7 +204,8 @@ def create_rag_chain(query: RagQuery) -> ConversationalRetrievalChain:
    )

    retriever = vector_store_factory.get_vector_store_retriever(
-        search_kwargs=query.document_search_params.to_dict()
+        search_kwargs=query.document_search_params.to_dict(),
+        async_mode=vector_db_async_mode
    )
    if query.compressor_setting:
        retriever = add_compressor(retriever, query.compressor_setting)
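A synchronous evaluation script could therefore build the chain with the async client disabled, along these lines (a sketch; the RagQuery construction is elided and the import path is assumed):

```python
# Assumption: create_rag_chain is the function shown above; rag_query is a prebuilt RagQuery.
chain = create_rag_chain(query=rag_query, vector_db_async_mode=False)
response = chain.invoke({"question": "How to get started playing guitar?", "chat_history": []})
```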
@@ -0,0 +1,13 @@
# for Langfuse dataset provider
LANGFUSE_SECRET_KEY=
LANGFUSE_PUBLIC_KEY=
LANGFUSE_HOST=

# for LangSmith dataset provider
LANGCHAIN_API_KEY=

# for smarttribune_consumer.py script
API_KEY=
API_SECRET=


@@ -240,12 +240,12 @@ To configure the default vector store, you can use the following environment variables

### generate_dataset.py

-Generates a testing dataset based on an input file. The input file should have the correct format (see generate_datset_input.xlsx for sample). The generated dataset can be saved on filesystem, using the --csv-output option, on langsmith, using the --langsmith-dataset-name option, or both.
+Generates a testing dataset based on an input file. The input file should have the correct format (see generate_datset_input.xlsx for a sample). The generated dataset can be saved on the filesystem using the --csv-output option, on LangSmith using the --langsmith-dataset-name option, on Langfuse using the --langfuse-dataset-name option, or any combination of these.

```
Usage:
-generate_dataset.py [-v] <input_excel> --range=<s> [--csv-output=<path>] [ --langsmith-dataset-name=<name> ] [--locale=<locale>] [--no-answer=<na>]
-generate_dataset.py [-v] <input_excel> --sheet=<n>... [--csv-output=<path>] [ --langsmith-dataset-name=<name> ] [--locale=<locale>] [--no-answer=<na>]
+generate_dataset.py [-v] <input_excel> --range=<s> [--csv-output=<path>] [ --langsmith-dataset-name=<name> ] [ --langfuse-dataset-name=<name> ] [--locale=<locale>] [--no-answer=<na>]
+generate_dataset.py [-v] <input_excel> --sheet=<n>... [--csv-output=<path>] [ --langsmith-dataset-name=<name> ] [ --langfuse-dataset-name=<name> ] [--locale=<locale>] [--no-answer=<na>]
Arguments:
input_excel path to the input excel file
@@ -255,22 +255,22 @@ Options:
--sheet=<n> Sheet numbers to be parsed. Indices are 0-indexed.
--csv-output=<path> Output path of csv file to be generated.
--langsmith-dataset-name=<name> Name of the dataset to be saved on langsmith.
+--langfuse-dataset-name=<name> Name of the dataset to be saved on langfuse.
--locale=<locale> Locale to be included in the dataset. [default: French]
--no-answer=<na> Label of no_answer to be included in the dataset. [default: NO_RAG_SENTENCE]
-h --help Show this screen
--version Show version
-v Verbose output for debugging (without this option, script will be silent but for errors)
-Generates a testing dataset based on an input file. The input file should have the correct format (see generate_datset_input.xlsx for sample). The generated dataset can be saved on filesystem, using the --csv-output option, on langsmith, using the --langsmith-dataset-name option, or both.
+Generates a testing dataset based on an input file. The input file should have the correct format (see generate_datset_input.xlsx for a sample). The generated dataset can be saved on the filesystem using the --csv-output option, on LangSmith using the --langsmith-dataset-name option, on Langfuse using the --langfuse-dataset-name option, or any combination of these.
```
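For example, a typical invocation saving the dataset both locally and on Langfuse might look like this (file and dataset names are illustrative):

```
python generate_dataset.py generate_dataset_input.xlsx --sheet=0 --csv-output=dataset.csv --langfuse-dataset-name=my-dataset
```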

### rag_testing_tool.py

-Retrieval-Augmented Generation (RAG) endpoint settings testing tool based on LangSmith's SDK: runs a specific RAG Settings configuration against a reference dataset.
+Retrieval-Augmented Generation (RAG) endpoint settings testing tool based on LangSmith's or LangFuse's SDK: runs a specific RAG Settings configuration against a reference dataset.

```
Usage:
-rag_testing_tool.py [-v] <rag_query> <dataset_name> <test_name> [<delay>]
+rag_testing_tool.py [-v] <rag_query> <dataset_provider> <dataset_name> <test_name>
rag_testing_tool.py -h | --help
rag_testing_tool.py --version
@@ -280,18 +280,19 @@
provider, indexation session's unique id, and 'k', i.e. nb
of retrieved docs (question and chat history are ignored,
as they will come from the dataset)
dataset_provider the dataset provider (langsmith or langfuse)
dataset_name the reference dataset name
test_name name of the test run
Options:
delay Delay between two calls to the inference method in ms
-h --help Show this screen
--version Show version
-v Verbose output for debugging (without this option, script will
be silent but for errors)
```

-Build a RAG (Lang)chain from the RAG Query and runs it against the provided LangSmith dataset. The chain is created anew for each entry of the dataset, and if a delay is provided each chain creation will be delayed accordingly.
+Builds a RAG (Lang)chain from the RAG Query and runs it against the provided LangSmith or Langfuse dataset.
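An illustrative run against a Langfuse dataset (the query file and names are placeholders):

```
python rag_testing_tool.py rag_query_sample.json langfuse my-dataset test-2025-01-08
```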

### export_run_results.py

Exports LangSmith dataset run results, in CSV format.
@@ -314,4 +315,38 @@ Options:
The exported CSV file will have these columns:
'Reference input'|'Reference output'|'Response 1'|'Sources 1'|...|'Response N'|'Sources N'
NB: There will be as many responses as run sessions
```

### export_run_results_both.py

Exports LangSmith or Langfuse dataset run results, in CSV format.

```
Exports LangSmith or LangFuse dataset run results.
Usage:
export_run_results_both.py [-v] <dataset_provider> <dataset_id_or_name> <session_or_run_ids>...
export_run_results_both.py -h | --help
export_run_results_both.py --version
Arguments:
dataset_provider specify either 'langfuse' or 'langsmith'
dataset_id_or_name dataset id (LangSmith) or dataset name (Langfuse)
session_or_run_ids list of session or run ids
Options:
-v Verbose output
-h --help Show this screen
--version Show version
The exported CSV file will have these columns:
'Reference input'|'Reference output'|'Response 1'|'Sources 1'|...|'Response N'|'Sources N'
The CSV file will be saved in the same location as the script.
NB: There will be as many responses as run sessions
Note: to use Langfuse, set the LANGFUSE_SECRET_KEY and LANGFUSE_PUBLIC_KEY environment variables (the secret and public keys provided by Langfuse).
To use LangSmith, set the LANGCHAIN_API_KEY environment variable.
```
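An illustrative export of two Langfuse runs (keys, dataset and run names are placeholders):

```
export LANGFUSE_SECRET_KEY=sk-... LANGFUSE_PUBLIC_KEY=pk-...
python export_run_results_both.py langfuse my-dataset run-1 run-2
```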
@@ -0,0 +1,11 @@
# for Langfuse dataset provider
LANGFUSE_SECRET_KEY=
LANGFUSE_PUBLIC_KEY=
LANGFUSE_HOST=

# for LangSmith dataset provider
LANGCHAIN_API_KEY=

# for smarttribune_consumer.py script
API_KEY=
API_SECRET=
@@ -0,0 +1,55 @@
{
"history": [],
"question_answering_llm_setting": {
"provider": "AzureOpenAIService",
"api_key": {
"type": "Raw",
"value": "*****************"
},
"model": "gpt-4o",
"deployment_name": "*******************",
"repetition_penalty":1.0,
"max_new_tokens":256,
"api_base": "******************************",
"api_version": "2024-03-01-preview",
"temperature": 0.4 ,
"prompt": "Use the following context to answer the question at the end.\nIf you don't know the answer, just say {no_answer}.\n Context: {context}\nQuestion: {question} \n Answer in {locale}:"
},
"question_answering_prompt_inputs": {
"question": "How to get started playing guitar ?",
"no_answer": "Sorry, I don't know.",
"locale": "French"
},
"embedding_question_em_setting": {
"provider": "AzureOpenAIService",
"api_key": {
"type": "Raw",
"value": "*****************************"
},
"deployment_name": "********************",
"api_base": "***********************************",
"api_version": "2024-03-01-preview",
"model": "text-embedding-ada-002"
},
"document_index_name": "ns-03-bot-cmso",
"document_search_params": {
"provider": "OpenSearch",
"filter": [
{
"term": {
"metadata.index_session_id.keyword": "****************************************"
}
}
],
"k": 4
},
"observability_setting": {
"provider": "Langfuse",
"url": "http://localhost:3000",
"secret_key":{
"type": "Raw",
"value": "************************"
},
"public_key":"********************************"
}
}