[DERCBOT-1173] Indexing improvements & RAG Evaluation on langfuse (#1767)
assouktim authored Jan 8, 2025
1 parent a2a2897 commit f874cd7
Showing 21 changed files with 1,956 additions and 1,548 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -47,3 +47,4 @@ scripts/connector-messenger/ngrok.exe
**/requirements.txt
**/.venv/
**/.python-version
+gen-ai/orchestrator-server/src/main/python/tock-llm-indexing-tools/**/*.json
2,379 changes: 1,219 additions & 1,160 deletions gen-ai/orchestrator-server/src/main/python/server/poetry.lock

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions gen-ai/orchestrator-server/src/main/python/server/pyproject.toml
@@ -8,23 +8,23 @@ packages = [{include = "gen_ai_orchestrator", from = "src"}]

[tool.poetry.dependencies]
python = "^3.10"
-uvicorn = "^0.31.1"
-pydantic-settings="^2.5.2"
-fastapi = "^0.115.0"
-langchain = "^0.3.3"
-langchain-community = "^0.3.2"
-langchain-openai = "^0.2.2"
+uvicorn = "^0.32.0"
+pydantic-settings="^2.6.1"
+fastapi = "^0.115.5"
+langchain = "^0.3.7"
+langchain-community = "^0.3.7"
+langchain-openai = "^0.2.9"
tiktoken = "^0.8.0"
opensearch-py = "^2.7.1"
path = "^17.0.0"
-colorlog = "^6.8.2"
-boto3 = "^1.35.37"
+colorlog = "^6.9.0"
+boto3 = "^1.35.65"
urllib3 = "^2.2.3"
jinja2 = "^3.1.4"
-langfuse = "^2.52.0"
+langfuse = "^2.54.0"
httpx-auth-awssigv4 = "^0.1.4"
langchain-postgres = "^0.0.12"
-google-cloud-secret-manager = "^2.20.2"
+google-cloud-secret-manager = "^2.21.1"
psycopg = {extras = ["binary"], version = "^3.2.3"}


@@ -55,6 +55,8 @@ class _Settings(BaseSettings):
    """Request timeout: set the maximum time (in seconds) for the request to be completed."""
    llm_provider_timeout: int = 30
    llm_provider_max_retries: int = 0
+    """Enable or disable rate limiting for LLM calls."""
+    llm_rate_limits: bool = True
    em_provider_timeout: int = 4

    vector_store_provider: Optional[VectorStoreProvider] = VectorStoreProvider.OPEN_SEARCH
@@ -87,7 +89,8 @@ class _Settings(BaseSettings):
    observability_proxy_server_authorization_header_name: Optional[str] = None

    """GCP"""
-    gcp_project_id: Optional[str] = Field(alias='tock_gcp_project_id', default=None)  # GCP project ID used for GCP Secrets
+    # GCP project ID used for GCP Secrets
+    gcp_project_id: Optional[str] = Field(alias='tock_gcp_project_id', default=None)


application_settings = _Settings()
@@ -29,11 +29,8 @@
from gen_ai_orchestrator.models.llm.azureopenai.azure_openai_llm_setting import (
    AzureOpenAILLMSetting,
)
-from gen_ai_orchestrator.models.security.raw_secret_key.raw_secret_key import (
-    RawSecretKey,
-)
from gen_ai_orchestrator.services.langchain.factories.llm.llm_factory import (
-    LangChainLLMFactory,
+    LangChainLLMFactory, rate_limiter,
)
from gen_ai_orchestrator.services.security.security_service import (
    fetch_secret_key_value,
@@ -55,6 +52,7 @@ def get_language_model(self) -> BaseLanguageModel:
            temperature=self.setting.temperature,
            request_timeout=application_settings.llm_provider_timeout,
            max_retries=application_settings.llm_provider_max_retries,
+            rate_limiter=rate_limiter if application_settings.llm_rate_limits else None
        )

    @openai_exception_handler(provider='AzureOpenAIService')
@@ -19,11 +19,13 @@
from typing import Optional

from langchain.base_language import BaseLanguageModel
+from langchain.callbacks.base import BaseCallbackHandler as LangchainBaseCallbackHandler
+from langchain_core.rate_limiters import BaseRateLimiter, InMemoryRateLimiter
from langchain_core.runnables import RunnableConfig
from langchain_core.runnables.utils import Input, Output
-from langchain.callbacks.base import BaseCallbackHandler as LangchainBaseCallbackHandler
from pydantic import BaseModel

+from gen_ai_orchestrator.configurations.environment.settings import application_settings
from gen_ai_orchestrator.models.llm.llm_setting import BaseLLMSetting

logger = logging.getLogger(__name__)
@@ -76,3 +78,13 @@ async def invoke(self, _input: Input, config: Optional[RunnableConfig] = None) -> Output:
            The output of the runnable.
        """
        return await self.get_language_model().ainvoke(_input, config)


+rate_limiter = InMemoryRateLimiter(
+    # Allow at most one request every 10 seconds.
+    requests_per_second=0.1,
+    # Wake up every 100 ms to check whether a request is allowed.
+    check_every_n_seconds=0.1,
+    # Maximum burst size.
+    max_bucket_size=10,
+)
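For context, a minimal sketch of how such a limiter plugs into a LangChain chat model (the model name is illustrative; the `rate_limiter` field is available on chat models since langchain-core 0.2.24):

```python
from langchain_core.rate_limiters import InMemoryRateLimiter
from langchain_openai import ChatOpenAI  # illustrative; any chat model supporting rate_limiter works

limiter = InMemoryRateLimiter(
    requests_per_second=0.1,    # allow one request every 10 seconds on average
    check_every_n_seconds=0.1,  # poll for an available token every 100 ms
    max_bucket_size=10,         # cap on burst size
)

llm = ChatOpenAI(model="gpt-4o-mini", rate_limiter=limiter)
# Each invoke() now blocks until the limiter grants a token, smoothing out the call rate.
print(llm.invoke("Ping?").content)
```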
@@ -29,11 +29,8 @@
from gen_ai_orchestrator.models.llm.openai.openai_llm_setting import (
    OpenAILLMSetting,
)
-from gen_ai_orchestrator.models.security.raw_secret_key.raw_secret_key import (
-    RawSecretKey,
-)
from gen_ai_orchestrator.services.langchain.factories.llm.llm_factory import (
-    LangChainLLMFactory,
+    LangChainLLMFactory, rate_limiter,
)
from gen_ai_orchestrator.services.security.security_service import (
    fetch_secret_key_value,
@@ -53,6 +50,7 @@ def get_language_model(self) -> BaseLanguageModel:
            temperature=self.setting.temperature,
            request_timeout=application_settings.llm_provider_timeout,
            max_retries=application_settings.llm_provider_max_retries,
+            rate_limiter=rate_limiter if application_settings.llm_rate_limits else None
        )

    @openai_exception_handler(provider='OpenAI')
@@ -66,8 +66,8 @@ def get_vector_store(self, async_mode: Optional[bool] = True) -> OpenSearchVectorSearch:
            timeout=application_settings.vector_store_timeout,
        )

-    def get_vector_store_retriever(self, search_kwargs: dict) -> VectorStoreRetriever:
-        return self.get_vector_store().as_retriever(
+    def get_vector_store_retriever(self, search_kwargs: dict, async_mode: Optional[bool] = True) -> VectorStoreRetriever:
+        return self.get_vector_store(async_mode).as_retriever(
            search_kwargs=search_kwargs
        )

@@ -54,8 +54,8 @@ def get_vector_store(self, async_mode: Optional[bool] = True) -> PGVector:
            async_mode=async_mode
        )

-    def get_vector_store_retriever(self, search_kwargs: dict) -> VectorStoreRetriever:
-        return self.get_vector_store().as_retriever(
+    def get_vector_store_retriever(self, search_kwargs: dict, async_mode: Optional[bool] = True) -> VectorStoreRetriever:
+        return self.get_vector_store(async_mode).as_retriever(
            search_kwargs=search_kwargs
        )

@@ -52,11 +52,12 @@ def get_vector_store(self, async_mode: Optional[bool] = True) -> VectorStore:
        pass

    @abstractmethod
-    def get_vector_store_retriever(self, search_kwargs: dict) -> VectorStoreRetriever:
+    def get_vector_store_retriever(self, search_kwargs: dict, async_mode: Optional[bool] = True) -> VectorStoreRetriever:
        """
        Build the Vector Store and return it as a retriever
        Args:
            search_kwargs: the search filter
+            async_mode: enable/disable async mode for the vector DB client (if supported). Defaults to True.
        :return: A VectorStoreRetriever.
        """
        pass
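As an illustration, a caller can now opt out of the async client when running in a synchronous context (a sketch; `factory` stands for any concrete factory above and the search filter is illustrative):

```python
# Sketch: build a retriever backed by the synchronous vector DB client.
retriever = factory.get_vector_store_retriever(
    search_kwargs={"k": 4},  # illustrative search filter
    async_mode=False,        # use the synchronous client instead of the async one
)
docs = retriever.invoke("How to get started playing guitar?")
```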
@@ -185,12 +185,13 @@ def get_source_content(doc: Document) -> str:
    return doc.page_content


-def create_rag_chain(query: RagQuery) -> ConversationalRetrievalChain:
+def create_rag_chain(query: RagQuery, vector_db_async_mode: Optional[bool] = True) -> ConversationalRetrievalChain:
    """
    Create the RAG chain from RagQuery, using the LLM and Embedding settings specified in the query
    Args:
        query: The RAG query
+        vector_db_async_mode: enable/disable async mode for the vector DB client (if supported). Defaults to True.
    Returns:
        The RAG chain.
    """
@@ -203,7 +204,8 @@ def create_rag_chain(query: RagQuery) -> ConversationalRetrievalChain:
    )

    retriever = vector_store_factory.get_vector_store_retriever(
-        search_kwargs=query.document_search_params.to_dict()
+        search_kwargs=query.document_search_params.to_dict(),
+        async_mode=vector_db_async_mode
    )
    if query.compressor_setting:
        retriever = add_compressor(retriever, query.compressor_setting)
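A synchronous evaluation script could therefore build the chain with the async client disabled, along these lines (a sketch; the RagQuery construction is elided and the import path is assumed):

```python
# Assumption: create_rag_chain is the function shown above; rag_query is a prebuilt RagQuery.
chain = create_rag_chain(query=rag_query, vector_db_async_mode=False)
response = chain.invoke({"question": "How to get started playing guitar?", "chat_history": []})
```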
@@ -0,0 +1,13 @@
# for Langfuse dataset provider
LANGFUSE_SECRET_KEY=
LANGFUSE_PUBLIC_KEY=
LANGFUSE_HOST=

# for LangSmith dataset provider
LANGCHAIN_API_KEY=

# for smarttribune_consumer.py script
API_KEY=
API_SECRET=


@@ -240,12 +240,12 @@ To configure the default vector store, you can use the following environment variables

### generate_dataset.py

-Generates a testing dataset based on an input file. The input file should have the correct format (see generate_datset_input.xlsx for sample). The generated dataset can be saved on filesystem, using the --csv-output option, on langsmith, using the --langsmith-dataset-name option, or both.
+Generates a testing dataset based on an input file. The input file should have the correct format (see generate_datset_input.xlsx for a sample). The generated dataset can be saved on the filesystem using the --csv-output option, on LangSmith using the --langsmith-dataset-name option, on Langfuse using the --langfuse-dataset-name option, or any combination of these.

```
Usage:
-generate_dataset.py [-v] <input_excel> --range=<s> [--csv-output=<path>] [ --langsmith-dataset-name=<name> ] [--locale=<locale>] [--no-answer=<na>]
-generate_dataset.py [-v] <input_excel> --sheet=<n>... [--csv-output=<path>] [ --langsmith-dataset-name=<name> ] [--locale=<locale>] [--no-answer=<na>]
+generate_dataset.py [-v] <input_excel> --range=<s> [--csv-output=<path>] [ --langsmith-dataset-name=<name> ] [ --langfuse-dataset-name=<name> ] [--locale=<locale>] [--no-answer=<na>]
+generate_dataset.py [-v] <input_excel> --sheet=<n>... [--csv-output=<path>] [ --langsmith-dataset-name=<name> ] [ --langfuse-dataset-name=<name> ] [--locale=<locale>] [--no-answer=<na>]
Arguments:
input_excel path to the input excel file
@@ -255,22 +255,22 @@ Options:
--sheet=<n> Sheet numbers to be parsed. Indices are 0-indexed.
--csv-output=<path> Output path of csv file to be generated.
--langsmith-dataset-name=<name> Name of the dataset to be saved on langsmith.
+--langfuse-dataset-name=<name> Name of the dataset to be saved on langfuse.
--locale=<locale> Locale to be included in the dataset. [default: French]
--no-answer=<na> Label of no_answer to be included in the dataset. [default: NO_RAG_SENTENCE]
-h --help Show this screen
--version Show version
-v Verbose output for debugging (without this option, script will be silent but for errors)
-Generates a testing dataset based on an input file. The input file should have the correct format (see generate_datset_input.xlsx for sample). The generated dataset can be saved on filesystem, using the --csv-output option, on langsmith, using the --langsmith-dataset-name option, or both.
+Generates a testing dataset based on an input file. The input file should have the correct format (see generate_datset_input.xlsx for a sample). The generated dataset can be saved on the filesystem using the --csv-output option, on LangSmith using the --langsmith-dataset-name option, on Langfuse using the --langfuse-dataset-name option, or any combination of these.
```
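For example, a typical invocation saving the dataset both locally and on Langfuse might look like this (file and dataset names are illustrative):

```
python generate_dataset.py generate_dataset_input.xlsx --sheet=0 --csv-output=dataset.csv --langfuse-dataset-name=my-dataset
```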

### rag_testing_tool.py

-Retrieval-Augmented Generation (RAG) endpoint settings testing tool based on LangSmith's SDK: runs a specific RAG Settings configuration against a reference dataset.
+Retrieval-Augmented Generation (RAG) endpoint settings testing tool based on LangSmith's or LangFuse's SDK: runs a specific RAG Settings configuration against a reference dataset.

```
Usage:
-rag_testing_tool.py [-v] <rag_query> <dataset_name> <test_name> [<delay>]
+rag_testing_tool.py [-v] <rag_query> <dataset_provider> <dataset_name> <test_name>
rag_testing_tool.py -h | --help
rag_testing_tool.py --version
@@ -280,18 +280,19 @@
provider, indexation session's unique id, and 'k', i.e. nb
of retrieved docs (question and chat history are ignored,
as they will come from the dataset)
dataset_provider the dataset provider (langsmith or langfuse)
dataset_name the reference dataset name
test_name name of the test run
Options:
delay Delay between two calls to the inference method in ms
-h --help Show this screen
--version Show version
-v Verbose output for debugging (without this option, script will
be silent but for errors)
```

-Build a RAG (Lang)chain from the RAG Query and runs it against the provided LangSmith dataset. The chain is created anew for each entry of the dataset, and if a delay is provided each chain creation will be delayed accordingly.
+Builds a RAG (Lang)chain from the RAG Query and runs it against the provided LangSmith or Langfuse dataset.
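An illustrative run against a Langfuse dataset (the query file and names are placeholders):

```
python rag_testing_tool.py rag_query_sample.json langfuse my-dataset test-2025-01-08
```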

### export_run_results.py

Exports LangSmith dataset run results, in CSV format.
@@ -314,4 +315,38 @@ Options:
The exported CSV file will have these columns:
'Reference input'|'Reference output'|'Response 1'|'Sources 1'|...|'Response N'|'Sources N'
NB: There will be as many responses as run sessions
```

### export_run_results_both.py

Exports LangSmith or Langfuse dataset run results, in CSV format.

```
Exports LangSmith or LangFuse dataset run results.
Usage:
export_run_results_both.py [-v] <dataset_provider> <dataset_id_or_name> <session_or_run_ids>...
export_run_results_both.py -h | --help
export_run_results_both.py --version
Arguments:
dataset_provider specify either 'langfuse' or 'langsmith'
dataset_id_or_name dataset id (LangSmith) or dataset name (Langfuse)
session_or_run_ids list of session or run ids
Options:
-v Verbose output
-h --help Show this screen
--version Show version
The exported CSV file will have these columns:
'Reference input'|'Reference output'|'Response 1'|'Sources 1'|...|'Response N'|'Sources N'
The CSV file will be saved in the same location as the script.
NB: There will be as many responses as run sessions
Note: to use Langfuse, set the LANGFUSE_SECRET_KEY and LANGFUSE_PUBLIC_KEY environment variables (the secret and public keys provided by Langfuse).
To use LangSmith, set the LANGCHAIN_API_KEY environment variable.
```
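An illustrative export of two Langfuse runs (keys, dataset and run names are placeholders):

```
export LANGFUSE_SECRET_KEY=sk-... LANGFUSE_PUBLIC_KEY=pk-...
python export_run_results_both.py langfuse my-dataset run-1 run-2
```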
@@ -0,0 +1,11 @@
# for Langfuse dataset provider
LANGFUSE_SECRET_KEY=
LANGFUSE_PUBLIC_KEY=
LANGFUSE_HOST=

# for LangSmith dataset provider
LANGCHAIN_API_KEY=

# for smarttribune_consumer.py script
API_KEY=
API_SECRET=
@@ -0,0 +1,55 @@
{
"history": [],
"question_answering_llm_setting": {
"provider": "AzureOpenAIService",
"api_key": {
"type": "Raw",
"value": "*****************"
},
"model": "gpt-4o",
"deployment_name": "*******************",
"repetition_penalty":1.0,
"max_new_tokens":256,
"api_base": "******************************",
"api_version": "2024-03-01-preview",
"temperature": 0.4 ,
"prompt": "Use the following context to answer the question at the end.\nIf you don't know the answer, just say {no_answer}.\n Context: {context}\nQuestion: {question} \n Answer in {locale}:"
},
"question_answering_prompt_inputs": {
"question": "How to get started playing guitar ?",
"no_answer": "Sorry, I don't know.",
"locale": "French"
},
"embedding_question_em_setting": {
"provider": "AzureOpenAIService",
"api_key": {
"type": "Raw",
"value": "*****************************"
},
"deployment_name": "********************",
"api_base": "***********************************",
"api_version": "2024-03-01-preview",
"model": "text-embedding-ada-002"
},
"document_index_name": "ns-03-bot-cmso",
"document_search_params": {
"provider": "OpenSearch",
"filter": [
{
"term": {
"metadata.index_session_id.keyword": "****************************************"
}
}
],
"k": 4
},
"observability_setting": {
"provider": "Langfuse",
"url": "http://localhost:3000",
"secret_key":{
"type": "Raw",
"value": "************************"
},
"public_key":"********************************"
}
}