Skip to content

Commit

Permalink
Allow setting up a name for experiment and the job
Browse files Browse the repository at this point in the history
  • Loading branch information
LizaShak committed Mar 17, 2024
1 parent 3a063cb commit 2b076b3
Show file tree
Hide file tree
Showing 13 changed files with 81 additions and 30 deletions.
6 changes: 6 additions & 0 deletions .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ AZURE_DOCUMENT_INTELLIGENCE_ADMIN_KEY=
# OPTIONAL
############

EXPERIMENT_NAME= # Optional, if not provided, the prefix of the search index will be used from the config.json file

# Azure ML job name and description. It is useful to set the same name for the experiment and a unique name for each run; that will group all results in the same dashboard inside Azure ML
JOB_NAME=
JOB_DESCRIPTION=

#### Azure Search Skillsets
AZURE_LANGUAGE_SERVICE_ENDPOINT=
AZURE_LANGUAGE_SERVICE_KEY=
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ To use the **RAG Experiment Accelerator**, follow these steps:

```json
{
"name_prefix": "Name of experiment, search index name used for tracking and comparing jobs",
"index_name_prefix": "Search index name prefix used for tracking and comparing jobs",
"chunking": {
"chunk_size": "Size of each chunk e.g. [500, 1000, 2000]" ,
"overlap_size": "Overlap Size for each chunk e.g. [100, 200, 300]"
Expand All @@ -193,6 +193,7 @@ To use the **RAG Experiment Accelerator**, follow these steps:
"index_analyzer_name" : "name of the analyzer used at indexing time for the field. This option can be used only with searchable fields. It must be set together with searchAnalyzer and it cannot be set together with the analyzer option.",
"search_analyzer_name" : "name of the analyzer used at search time for the field. This option can be used only with searchable fields. It must be set together with indexAnalyzer and it cannot be set together with the analyzer option. This property cannot be set to the name of a language analyzer; use the analyzer property instead if you need a language analyzer.",
},
"experiment_name": "name of the experiment",
"rerank": "determines if search results should be re-ranked. Valid values are TRUE or FALSE" ,
"rerank_type": "determines the type of re-ranking. Valid values are llm or crossencoder",
"llm_re_rank_threshold": "determines the threshold when using llm re-ranking. Chunks with rank above this number are selected in range from 1 - 10." ,
Expand Down
5 changes: 4 additions & 1 deletion config.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
{
"name_prefix": "surface",
"index_name_prefix": "surface",
"experiment_name": "surface",
"job_name": "baseline",
"job_description": "",
"chunking": {
"chunk_size": [1000],
"overlap_size": [200]
Expand Down
5 changes: 3 additions & 2 deletions promptflow/rag-experiment-accelerator/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ The `setup` node runs first and loads the required environment variables from a

### Index
The `index` node will:
- Create indexes based on the parameters set in `config.json`. Each index name will be in the following format: `{name_prefix}-{chunk_size}-{overlap}-{dimension}-{ef_construction}-{ef_search}`
- Create indexes based on the parameters set in `config.json`. Each index name will be in the following format: `{index_name_prefix}-{chunk_size}-{overlap}-{dimension}-{ef_construction}-{ef_search}`
- Chunk documents based on the chunking parameters in `config.json`
- Generate a summary and title for each chunk
- Create embeddings for each chunk's content, generated title, and generated summary
Expand Down Expand Up @@ -96,7 +96,7 @@ az ml environment create --file ./environment.yaml -w $MLWorkSpaceName

```json
{
"name_prefix": "Name of experiment, search index name used for tracking and comparing jobs",
"index_name_prefix": "Search index name prefix used for tracking and comparing jobs",
"chunking": {
"chunk_size": "Size of each chunk e.g. [500, 1000, 2000]" ,
"overlap_size": "Overlap Size for each chunk e.g. [100, 200, 300]"
Expand All @@ -109,6 +109,7 @@ az ml environment create --file ./environment.yaml -w $MLWorkSpaceName
"index_analyzer_name" : "name of the analyzer used at indexing time for the field. This option can be used only with searchable fields. It must be set together with searchAnalyzer and it cannot be set together with the analyzer option.",
"search_analyzer_name" : "name of the analyzer used at search time for the field. This option can be used only with searchable fields. It must be set together with indexAnalyzer and it cannot be set together with the analyzer option. This property cannot be set to the name of a language analyzer; use the analyzer property instead if you need a language analyzer.",
},
"experiment_name": "name of the experiment",
"rerank": "determines if search results should be re-ranked. Valid values are TRUE or FALSE" ,
"rerank_type": "determines the type of re-ranking. Valid values are llm or crossencoder",
"llm_re_rank_threshold": "determines the threshold when using llm re-ranking. Chunks with rank above this number are selected in range from 1 - 10." ,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ def __init__(
"""
super().__init__(data_location=data_location, writer=writer, loader=loader)

def _get_output_name(self, index_name: str) -> str:
def _get_output_name(
self, index_name: str, experiment_name: str, job_name: str
) -> str:
"""
Returns the output name for a given index name.
Expand All @@ -35,7 +37,7 @@ def _get_output_name(self, index_name: str) -> str:
Returns:
str: The output name.
"""
return f"eval_output_{index_name}.jsonl"
return f"eval_output_{index_name}_{experiment_name}_{job_name}.jsonl"

def get_output_path(self, index_name: str) -> str:
"""
Expand All @@ -49,7 +51,9 @@ def get_output_path(self, index_name: str) -> str:
"""
return f"{self.data_location}/{self._get_output_name(index_name)}"

def load(self, index_name: str) -> list[QueryOutput]:
def load(
self, index_name: str, experiment_name: str, job_name: str
) -> list[QueryOutput]:
"""
Loads the query outputs for a given index name.
Expand All @@ -59,7 +63,7 @@ def load(self, index_name: str) -> list[QueryOutput]:
Returns:
list[QueryOutput]: The loaded query outputs.
"""
output_name = self._get_output_name(index_name)
output_name = self._get_output_name(index_name, experiment_name, job_name)

query_outputs = []
data_load = super().load(output_name)
Expand All @@ -72,7 +76,9 @@ def load(self, index_name: str) -> list[QueryOutput]:

return query_outputs

def handle_archive_by_index(self, index_name: str) -> str | None:
def handle_archive_by_index(
self, index_name: str, experiment_name: str, job_name: str
) -> str | None:
"""
Handles archiving of query output for a given index name.
Expand All @@ -82,16 +88,18 @@ def handle_archive_by_index(self, index_name: str) -> str | None:
Returns:
str | None: The output filename if successful, None otherwise.
"""
output_filename = self._get_output_name(index_name)
output_filename = self._get_output_name(index_name, experiment_name, job_name)
return self.handle_archive(output_filename)

def save(self, data: QueryOutput, index_name: str):
def save(
self, data: QueryOutput, index_name: str, experiment_name: str, job_name: str
):
"""
Saves the query output for a given index name.
Args:
data (QueryOutput): The query output to be saved.
index_name (str): The name of the index.
"""
output_filename = self._get_output_name(index_name)
output_filename = self._get_output_name(index_name, experiment_name, job_name)
self.save_dict(data.__dict__, output_filename)
14 changes: 11 additions & 3 deletions rag_experiment_accelerator/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ class Config:
EMBEDDING_DIMENSIONS (list[int]): The number of dimensions to use for document embeddings.
EF_CONSTRUCTIONS (list[int]): The number of ef_construction to use for HNSW index.
EF_SEARCHES (list[int]): The number of ef_search to use for HNSW index.
NAME_PREFIX (str): A prefix to use for the names of saved models.
INDEX_NAME_PREFIX (str): A prefix to use for the names of saved models.
EXPERIMENT_NAME (str): The name of the experiment in Azure ML (optional, if not set INDEX_NAME_PREFIX will be used).
JOB_NAME (str): The name of the job in Azure ML (optional, if not set EXPERIMENT_NAME and current datetime will be used).
JOB_DESCRIPTION (str): The description of the job in Azure ML (optional).
SEARCH_VARIANTS (list[str]): A list of search types to use.
AZURE_OAI_CHAT_DEPLOYMENT_NAME (str): The name of the Azure deployment to use.
AZURE_OAI_EVAL_DEPLOYMENT_NAME (str): The name of the deployment to use for evaluation.
Expand Down Expand Up @@ -100,7 +103,10 @@ def _initialize(self, config_dir: str, data_dir: str, filename: str) -> None:
self.OVERLAP_SIZES = data["chunking"]["overlap_size"]
self.EF_CONSTRUCTIONS = data["ef_construction"]
self.EF_SEARCHES = data["ef_search"]
self.NAME_PREFIX = data["name_prefix"]
self.INDEX_NAME_PREFIX = data["index_name_prefix"]
self.EXPERIMENT_NAME = data["experiment_name"] or self.INDEX_NAME_PREFIX
self.JOB_NAME = data["job_name"]
self.JOB_DESCRIPTION = data["job_description"]
self.SEARCH_VARIANTS = data["search_types"]
self.AZURE_OAI_CHAT_DEPLOYMENT_NAME = data.get(
"azure_oai_chat_deployment_name", None
Expand All @@ -124,7 +130,9 @@ def _initialize(self, config_dir: str, data_dir: str, filename: str) -> None:
self.AzureSearchCredentials = AzureSearchCredentials.from_env()
self.AzureMLCredentials = AzureMLCredentials.from_env()
self.AzureSkillsCredentials = AzureSkillsCredentials.from_env()
self.AzureDocumentIntelligenceCredentials = AzureDocumentIntelligenceCredentials.from_env()
self.AzureDocumentIntelligenceCredentials = (
AzureDocumentIntelligenceCredentials.from_env()
)

self.embedding_models: list[EmbeddingModel] = []
embedding_model_config = data.get("embedding_models", [])
Expand Down
5 changes: 4 additions & 1 deletion rag_experiment_accelerator/config/tests/data/config.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
{
"name_prefix": "test_prefix",
"index_name_prefix": "test_prefix",
"experiment_name": "experiment_1",
"job_name": "baseline",
"job_description": "",
"chunking": {
"chunk_size": [512],
"overlap_size": [128]
Expand Down
3 changes: 2 additions & 1 deletion rag_experiment_accelerator/config/tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ def test_config_init(mock_embedding_model_factory):

config.embedding_models = [embedding_model_1, embedding_model_2]

assert config.NAME_PREFIX == mock_config_data["name_prefix"]
assert config.INDEX_NAME_PREFIX == mock_config_data["index_name_prefix"]
assert config.EXPERIMENT_NAME == mock_config_data["experiment_name"]
assert config.CHUNK_SIZES == mock_config_data["chunking"]["chunk_size"]
assert config.OVERLAP_SIZES == mock_config_data["chunking"]["overlap_size"]
assert config.CHUNKING_STRATEGY == mock_config_data["chunking_strategy"]
Expand Down
12 changes: 9 additions & 3 deletions rag_experiment_accelerator/evaluation/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,8 @@ def compute_metrics(question, actual, expected, context, metric_type):

def evaluate_prompts(
exp_name: str,
job_name,
job_description,
index_name: str,
config: Config,
client: mlflow.MlflowClient,
Expand Down Expand Up @@ -607,9 +609,13 @@ def evaluate_prompts(
metric_types = config.METRIC_TYPES
num_search_type = config.SEARCH_VARIANTS
data_list = []
run_name = f"{exp_name}_{formatted_datetime}"
run_name = (
job_name
if (job_name is not None) and (job_name != "")
else f"{exp_name}_{formatted_datetime}"
)
mlflow.set_experiment(exp_name)
mlflow.start_run(run_name=run_name)
mlflow.start_run(run_name=run_name, description=job_description)
pd.set_option("display.max_columns", None)

run_id = mlflow.active_run().info.run_id
Expand All @@ -619,7 +625,7 @@ def evaluate_prompts(
average_precision_for_search_type = {}

handler = QueryOutputHandler(config.QUERY_DATA_LOCATION)
query_data_load = handler.load(index_name)
query_data_load = handler.load(index_name, config.EXPERIMENT_NAME, config.JOB_NAME)
for data in query_data_load:
actual = remove_spaces(lower(data.actual))
expected = remove_spaces(lower(data.expected))
Expand Down
6 changes: 4 additions & 2 deletions rag_experiment_accelerator/run/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def run(config_dir: str, filename: str = "config.json"):
for ef_construction in config.EF_CONSTRUCTIONS:
for ef_search in config.EF_SEARCHES:
index_name = get_index_name(
config.NAME_PREFIX,
config.INDEX_NAME_PREFIX,
chunk_size,
overlap,
embedding_model.name,
Expand All @@ -55,7 +55,9 @@ def run(config_dir: str, filename: str = "config.json"):
logger.info(f"Evaluating Index: {index_name}")

eval.evaluate_prompts(
exp_name=config.NAME_PREFIX,
exp_name=config.EXPERIMENT_NAME,
job_name=config.JOB_NAME,
job_description=config.JOB_DESCRIPTION,
index_name=index_name,
config=config,
client=client,
Expand Down
11 changes: 8 additions & 3 deletions rag_experiment_accelerator/run/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def run(config_dir: str, data_dir: str = "data", filename: str = "config.json")
for ef_construction in config.EF_CONSTRUCTIONS:
for ef_search in config.EF_SEARCHES:
index_name = get_index_name(
config.NAME_PREFIX,
config.INDEX_NAME_PREFIX,
chunk_size,
overlap,
embedding_model.name,
Expand All @@ -76,13 +76,18 @@ def run(config_dir: str, data_dir: str = "data", filename: str = "config.json")
for chunk_size in config.CHUNK_SIZES:
for overlap in config.OVERLAP_SIZES:
all_docs = load_documents(
config.CHUNKING_STRATEGY, config.AzureDocumentIntelligenceCredentials, config.DATA_FORMATS, config.data_dir, chunk_size, overlap
config.CHUNKING_STRATEGY,
config.AzureDocumentIntelligenceCredentials,
config.DATA_FORMATS,
config.data_dir,
chunk_size,
overlap,
)
for embedding_model in config.embedding_models:
for ef_construction in config.EF_CONSTRUCTIONS:
for ef_search in config.EF_SEARCHES:
index_name = get_index_name(
config.NAME_PREFIX,
config.INDEX_NAME_PREFIX,
chunk_size,
overlap,
embedding_model.name,
Expand Down
13 changes: 10 additions & 3 deletions rag_experiment_accelerator/run/querying.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def run(config_dir: str, filename: str = "config.json"):
for ef_construction in config.EF_CONSTRUCTIONS:
for ef_search in config.EF_SEARCHES:
index_name = get_index_name(
config.NAME_PREFIX,
config.INDEX_NAME_PREFIX,
chunk_size,
overlap,
embedding_model.name,
Expand All @@ -277,7 +277,9 @@ def run(config_dir: str, filename: str = "config.json"):
)
logger.info(f"Index: {index_name}")

handler.handle_archive_by_index(index_name)
handler.handle_archive_by_index(
index_name, config.EXPERIMENT_NAME, config.JOB_NAME
)

search_client = create_client(
service_endpoint, index_name, search_admin_key
Expand Down Expand Up @@ -394,7 +396,12 @@ def run(config_dir: str, filename: str = "config.json"):
context=qna_context,
question=user_prompt,
)
handler.save(index_name=index_name, data=output)
handler.save(
index_name=index_name,
data=output,
experiment_name=config.EXPERIMENT_NAME,
job_name=config.JOB_NAME,
)

except BadRequestError as e:
logger.error(
Expand Down
4 changes: 2 additions & 2 deletions rag_experiment_accelerator/run/tests/test_querying.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def setUp(self):
self.mock_config.EF_CONSTRUCTIONS = [400]
self.mock_config.EF_SEARCHES = [400]
self.mock_config.SEARCH_VARIANTS = ["search_for_match_semantic"]
self.mock_config.NAME_PREFIX = "prefix"
self.mock_config.INDEX_NAME_PREFIX = "prefix"
self.mock_config.RERANK_TYPE = "llm"
self.mock_config.CHUNK_SIZES = [1]
self.mock_config.OVERLAP_SIZES = [1]
Expand Down Expand Up @@ -277,7 +277,7 @@ def test_run_no_multi_no_rerank(
mock_config.return_value.EF_CONSTRUCTIONS = [400]
mock_config.return_value.EF_SEARCHES = [400]
mock_config.return_value.SEARCH_VARIANTS = ["search_for_match_semantic"]
mock_config.return_value.NAME_PREFIX = "prefix"
mock_config.return_value.INDEX_NAME_PREFIX = "prefix"
mock_config.return_value.RERANK = False
mock_do_we_need_multiple_questions.return_value = False
mock_query_and_eval_acs.return_value = [MagicMock(), MagicMock()]
Expand Down

0 comments on commit 2b076b3

Please sign in to comment.