Skip to content

Commit

Permalink
Merge pull request #1033 from kritinv/filters
Browse files Browse the repository at this point in the history
filter synthesizer
  • Loading branch information
penguine-ip authored Sep 25, 2024
2 parents 4df91c1 + 4caab87 commit 520ab1a
Show file tree
Hide file tree
Showing 7 changed files with 1,154 additions and 1,172 deletions.
23 changes: 16 additions & 7 deletions deepeval/progress_context.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn
from contextlib import contextmanager
from tqdm.asyncio import tqdm as async_tqdm_bar
from typing import Optional, Generator
import sys
from contextlib import contextmanager
from tqdm import tqdm as tqdm_bar
from rich.console import Console
import tqdm
import sys


from deepeval.telemetry import capture_synthesizer_run

Expand Down Expand Up @@ -32,6 +34,7 @@ def synthesizer_progress_context(
max_generations: str = None,
use_case: str = "QA",
progress_bar: Optional[tqdm.std.tqdm] = None,
async_mode: bool = False
) -> Generator[Optional[tqdm.std.tqdm], None, None]:
with capture_synthesizer_run(max_generations, method):
if embedder is None:
Expand All @@ -40,9 +43,15 @@ def synthesizer_progress_context(
description = f"✨ Generating up to {max_generations} goldens using DeepEval (using {evaluation_model} and {embedder}, use case={use_case}, method={method})"
# Direct output to stderr, using TQDM progress bar for visual feedback
if not progress_bar:
with tqdm_bar(
total=max_generations, desc=description, file=sys.stderr
) as progress_bar:
yield progress_bar # Pass progress bar to use in outer loop
if async_mode:
with async_tqdm_bar(
total=max_generations, desc=description, file=sys.stderr
) as progress_bar:
yield progress_bar
else:
with tqdm_bar(
total=max_generations, desc=description, file=sys.stderr
) as progress_bar:
yield progress_bar
else:
yield progress_bar
151 changes: 120 additions & 31 deletions deepeval/synthesizer/chunking/context_generator.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,34 @@
from typing import List, Tuple, Dict, Optional
from typing import List, Tuple, Dict, Optional, Union
from tqdm.asyncio import tqdm_asyncio
from tqdm import tqdm as tqdm_bar
from pydantic import BaseModel
import random
import math
import os

from deepeval.models.base_model import DeepEvalBaseEmbeddingModel, DeepEvalBaseLLM
from deepeval.synthesizer.chunking.doc_chunker import DocumentChunker
from deepeval.models.base_model import DeepEvalBaseEmbeddingModel
from deepeval.metrics.utils import trimAndLoadJson, initialize_model
from deepeval.synthesizer.templates.template import FilterTemplate

# Structured response schema passed as `schema=ContextScore` when asking the
# evaluation model to rate a chunk. evaluate_chunk / a_evaluate_chunk average
# the four fields into a single quality score.
# NOTE(review): presumably each rating is on a 0-1 scale, given the 0.5
# acceptance threshold used when sampling chunks -- confirm against
# FilterTemplate.evaluate_context.
class ContextScore(BaseModel):
    # The four ratings below carry equal weight in the averaged score.
    clarity: float
    depth: float
    structure: float
    relevance: float


class ContextGenerator:
def __init__(
self,
document_paths: List[str],
embedder: DeepEvalBaseEmbeddingModel,
model: Optional[Union[str, DeepEvalBaseLLM]] = None,
chunk_size: int = 1024,
chunk_overlap: int = 0,
):
from chromadb.api.models.Collection import Collection

self.model, self.using_native_model = initialize_model(model)
self.embedder = embedder
self.chunk_size = chunk_size
self.chunk_overlap = chunk_overlap
Expand All @@ -32,12 +42,14 @@ def __init__(
Dict[str, Collection]
] = None


async def a_generate_contexts(
self, num_context_per_document: int, max_context_size: int = 3
) -> Tuple[List[List[str]], List[str]]:
self.check_if_docs_are_loaded()
contexts: List[List[str]] = []
source_files: List[str] = []
scores = []
contexts = []
source_files = []

# Check if chunk_size is valid for document lengths
if self.doc_to_chunker_map is not None:
Expand Down Expand Up @@ -76,32 +88,33 @@ async def a_chunk_and_store(key, chunker: DocumentChunker):
*tasks, desc="✨ 📚 ✨ Chunking Documents"
)

# Generate contexts
# Generate contexts
for path, collection in tqdm_bar(
self.source_files_to_collections_map.items(),
desc="✨ 🧩 ✨ Generating Contexts",
):
num_chunks = collection.count()
min_num_context = min(num_context_per_document, num_chunks)
contexts.extend(
self._get_n_random_contexts_per_doc(
contexts_per_doc, scores_per_doc = self._get_n_random_contexts_per_doc(
path=path,
n_contexts_per_doc=min_num_context,
context_size=max_context_size,
similarity_threshold=0.7,
similarity_threshold=0.5,
)
)
for _ in contexts:
contexts.extend(contexts_per_doc)
scores.extend(scores_per_doc)
for _ in contexts_per_doc:
source_files.append(path)
self.total_chunks += num_chunks
return contexts, source_files
return contexts, source_files, scores

def generate_contexts(
self, num_context_per_document: int, max_context_size: int = 3
) -> Tuple[List[List[str]], List[str]]:
) -> Tuple[List[List[str]], List[str], List[float]]:
self.check_if_docs_are_loaded()
contexts: List[List[str]] = []
source_files: List[str] = []
scores = []
contexts = []
source_files = []

# Check if chunk_size is valid for document lengths
if self.doc_to_chunker_map is not None:
Expand Down Expand Up @@ -132,25 +145,26 @@ def generate_contexts(
):
self.source_files_to_collections_map[key] = chunker.chunk_doc()

# Generate contexts
# Generate contexts
for path, collection in tqdm_bar(
self.source_files_to_collections_map.items(),
desc="✨ 🧩 ✨ Generating Contexts",
):
num_chunks = collection.count()
min_num_context = min(num_context_per_document, num_chunks)
contexts.extend(
self._get_n_random_contexts_per_doc(
contexts_per_doc, scores_per_doc = self._get_n_random_contexts_per_doc(
path=path,
n_contexts_per_doc=min_num_context,
context_size=max_context_size,
similarity_threshold=0.5,
)
)
for _ in contexts:
contexts.extend(contexts_per_doc)
scores.extend(scores_per_doc)
for _ in contexts_per_doc:
source_files.append(path)
self.total_chunks += num_chunks
return contexts, source_files
return contexts, source_files, scores


async def _a_load_docs(self):
async def a_process_document(path):
Expand Down Expand Up @@ -186,6 +200,7 @@ async def a_process_document(path):
tasks = [a_process_document(path) for path in self.document_paths]
await tqdm_asyncio.gather(*tasks, desc="✨ 🚀 ✨ Loading Documents")


def _load_docs(self):
for path in tqdm_bar(self.document_paths, "✨ 🚀 ✨ Loading Documents"):
import chromadb
Expand Down Expand Up @@ -216,6 +231,7 @@ def _load_docs(self):
if path not in self.doc_to_chunker_map:
self.doc_to_chunker_map[path] = doc_chunker


def _get_n_random_contexts_per_doc(
self,
path: str,
Expand All @@ -230,12 +246,12 @@ def _get_n_random_contexts_per_doc(
assert (
0 <= similarity_threshold <= 1
), "similarity_threshold must be between 0 and 1."

contexts = []
scores = []
num_query_docs = 0

# get [n=n_contexts_per_doc] random chunks per doc
random_chunks = self._get_n_random_chunks_per_doc(
random_chunks, scores = self._get_n_random_chunks_per_doc(
path=path, n_chunks=n_contexts_per_doc
)
collection = self.source_files_to_collections_map[path]
Expand Down Expand Up @@ -264,23 +280,96 @@ def _get_n_random_contexts_per_doc(
):
context.append(similar_chunk_text)
contexts.append(context)
return contexts
return contexts, scores


def evaluate_chunk(
    self, chunk
) -> float:
    """Score a chunk's quality with the evaluation model.

    The prompt is built by ``FilterTemplate.evaluate_context`` and the model
    is expected to rate the chunk on clarity, depth, structure and relevance.

    Args:
        chunk: The chunk to evaluate.

    Returns:
        The arithmetic mean of the four ratings.
    """
    prompt = FilterTemplate.evaluate_context(chunk)

    if not self.using_native_model:
        try:
            # Preferred path for custom models: structured output.
            res: ContextScore = self.model.generate(prompt, schema=ContextScore)
            return (res.clarity + res.depth + res.structure + res.relevance) / 4
        except TypeError:
            # Structured generation is unavailable (e.g. generate() does not
            # accept `schema`); fall through to the raw-JSON path below.
            pass

    raw = self.model.generate(prompt)
    # Native models are unpacked as a 2-tuple whose first item is the response
    # text; custom models may return the text directly. Accept both so the
    # fallback does not fail on tuple-unpacking a plain string.
    if isinstance(raw, tuple):
        raw = raw[0]
    data = trimAndLoadJson(raw, self)
    return (
        data["clarity"] + data["depth"] + data["structure"] + data["relevance"]
    ) / 4


async def a_evaluate_chunk(
    self, chunk
) -> float:
    """Asynchronously score a chunk's quality with the evaluation model.

    The prompt is built by ``FilterTemplate.evaluate_context`` and the model
    is expected to rate the chunk on clarity, depth, structure and relevance.

    Args:
        chunk: The chunk to evaluate.

    Returns:
        The arithmetic mean of the four ratings.
    """
    prompt = FilterTemplate.evaluate_context(chunk)

    if not self.using_native_model:
        try:
            # Preferred path for custom models: structured output.
            res: ContextScore = await self.model.a_generate(
                prompt, schema=ContextScore
            )
            return (res.clarity + res.depth + res.structure + res.relevance) / 4
        except TypeError:
            # Structured generation is unavailable (e.g. a_generate() does not
            # accept `schema`); fall through to the raw-JSON path below.
            pass

    raw = await self.model.a_generate(prompt)
    # Native models are unpacked as a 2-tuple whose first item is the response
    # text; custom models may return the text directly. Accept both so the
    # fallback does not fail on tuple-unpacking a plain string.
    if isinstance(raw, tuple):
        raw = raw[0]
    data = trimAndLoadJson(raw, self)
    return (
        data["clarity"] + data["depth"] + data["structure"] + data["relevance"]
    ) / 4


def _get_n_random_chunks_per_doc(
    self, path: str, n_chunks: int
) -> Tuple[List[str], List[float]]:
    """Randomly pick ``n_chunks`` chunks from a document, preferring good ones.

    Chunks are sampled at random from the collection stored for ``path`` and
    scored with ``self.evaluate_chunk``. A chunk is accepted when its score is
    above the minimum threshold, or when it is the ``max_retries``-th
    consecutive chunk to fall short (so a run of weak chunks cannot stall the
    selection). If the sampled pool runs out before ``n_chunks`` chunks were
    accepted, the best-scoring rejected chunks are used to fill the gap, so
    the result always has exactly ``n_chunks`` entries.

    Args:
        path: Key into ``self.source_files_to_collections_map``.
        n_chunks: Number of chunks to return.

    Returns:
        ``(chunks, scores)`` where ``scores[i]`` is the score of ``chunks[i]``.

    Raises:
        AssertionError: If ``n_chunks`` exceeds the collection's chunk count.
    """
    collection = self.source_files_to_collections_map[path]
    total_chunks = collection.count()
    assert (
        n_chunks <= total_chunks
    ), f"Requested {n_chunks} chunks, but the collection only contains {total_chunks} chunks."

    max_retries = 3
    minimum_threshold = 0.5

    # Oversample when the collection is large enough, so that every accepted
    # chunk can "use up" as many as max_retries candidates.
    if total_chunks >= n_chunks * max_retries:
        sample_size = n_chunks * max_retries
    else:
        sample_size = n_chunks

    random_ids = [
        str(i) for i in random.sample(range(total_chunks), sample_size)
    ]
    chunks = collection.get(ids=random_ids)["documents"]

    evaluated_chunks: List[str] = []
    scores: List[float] = []
    rejected: List[Tuple[float, str]] = []  # (score, chunk) kept for backfill
    retry_count = 0
    for chunk in chunks:
        if len(evaluated_chunks) >= n_chunks:
            break

        score = self.evaluate_chunk(chunk)
        if score > minimum_threshold:
            evaluated_chunks.append(chunk)
            scores.append(score)
            retry_count = 0
            continue

        retry_count += 1
        if retry_count == max_retries:
            # Too many weak chunks in a row: accept this one anyway.
            evaluated_chunks.append(chunk)
            scores.append(score)
            retry_count = 0
        else:
            rejected.append((score, chunk))

    # Without oversampling the loop can end short of n_chunks; top up with
    # the highest-scoring rejected chunks instead of returning fewer.
    missing = n_chunks - len(evaluated_chunks)
    if missing > 0:
        rejected.sort(key=lambda pair: pair[0], reverse=True)
        for score, chunk in rejected[:missing]:
            evaluated_chunks.append(chunk)
            scores.append(score)

    return evaluated_chunks, scores


def check_if_docs_are_loaded(self):
if (
Expand Down
9 changes: 9 additions & 0 deletions deepeval/synthesizer/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@
from pydantic import BaseModel


# Pydantic schema holding a single rewritten input string.
# NOTE(review): the producer/consumer of this model is not visible in this
# view -- presumably used as a structured-output schema by the synthesizer;
# confirm against its call sites.
class RewrittenInput(BaseModel):
    rewritten_input: str


# Pydantic schema pairing a numeric score with free-text feedback.
# NOTE(review): the score's scale and what is being scored are not visible
# in this view -- presumably the quality of a generated input; confirm
# against the synthesizer's call sites.
class InputFeedback(BaseModel):
    score: float
    feedback: str


class SyntheticData(BaseModel):
input: str

Expand Down
Loading

0 comments on commit 520ab1a

Please sign in to comment.