
Merge pull request #1253 from kritinv/fix-text-to-image-bug
fix text-to-image metric bug

The semantic-consistency step of TextToImageMetric was scoring against the input image (or None when the prompt contained no image) instead of the generated image; it now always scores the first image in actual_output. The parameter is renamed from image_input to actual_image_output to match, and the text-to-image test case is updated to use a hosted image.
penguine-ip authored Jan 8, 2025
2 parents 2b53e20 + 90fd253 commit e27785d
Showing 2 changed files with 28 additions and 24 deletions.
```diff
@@ -54,7 +54,7 @@ def measure(
                 self.a_measure(test_case, _show_indicator=False)
             )
         else:
-            input_texts, input_images = self.separate_images_from_text(
+            input_texts, _ = self.separate_images_from_text(
                 test_case.input
             )
             _, output_images = self.separate_images_from_text(
@@ -64,7 +64,7 @@ def measure(
             self.SC_scores, self.SC_reasoning = (
                 self._evaluate_semantic_consistency(
                     "\n".join(input_texts),
-                    None if len(input_images) == 0 else input_images[0],
+                    output_images[0],
                 )
             )
             self.PQ_scores, self.PQ_reasoning = (
```
```diff
@@ -103,7 +103,7 @@ async def a_measure(
             async_mode=True,
             _show_indicator=_show_indicator,
         ):
-            input_texts, input_images = self.separate_images_from_text(
+            input_texts, _ = self.separate_images_from_text(
                 test_case.input
             )
             _, output_images = self.separate_images_from_text(
@@ -115,7 +115,7 @@ async def a_measure(
             ) = await asyncio.gather(
                 self._a_evaluate_semantic_consistency(
                     "\n".join(input_texts),
-                    None if len(input_images) == 0 else input_images[0],
+                    output_images[0],
                 ),
                 self._a_evaluate_perceptual_quality(output_images[0]),
             )
```
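Both `measure` and `a_measure` lean on `separate_images_from_text` to split a multimodal list into its text and image parts. A minimal sketch of the assumed behavior, reconstructed from the call sites above rather than from the repository's actual implementation:

```python
# Hypothetical reconstruction, inferred from the call sites in
# measure()/a_measure(); NOT the repo's actual implementation.
from typing import List, Tuple, Union

from deepeval.test_case import MLLMImage  # import path is an assumption


def separate_images_from_text(
    multimodal_input: List[Union[str, MLLMImage]],
) -> Tuple[List[str], List[MLLMImage]]:
    """Split a mixed multimodal list into its text and image parts."""
    texts = [item for item in multimodal_input if isinstance(item, str)]
    images = [item for item in multimodal_input if isinstance(item, MLLMImage)]
    return texts, images
```

With the fix, the metric keeps only the texts from `test_case.input` and only the images from the generated output (the second call's argument is elided in the diff), so `output_images[0]` is always the image being judged.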
```diff
@@ -154,10 +154,10 @@ def separate_images_from_text(
     async def _a_evaluate_semantic_consistency(
         self,
         text_prompt: str,
-        image_input: MLLMImage,
+        actual_image_output: MLLMImage,
     ) -> Tuple[List[int], str]:
         images: List[MLLMImage] = []
-        images.append(image_input)
+        images.append(actual_image_output)
         prompt = [
             TextToImageTemplate.generate_semantic_consistency_evaluation_results(
                 text_prompt=text_prompt
@@ -184,10 +184,10 @@ async def _a_evaluate_semantic_consistency(
     def _evaluate_semantic_consistency(
         self,
         text_prompt: str,
-        image_input: MLLMImage,
+        actual_image_output: MLLMImage,
     ) -> Tuple[List[int], str]:
         images: List[MLLMImage] = []
-        images.append(image_input)
+        images.append(actual_image_output)
         prompt = [
             TextToImageTemplate.generate_semantic_consistency_evaluation_results(
                 text_prompt=text_prompt
```
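The rename underlines what the call-site fix changes: with a text-only prompt, `input_images` is empty, so the old expression passed `None` and the generated image was never the one evaluated. A tiny self-contained illustration of the old vs. new selection logic (plain Python; strings stand in for MLLMImage objects):

```python
# Self-contained illustration of the selection logic before/after the fix.
input_images = []              # a text-only prompt has no input images
output_images = ["generated.png"]

# Before: scored the input image, or None for text-only prompts.
old_choice = None if len(input_images) == 0 else input_images[0]

# After: always score the first generated image.
new_choice = output_images[0]

print(old_choice)  # None
print(new_choice)  # generated.png
```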
36 changes: 20 additions & 16 deletions tests/test_image_metrics.py
```diff
@@ -23,9 +23,13 @@

 text_to_image_test_case = MLLMTestCase(
     input=[
-        "gesnerate a castle school in fantasy land with the words LLM evaluation on it"
+        "generate an image of the eiffel tower",
     ],
-    actual_output=[MLLMImage(image_path, local=True)],
+    actual_output=[
+        MLLMImage(
+            url="https://upload.wikimedia.org/wikipedia/commons/thumb/8/85/Tour_Eiffel_Wikimedia_Commons_%28cropped%29.jpg/375px-Tour_Eiffel_Wikimedia_Commons_%28cropped%29.jpg"
+        ),
+    ],
 )

 image_editing_test_case = MLLMTestCase(
```
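The updated case points at a hosted image instead of a local file, so the test no longer depends on `image_path` resolving on disk. A short sketch of both forms, assuming `MLLMImage` accepts each (the local form is taken from the removed line; `image_path` here is a stand-in for the variable defined elsewhere in the real test file):

```python
from deepeval.test_case import MLLMImage  # import path is an assumption

image_path = "castle.png"  # stand-in; defined elsewhere in the real test file

# Old form: a file on disk, flagged as local
local_image = MLLMImage(image_path, local=True)

# New form: a public URL, independent of the local filesystem
remote_image = MLLMImage(
    url="https://upload.wikimedia.org/wikipedia/commons/thumb/8/85/Tour_Eiffel_Wikimedia_Commons_%28cropped%29.jpg/375px-Tour_Eiffel_Wikimedia_Commons_%28cropped%29.jpg"
)
```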
```diff
@@ -98,20 +98,20 @@
 #############################################################


-# dataset = EvaluationDataset(
-#     test_cases=[
-#         # text_to_image_test_case,
-#         image_editing_test_case,
-#         llm_test_case
-#     ]
-# )
-# dataset.evaluate(
-#     [
-#         # TextToImageMetric(),
-#         ImageEditingMetric(),
-#         AnswerRelevancyMetric(),
-#     ]
-# )
+dataset = EvaluationDataset(
+    test_cases=[
+        text_to_image_test_case,
+        # image_editing_test_case,
+        # llm_test_case
+    ]
+)
+dataset.evaluate(
+    [
+        TextToImageMetric(),
+        # ImageEditingMetric(),
+        # AnswerRelevancyMetric(),
+    ]
+)

 evaluate(
     test_cases=[rag_test_case],
```
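To exercise just the fixed metric, a minimal driver along the lines of the test above should suffice (a sketch, not the repository's exact harness; import paths are assumptions based on deepeval's public API, and `TextToImageMetric` is used with its defaults):

```python
from deepeval.dataset import EvaluationDataset
from deepeval.metrics import TextToImageMetric  # assumed export location
from deepeval.test_case import MLLMImage, MLLMTestCase

test_case = MLLMTestCase(
    input=["generate an image of the eiffel tower"],
    actual_output=[
        MLLMImage(
            url="https://upload.wikimedia.org/wikipedia/commons/thumb/8/85/Tour_Eiffel_Wikimedia_Commons_%28cropped%29.jpg/375px-Tour_Eiffel_Wikimedia_Commons_%28cropped%29.jpg"
        )
    ],
)

dataset = EvaluationDataset(test_cases=[test_case])
dataset.evaluate([TextToImageMetric()])
```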
