From d214f521d806ddbf772b9461092863a859f544d7 Mon Sep 17 00:00:00 2001 From: Salman Mohammadi Date: Fri, 8 Nov 2024 16:09:37 +0000 Subject: [PATCH] tidy tidy tidy tidy fresh clean --- tests/recipes/test_eleuther_eval.py | 6 ------ tests/recipes/utils.py | 17 ++++++----------- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/tests/recipes/test_eleuther_eval.py b/tests/recipes/test_eleuther_eval.py index 255634e27b..78c39e128f 100644 --- a/tests/recipes/test_eleuther_eval.py +++ b/tests/recipes/test_eleuther_eval.py @@ -246,10 +246,7 @@ def test_meta_eval_vision(self, caplog, monkeypatch, tmpdir, expected_vision_acc pattern = r"^\|\s*(?:-\s*)?([^\|]+?)\s*\|\s*(\d+)\s*\|.*?\|.*?\|acc\s*\|\s*↑\s*\|\s*([\d.]+)" - # Find all matches in the table text matches = re.findall(pattern, out, re.MULTILINE) - - # Print the task names and their corresponding accuracy scores for task_name, _, accuracy in matches: assert math.isclose(float(accuracy), expected_vision_acc[task_name]) @@ -291,9 +288,6 @@ def test_hf_eval_vision(self, caplog, monkeypatch, tmpdir, expected_vision_acc): pattern = r"^\|\s*(?:-\s*)?([^\|]+?)\s*\|\s*(\d+)\s*\|.*?\|.*?\|acc\s*\|\s*↑\s*\|\s*([\d.]+)" - # Find all matches in the table text matches = re.findall(pattern, out, re.MULTILINE) - - # Print the task names and their corresponding accuracy scores for task_name, _, accuracy in matches: assert math.isclose(float(accuracy), expected_vision_acc[task_name]) diff --git a/tests/recipes/utils.py b/tests/recipes/utils.py index 5696a289fa..7c35eedc2a 100644 --- a/tests/recipes/utils.py +++ b/tests/recipes/utils.py @@ -137,11 +137,6 @@ def llama3_2_vision_test_config() -> List[str]: "tokenizer.tile_size=18", "tokenizer.max_seq_len=4096", ] - return [ - "model._component_=torchtune.modules.model_fusion.DeepFusionModel", - "model.encoder._component_=torchtune.models.llama3_2_vision._component_builders.llama3_2_vision_encoder", - "model.encoder._component_=torchtune.models.llama3_2_vision._component_builders.llama3_2_vision_decoder", - ] def dummy_vision_model(): @@ -259,16 +254,16 @@ def write_hf_ckpt_config(ckpt_dir: str): def write_hf_vision_ckpt_config(ckpt_dir: str): config = { "text_config": { - "num_attention_heads": 8, # Ensure this matches your expectations - "num_key_value_heads": 4, # This should match your expected key - "hidden_size": 128, # Corresponds to dim + "num_attention_heads": 8, + "num_key_value_heads": 4, + "hidden_size": 128, "vocab_size": 128256, "cross_attention_layers": [1, 4], }, "vision_config": { - "hidden_size": 128, # Corresponds to encoder_dim - "image_size": 18, # This corresponds to tile_size - "max_num_tiles": 2, # Corresponds to num_tiles + "hidden_size": 128, + "image_size": 18, + "max_num_tiles": 2, "supported_aspect_ratios": [[1, 1], [1, 2], [2, 1]], }, }