Feature branch finetune #1

Open · wants to merge 28 commits into base: main
Commits (28) · changes from all commits
f3f8c88
Update finetune_lora.sh
akash-das2000 Feb 27, 2024
b55ac50
Update finetune_lora.sh
akash-das2000 Feb 27, 2024
e68ddbd
Update finetune_lora.sh
akash-das2000 Feb 27, 2024
2324760
updating path
Mar 8, 2024
a689fd8
modified the checkpoint to hf location
Mar 10, 2024
696bbcc
changed the model_name_or_path to hf model path
Mar 10, 2024
403ef5c
Update finetune_lora.sh
akash-das2000 Mar 11, 2024
c9f11ec
Update finetune_lora.sh
akash-das2000 Mar 11, 2024
c9628fe
Update finetune_lora.sh
akash-das2000 Mar 11, 2024
4ff13a9
Update finetune_lora.sh
akash-das2000 Mar 11, 2024
ef51af7
Update finetune_lora.sh
akash-das2000 Mar 11, 2024
580696a
Update finetune_lora.sh
akash-das2000 Mar 11, 2024
423ad7f
Update finetune_lora.sh
akash-das2000 Mar 11, 2024
ca7e22b
Update finetune_lora.sh
akash-das2000 Mar 11, 2024
a417a4c
Update finetune_lora.sh
akash-das2000 Mar 11, 2024
4fa482f
Update finetune_lora.sh
akash-das2000 Mar 14, 2024
c8f3b6a
night call changes
Mar 23, 2024
7d806f1
changes made to the scripts/v1_5/finetune_task_lora.sh file
Mar 29, 2024
c53a30a
Dummy change in README.md
Apr 14, 2024
98d97dc
Update finetune_lora.sh
akash-das2000 Apr 15, 2024
8f8fa40
Changes for data prep
Apr 18, 2024
f2ac2cd
Merge branch 'main' of github.com:techconative/LLaVA into feature-bra…
Apr 18, 2024
0927d6f
Merge branch 'feature-branch-Finetune' of github.com:techconative/LLa…
Apr 18, 2024
c824499
Adding the readme file for the scripts to split the data
Apr 22, 2024
bb28667
Adding the readme file for the scripts to split the data
Apr 22, 2024
3652720
Adding wiki and updated finetune script with relative paths
May 29, 2024
a6b42a1
Edited wiki/navigation.md
May 29, 2024
0c771a9
Adding the changes made to builder.py to tackle the offload issue
Jun 10, 2024
6 changes: 2 additions & 4 deletions .gitattributes
@@ -1,9 +1,7 @@
# https://git-scm.com/docs/gitattributes

# Set the default behavior, in case people don't have core.autocrlf set.
# https://git-scm.com/docs/gitattributes#_end_of_line_conversion
* text=auto

# common python attributes, taken from https://github.com/alexkaratarakis/gitattributes/blob/710900479a2bedeec7003d381719521ffbb18bf8/Python.gitattributes
# Source files
# ============
@@ -14,7 +12,6 @@
*.pyx text diff=python
*.pyz text diff=python
*.pyi text diff=python

# Binary files
# ============
*.db binary
@@ -24,6 +21,7 @@
*.pyc binary export-ignore
*.pyo binary export-ignore
*.pyd binary

# Jupyter notebook
*.ipynb text eol=lf
llava_data_r1.zip filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
1 change: 1 addition & 0 deletions .gitignore
@@ -33,3 +33,4 @@ ckpts*

# Demo
serve_images/
llava/
Empty file modified LICENSE
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion README.md
100644 → 100755
@@ -1,6 +1,6 @@
# 🌋 LLaVA: Large Language and Vision Assistant

*Visual instruction tuning towards large language and vision models with GPT-4 level capabilities.*
*Visual instruction tuning towards large language and vision models with GPT-4 level capabilities*

[📢 [LLaVA-NeXT Blog](https://llava-vl.github.io/blog/2024-01-30-llava-next/)] [[Project Page](https://llava-vl.github.io/)] [[Demo](https://llava.hliu.cc/)] [[Data](https://github.com/haotian-liu/LLaVA/blob/main/docs/Data.md)] [[Model Zoo](https://github.com/haotian-liu/LLaVA/blob/main/docs/MODEL_ZOO.md)]

Empty file modified cog.yaml
100644 → 100755
Empty file.
40 changes: 40 additions & 0 deletions data_prep/LLaVA_InitialJson.py
@@ -0,0 +1,40 @@
import os
import json

# Function to extract conversation data from .gui file
def extract_conversations_from_gui(gui_file):
    with open(gui_file, 'r') as file:
        gui_text = file.read().strip()
    return [{'from': 'human', 'value': '<image>\nWrite a code for the given UI'}, {'from': 'gpt', 'value': gui_text}]


# Function to convert data to JSON format
def convert_data_to_json(input_folder, output_folder):
    data = []
    for filename in os.listdir(input_folder):
        if filename.endswith('.gui'):
            sample_id = filename.split('.')[0]
            image_path = "./Sketch2Code_og/" + os.path.relpath(os.path.join(input_folder, f"{sample_id}.png"), output_folder)
            gui_path = os.path.join(input_folder, filename)
            conversations = extract_conversations_from_gui(gui_path)
            sample = {
                'id': sample_id,
                'image': image_path,
                'conversations': conversations
            }
            data.append(sample)

    output_path = os.path.join(output_folder, 'samples.json')
    with open(output_path, 'w') as json_file:
        json.dump(data, json_file, indent=2)

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Convert data to JSON format")
    parser.add_argument("input_folder", help="Input folder containing .gui files and corresponding images")
    parser.add_argument("output_folder", help="Output folder to store the JSON file")

    args = parser.parse_args()

    convert_data_to_json(args.input_folder, args.output_folder)
87 changes: 87 additions & 0 deletions data_prep/LLaVA_dataSplit.py
@@ -0,0 +1,87 @@
import os
import json
import random
import shutil
import argparse

def split_dataset(input_folder, output_folder, test_split=10, validation_split=10, seed=42):
    # Create output folders for train, test, and validation sets
    train_folder = os.path.join(output_folder, 'train')
    test_folder = os.path.join(output_folder, 'test')
    validation_folder = os.path.join(output_folder, 'validation')
    split_json_folder = os.path.join(output_folder, 'split_json_files')

    for folder in [train_folder, test_folder, validation_folder, split_json_folder]:
        os.makedirs(folder, exist_ok=True)

    # Load the JSON file containing the data
    json_file_path = os.path.join(input_folder, 'samples.json')
    with open(json_file_path, 'r') as json_file:
        data = json.load(json_file)

    # Calculate the number of samples for test and validation sets
    num_samples = len(data)
    num_test_samples = num_samples * test_split // 100
    num_validation_samples = num_samples * validation_split // 100

    # Set random seed for reproducibility
    random.seed(seed)

    # Randomly select indices for test set
    test_indices = random.sample(range(num_samples), num_test_samples)

    # Remove test indices from the list of all indices
    remaining_indices = [i for i in range(num_samples) if i not in test_indices]

    # Randomly select indices for validation set from remaining indices
    validation_indices = random.sample(remaining_indices, num_validation_samples)

    # The remaining indices are for the training set
    train_indices = [i for i in remaining_indices if i not in validation_indices]

    # Copy files to corresponding folders and update JSON files
    for idx, sample in enumerate(data):
        source_image = sample['image']
        source_gui = os.path.join(input_folder, 'data', f"{sample['id']}.gui")
        destination_folder = None
        if idx in test_indices:
            destination_folder = test_folder
        elif idx in validation_indices:
            destination_folder = validation_folder
        else:
            destination_folder = train_folder

        # Copy files to destination folder
        image_filename = os.path.basename(source_image)
        gui_filename = f"{sample['id']}.gui"
        destination_image = os.path.join(destination_folder, image_filename)
        destination_gui = os.path.join(destination_folder, gui_filename)
        shutil.copy(source_image, destination_image)
        shutil.copy(source_gui, destination_gui)

        # Update JSON data with relative paths
        relative_image_path = os.path.relpath(destination_image, output_folder)
        relative_gui_path = os.path.relpath(destination_gui, output_folder)
        sample['image'] = "./" + relative_image_path
        sample['gui'] = "./" + relative_gui_path

    # Create updated JSON files for each split
    splits = {'train': train_indices, 'test': test_indices, 'validation': validation_indices}
    for split, indices in splits.items():
        split_data = [data[i] for i in indices]
        split_json_path = os.path.join(split_json_folder, f"{split}_json.json")
        with open(split_json_path, 'w') as json_file:
            json.dump(split_data, json_file, indent=2)

    print("Dataset splitting and JSON file creation completed.")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Split dataset and generate JSON files")
    parser.add_argument("input_folder", help="Path to the input folder containing the dataset")
    parser.add_argument("output_folder", help="Path to the output folder to save the split dataset")
    parser.add_argument("--test_split", type=int, default=10, help="Percentage of data to use for test (default: 10)")
    parser.add_argument("--validation_split", type=int, default=10, help="Percentage of data to use for validation (default: 10)")
    parser.add_argument("--seed", type=int, default=42, help="Random seed for reproducibility (default: 42)")
    args = parser.parse_args()

    split_dataset(args.input_folder, args.output_folder, args.test_split, args.validation_split, args.seed)
15 changes: 15 additions & 0 deletions data_prep/Readme.md
@@ -0,0 +1,15 @@
The Python script LLaVA_InitialJson.py converts the raw data into the customised JSON format required for the LLaVA model.
The Python script LLaVA_dataSplit.py performs the data splitting.

Use the following commands to run the .py files and generate the data splits:

python LLaVA_InitialJson.py ~/LLaVA/data_prep/Sketch2Code_og/data ~/LLaVA/data_prep/Sketch2Code_og

python LLaVA_dataSplit.py ~/LLaVA/data_prep/Sketch2Code_og/ ~/splitted_data_verify/
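For reference, a minimal sketch of what a single record in the generated samples.json looks like, based on LLaVA_InitialJson.py above. The id, image path, and .gui text below are hypothetical placeholders, not values taken from the dataset.

```python
import json

# Hypothetical example of one samples.json record as written by LLaVA_InitialJson.py.
# All concrete values here are illustrative placeholders.
sample = {
    "id": "sample_0001",                               # .gui filename without the extension
    "image": "./Sketch2Code_og/data/sample_0001.png",  # image path made relative to the output folder
    "conversations": [
        {"from": "human", "value": "<image>\nWrite a code for the given UI"},
        {"from": "gpt", "value": "header { btn-active } row { single { text } }"},  # raw .gui contents
    ],
}

# samples.json holds a list of such records, serialized with indent=2.
print(json.dumps([sample], indent=2))
```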


2 changes: 1 addition & 1 deletion llava/model/builder.py
@@ -24,7 +24,7 @@


def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, load_4bit=False, device_map="auto", device="cuda", use_flash_attn=False, **kwargs):
    kwargs = {"device_map": device_map, **kwargs}
    kwargs = {"device_map": device_map, "offload_folder": "offload", **kwargs}

    if device != "cuda":
        kwargs['device_map'] = {"": device}
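Below is a minimal usage sketch of the patched loader. The checkpoint name is only an example and the four-value return signature is assumed from upstream LLaVA; neither is defined by this diff. With `device_map="auto"`, the added `offload_folder` kwarg gives accelerate a folder on disk (`./offload`) for weights that do not fit in GPU memory.

```python
# Hypothetical call into the patched loader (example model path, assumed upstream return values).
from llava.model.builder import load_pretrained_model

tokenizer, model, image_processor, context_len = load_pretrained_model(
    model_path="liuhaotian/llava-v1.5-7b",  # example checkpoint, not mandated by this PR
    model_base=None,
    model_name="llava-v1.5-7b",
    device_map="auto",  # with the new default, overflow weights are spilled to ./offload
)
```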
Empty file modified predict.py
100644 → 100755
Empty file.
Empty file modified pyproject.toml
100644 → 100755
Empty file.
Empty file modified scripts/convert_gqa_for_eval.py
100644 → 100755
Empty file.
Empty file modified scripts/convert_mmbench_for_submission.py
100644 → 100755
Empty file.
Empty file modified scripts/convert_mmvet_for_eval.py
100644 → 100755
Empty file.
Empty file modified scripts/convert_seed_for_submission.py
100644 → 100755
Empty file.
Empty file modified scripts/convert_sqa_to_llava.py
100644 → 100755
Empty file.
Empty file modified scripts/convert_sqa_to_llava_base_prompt.py
100644 → 100755
Empty file.
Empty file modified scripts/convert_vizwiz_for_submission.py
100644 → 100755
Empty file.
Empty file modified scripts/convert_vqav2_for_submission.py
100644 → 100755
Empty file.
Empty file modified scripts/extract_mm_projector.py
100644 → 100755
Empty file.
Empty file modified scripts/finetune.sh
100644 → 100755
Empty file.
Empty file modified scripts/finetune_full_schedule.sh
100644 → 100755
Empty file.
Empty file modified scripts/finetune_lora.sh
100644 → 100755
Empty file.
Empty file modified scripts/finetune_qlora.sh
100644 → 100755
Empty file.
Empty file modified scripts/finetune_sqa.sh
100644 → 100755
Empty file.
Empty file modified scripts/merge_lora_weights.py
100644 → 100755
Empty file.
Empty file modified scripts/pretrain.sh
100644 → 100755
Empty file.
Empty file modified scripts/pretrain_xformers.sh
100644 → 100755
Empty file.
Empty file modified scripts/sqa_eval_batch.sh
100644 → 100755
Empty file.
Empty file modified scripts/sqa_eval_gather.sh
100644 → 100755
Empty file.
Empty file modified scripts/v1_5/finetune.sh
100644 → 100755
Empty file.
15 changes: 7 additions & 8 deletions scripts/v1_5/finetune_lora.sh
100644 → 100755
@@ -1,24 +1,24 @@
#!/bin/bash


deepspeed llava/train/train_mem.py \
--lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
--deepspeed ./scripts/zero3.json \
--model_name_or_path lmsys/vicuna-13b-v1.5 \
--model_name_or_path liuhaotian/llava-v1.5-7b \
--version v1 \
--data_path ./playground/data/llava_v1_5_mix665k.json \
--image_folder ./playground/data \
--data_path /home/akash/data/LLAVA_data.json \
--image_folder /home/akash/data/ \
--vision_tower openai/clip-vit-large-patch14-336 \
--pretrain_mm_mlp_adapter ./checkpoints/llava-v1.5-13b-pretrain/mm_projector.bin \
--mm_projector_type mlp2x_gelu \
--mm_vision_select_layer -2 \
--mm_use_im_start_end False \
--mm_use_im_patch_token False \
--image_aspect_ratio pad \
--group_by_modality_length True \
--bf16 True \
--output_dir ./checkpoints/llava-v1.5-13b-lora \
--output_dir ./checkpoints/llava-v1.6-mistral-7b \
--num_train_epochs 1 \
--per_device_train_batch_size 16 \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \
--gradient_accumulation_steps 1 \
--evaluation_strategy "no" \
@@ -34,5 +34,4 @@ deepspeed llava/train/train_mem.py \
--model_max_length 2048 \
--gradient_checkpointing True \
--dataloader_num_workers 4 \
--lazy_preprocess True \
--report_to wandb
--lazy_preprocess True
Empty file modified scripts/v1_5/finetune_task.sh
100644 → 100755
Empty file.
16 changes: 8 additions & 8 deletions scripts/v1_5/finetune_task_lora.sh
100644 → 100755
@@ -1,12 +1,12 @@
#!/bin/bash

deepspeed llava/train/train_mem.py \
deepspeed "${PWD}/../../llava/train/train_mem.py" \
--lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
--deepspeed ./scripts/zero3.json \
--model_name_or_path liuhaotian/llava-v1.5-13b \
--deepspeed "${PWD}/../zero3.json" \
--model_name_or_path liuhaotian/llava-v1.5-7b \
--version v1 \
--data_path ./playground/data/llava_v1_5_mix665k.json \
--image_folder ./playground/data \
--data_path "${PWD}/../../data_prep/splitted_data/split_json_files/train_json.json" \
--image_folder "${PWD}/../../data_prep/splitted_data/" \
--vision_tower openai/clip-vit-large-patch14-336 \
--mm_projector_type mlp2x_gelu \
--mm_vision_select_layer -2 \
@@ -15,9 +15,9 @@ deepspeed llava/train/train_mem.py \
--image_aspect_ratio pad \
--group_by_modality_length True \
--bf16 True \
--output_dir ./checkpoints/llava-v1.5-13b-task-lora \
--output_dir ./checkpoints/llava-v1.5-7b-task-lora \
--num_train_epochs 1 \
--per_device_train_batch_size 16 \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \
--gradient_accumulation_steps 1 \
--evaluation_strategy "no" \
@@ -34,4 +34,4 @@ deepspeed llava/train/train_mem.py \
--gradient_checkpointing True \
--dataloader_num_workers 4 \
--lazy_preprocess True \
--report_to wandb

Empty file modified scripts/v1_5/pretrain.sh
100644 → 100755
Empty file.
Empty file modified scripts/zero2.json
100644 → 100755
Empty file.
7 changes: 5 additions & 2 deletions scripts/zero3.json
100644 → 100755
@@ -24,5 +24,8 @@
"stage3_max_live_parameters": 1e9,
"stage3_max_reuse_distance": 1e9,
"stage3_gather_16bit_weights_on_model_save": true
}
}
},
"wandb": {
"enabled": false
}
}
Empty file modified scripts/zero3_offload.json
100644 → 100755
Empty file.
52 changes: 52 additions & 0 deletions wiki/navigation.md
@@ -0,0 +1,52 @@
# Welcome to the LLaVA DSL Gen Project Wiki

## Table of Contents
- [Introduction](#introduction)
- [Installation](#installation)
- [Usage](#usage)

## Introduction
Welcome to the LLaVA DSL Gen Project! This project is designed to demonstrate how to install and navigate through this repository.

## Installation

### Step-by-Step LLaVA Installation Guide from the GitHub Repository

1. **Clone this repository and navigate to the LLaVA folder:**
```shell
git clone https://github.com/haotian-liu/LLaVA.git
cd LLaVA
```

2. **Install Package:**
```shell
conda create -n llava python=3.10 -y
conda activate llava
pip install --upgrade pip # enable PEP 660 support
pip install -e .
```

3. **Install additional packages for training cases:**
```shell
pip install -e ".[train]"
pip install flash-attn --no-build-isolation
```

_To run the current code, use the llava_new venv._

## Usage
### Finetuning Guide
To start finetuning, run _LLaVA/scripts/v1_5/finetune_task_lora.sh_ with the desired hyperparameter settings.

### Data Preparation Guide
In addition to the existing steps, this repository also offers a feature to split your data into train/eval/test sets according to your desired split ratio. Follow the steps below; a quick verification sketch is shown after these steps.

1. _LLaVA_InitialJson.py_ prepares your initial custom data in the LLaVA dataset format and returns a .json file used in the next step.
```shell
Syntax: python LLaVA_InitialJson.py <input_folder_with_gui_and_png_files> <output_folder_to_store_json_file>
```

2. _LLaVA_dataSplit.py_ splits your data into the required train/eval/test sets. The default ratio is 80/10/10.
```shell
Syntax: python LLaVA_dataSplit.py <input_folder_containing_data_and_json_file> <output_folder_to_save_the_split_dataset>
```
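As a quick sanity check after running the two scripts, here is a short sketch that counts how many records ended up in each generated split file; the output folder path is only an example and should match the one passed to _LLaVA_dataSplit.py_.

```python
import json
import os

# Example output folder; replace with the folder you passed to LLaVA_dataSplit.py.
output_folder = os.path.expanduser("~/splitted_data_verify")
split_dir = os.path.join(output_folder, "split_json_files")

# LLaVA_dataSplit.py writes train_json.json, test_json.json and validation_json.json here.
for split in ("train", "test", "validation"):
    with open(os.path.join(split_dir, f"{split}_json.json")) as f:
        records = json.load(f)
    print(f"{split}: {len(records)} samples")
```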
53 changes: 53 additions & 0 deletions wiki/navigation.md.save
@@ -0,0 +1,53 @@
# Welcome to the LLaVA DSL Gen Project Wiki

## Table of Contents
- [Introduction](#introduction)
- [Installation](#installation)
- [Usage](#usage)
- [Contributing](#contributing)
- [FAQ](#faq)

## Introduction
Welcome to the LLaVA DSL Gen Project! This project is designed to demonstrate how to install and navigate through this repository.

## Installation

### Step-by-Step LLaVA Installation Guide from the Github Repository

1. **Clone this repository and navigate to the LLaVA folder:**
```shell
git clone https://github.com/haotian-liu/LLaVA.git
cd LLaVA
```

2. **Install Package:**
```shell
conda create -n llava python=3.10 -y
conda activate llava
pip install --upgrade pip # enable PEP 660 support
pip install -e .
```

3. **Install additional packages for training cases:**
```shell
pip install -e ".[train]"
pip install flash-attn --no-build-isolation
```

_To run the current code, use the llava_new venv._

### Finetuning Guide
To start finetuning, run the _LLaVA/scripts/v1_5/finetune_task_lora.sh_ with the desired hyperparameter settings.

### Data Preparation Guide
In addition to the existing steps, this repository also offers a feature to split your data into train/eval/test sets according to your desired split ratio. Follow the steps below.
1. _



## Usage
After installation, you can start the project with:
```bash
npm start

_