This repository has been archived by the owner on Apr 11, 2024. It is now read-only.

Commit

update
BobaZooba committed Nov 10, 2023
1 parent 088b432 commit 2294cf4
Showing 3 changed files with 41 additions and 10 deletions.
35 changes: 26 additions & 9 deletions README.md
@@ -56,7 +56,7 @@ Are you using **Large Language Models (LLMs)** for your work and want to train t

# Quickstart 🦖

-## Installation
+### Installation

X—LLM is tested on Python 3.8+, PyTorch 2.0.1+ and CUDA 11.8.

@@ -72,7 +72,7 @@
pip install xllm[train]

The default `xllm` package is recommended for local development; `xllm[train]` is recommended for training.
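
Both variants install the same package; the `[train]` extra only pulls in the additional training dependencies. A minimal sketch of the two options:

```sh
pip install xllm            # local development
pip install "xllm[train]"   # training (extra dependencies)
```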

-### Training recommended environment
+#### Recommended training environment

CUDA version: `11.8`
Docker: `huggingface/transformers-pytorch-gpu:latest`
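
To reproduce this environment, one option is to start from the image above (a sketch; requires the NVIDIA container toolkit, and GPU flags may need adjusting for your setup):

```sh
docker run --gpus all -it huggingface/transformers-pytorch-gpu:latest bash
pip install "xllm[train]"
```
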
@@ -84,7 +84,7 @@
from xllm import Config
from xllm.datasets import GeneralDataset
from xllm.experiments import Experiment

-# 1. Init Config
+# 1. Init Config. It controls the internal logic of xllm: whether to apply LoRA, and so on
config = Config(model_name_or_path="facebook/opt-350m")

# 2. Prepare data
@@ -93,16 +93,19 @@
train_data = ["Hello!"] * 100
# 3. Load data
train_dataset = GeneralDataset.from_list(data=train_data)

-# 4. Init Experiment
+# 4. Init Experiment. It puts together everything you need for training
experiment = Experiment(config=config, train_dataset=train_dataset)

-# 5. Build Experiment
+# 5. Build Experiment. This step takes some time: the tokenizer is initialized, the model is loaded, LoRA and bitsandbytes quantization are applied, etc.
experiment.build()

-# 6. Run Experiment
+# 6. Run Experiment. This is where the model is trained and any post-training actions you specified are performed
experiment.run()

-# 7. [Optional] Push to HF Hub
+# 7. [Optional] Fuse LoRA layers. Works even with 4-bit and 8-bit bitsandbytes quantization
+experiment.fuse_lora()
+
+# 8. [Optional] Push the fused model to the HuggingFace Hub
experiment.push_to_hub(repo_id="YOUR_NAME/MODEL_NAME")
```
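
Once pushed, the fused checkpoint is a regular `transformers` model, so it can be loaded back without xllm (a sketch; the repo id is a placeholder):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("YOUR_NAME/MODEL_NAME")
model = AutoModelForCausalLM.from_pretrained("YOUR_NAME/MODEL_NAME")
```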

@@ -112,13 +115,19 @@ experiment.push_to_hub(repo_id="YOUR_NAME/MODEL_NAME")
#### Simple

```python
-config = Config(model_name_or_path="facebook/opt-350m")
+config = Config(apply_lora=True)
```

#### Advanced

```python
-config = Config(model_name_or_path="facebook/opt-350m")
+config = Config(
+    apply_lora=True,
+    lora_rank=8,
+    lora_alpha=32,
+    lora_dropout=0.05,
+    raw_lora_target_modules="k,q,v",  # names of the modules to apply LoRA to, as a comma-separated string
+)
```
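
For intuition, these hyperparameters describe a low-rank update that is added to the frozen base weight; this is what `fuse_lora()` later bakes in. A minimal numpy sketch of the idea (illustrative only, not xllm internals):

```python
import numpy as np

in_features, out_features = 16, 16
lora_rank, lora_alpha = 8, 32

W = np.random.randn(out_features, in_features)       # frozen base weight
A = np.random.randn(lora_rank, in_features) * 0.01   # trainable LoRA matrix A
B = np.zeros((out_features, lora_rank))              # trainable LoRA matrix B, zero-initialized

# Effective weight after fusing: W + (alpha / rank) * B @ A
W_fused = W + (lora_alpha / lora_rank) * B @ A
```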

</details>
@@ -127,6 +136,10 @@ config = Config(model_name_or_path="facebook/opt-350m")
<summary>QLoRA</summary>
</details>

+<details>
+<summary>Push checkpoints to the HuggingFace Hub</summary>
+</details>

<details>
<summary>Gradient checkpointing</summary>
</details>
@@ -151,6 +164,10 @@
<summary>GPTQ Quantization</summary>
</details>

+### Colab notebooks
+
+-

## Production solution 🚀

Run the existing project
8 changes: 7 additions & 1 deletion src/xllm/core/config.py
@@ -152,6 +152,12 @@ class Config:
            "help": "Local path to fused model. Useful if you want to quantize the model after fusing on the same machine",
        },
    )
+    fuse_after_train: bool = field(
+        default=False,
+        metadata={
+            "help": "Whether to fuse LoRA layers into the base model after training",
+        },
+    )

    # gptq quantization
    quantization_dataset_id: Optional[str] = field(
@@ -367,7 +373,7 @@ class Config:
        },
    )
    lora_rank: int = field(
-        default=64,
+        default=8,
        metadata={
            "help": "LoRA rank value R. LoRA decomposes the weight update into two low-rank matrices with inner dimension R",
        },
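
Taken together with the new default rank, the added flag can be set like any other `Config` field, e.g. (a sketch; the model name is a placeholder):

```python
from xllm import Config

config = Config(
    model_name_or_path="facebook/opt-350m",
    apply_lora=True,        # train LoRA adapters (rank now defaults to 8)
    fuse_after_train=True,  # fuse LoRA layers into the base model after training
)
```
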
8 changes: 8 additions & 0 deletions src/xllm/experiments/base.py
@@ -425,6 +425,9 @@ def run(self):

        self.after_train()

+        if self.config.fuse_after_train:
+            self.fuse_lora()
+
        if is_distributed_training():
            if distributed.get_rank() == self.config.local_rank:
                post_training(config=self.config, tokenizer=self.tokenizer)
@@ -484,8 +487,13 @@ def fuse_lora(self) -> PreTrainedModel:

        dist_logger("LoRA fused")

+        self.after_fuse()
+
        return self.model

+    def after_fuse(self) -> None:
+        return None
+
    def after_train(self) -> None:
        return None

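
The new `after_fuse` hook is a no-op by default, mirroring `after_train`; a subclass can override it to react once `fuse_lora()` has merged the weights (a sketch):

```python
from xllm.experiments import Experiment

class MyExperiment(Experiment):
    def after_fuse(self) -> None:
        # called by fuse_lora() right after the LoRA layers are merged
        print("LoRA fused; ready to push or quantize")
```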
