From 7e11d074dede210697a9039f0b8fdd8fcf83421f Mon Sep 17 00:00:00 2001
From: Randy Gelhausen
Date: Wed, 24 May 2023 21:38:43 -0400
Subject: [PATCH 1/8] Added other dependencies and clarification about HF models

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2b2dda8e..167ca320 100644
--- a/README.md
+++ b/README.md
@@ -37,11 +37,12 @@ pip install -q -U bitsandbytes
 pip install -q -U git+https://github.com/huggingface/transformers.git
 pip install -q -U git+https://github.com/huggingface/peft.git
 pip install -q -U git+https://github.com/huggingface/accelerate.git
+pip install -U datasets evaluate scipy nltk
 ```
 
 ## Getting Started
 The `qlora.py` code is a starting point for finetuning and inference on various datasets.
-Basic command for finetuning a baseline model on the Alpaca dataset:
+Basic command for finetuning a baseline (HuggingFace formatted) llama model on the Alpaca dataset:
 ```bash
 python qlora.py --model_name_or_path <path_or_name>
 ```
From 8532e0117d85b1d63eaa3ef96598d77ecd27a5d6 Mon Sep 17 00:00:00 2001
From: Qubitium <417764+Qubitium@users.noreply.github.com>
Date: Thu, 25 May 2023 07:00:32 +0000
Subject: [PATCH 2/8] Check for LlamaTokenizerFast rather than infer type from path name.

Fix cases where non-standard llama model path names get bypassed in the
tokenizer check. The tokenizer is initialized with use_fast=True, and qlora
requires transformers > 4.29.2, so the only possible tokenizer is
LlamaTokenizerFast.
---
 qlora.py | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/qlora.py b/qlora.py
index ea72bbc4..954fbd09 100644
--- a/qlora.py
+++ b/qlora.py
@@ -23,11 +23,12 @@
     AutoModelForCausalLM,
     set_seed,
     Seq2SeqTrainer,
-    BitsAndBytesConfig
+    BitsAndBytesConfig,
+    LlamaTokenizerFast
+
 )
 from datasets import load_dataset
 import evaluate
-import nltk
 
 from peft import (
     prepare_model_for_int8_training,
@@ -608,18 +609,19 @@ def train():
             tokenizer=tokenizer,
             model=model,
         )
-    if any(key in args.model_name_or_path for key in ['llama', '7B', '13B', '30B', '65B']):
-        # LLaMA tokenizer does not have special tokens set.
-        # Add them to prevent them from being parsed into different tokens.
+    if isinstance(tokenizer, LlamaTokenizerFast):
+        # LLaMA tokenizer may not have correct special tokens set.
+        # Check and add them if missing to prevent them from being parsed into different tokens.
         # Note that these are present in the vocabulary.
         # Note also that `model.config.pad_token_id` is 0 which corresponds to `<unk>` token.
-        tokenizer.add_special_tokens(
-            {
-                "eos_token": tokenizer.convert_ids_to_tokens(model.config.eos_token_id),
-                "bos_token": tokenizer.convert_ids_to_tokens(model.config.bos_token_id),
-                "unk_token": tokenizer.convert_ids_to_tokens(model.config.pad_token_id),
-            }
-        )
+        if tokenizer.eos_token_id != model.config.eos_token_id or tokenizer.pad_token_id != model.config.pad_token_id or tokenizer.unk_token_id != model.config.unk_token_id:
+            tokenizer.add_special_tokens(
+                {
+                    "eos_token": tokenizer.convert_ids_to_tokens(model.config.eos_token_id),
+                    "bos_token": tokenizer.convert_ids_to_tokens(model.config.bos_token_id),
+                    "unk_token": tokenizer.convert_ids_to_tokens(model.config.pad_token_id),
+                }
+            )
 
     data_module = make_data_module(tokenizer=tokenizer, args=args)
     trainer = Seq2SeqTrainer(
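The check introduced in PATCH 2/8 can be exercised outside `qlora.py`. The sketch below is not part of the patch series: it assumes a HuggingFace-format LLaMA checkpoint at the hypothetical local path `./llama-7b-hf`, and it reads the token ids from `AutoConfig` instead of a fully loaded model, but it otherwise mirrors the class check and the mismatch test from the patch.

```python
from transformers import AutoConfig, AutoTokenizer, LlamaTokenizerFast

# Hypothetical local path to a HuggingFace-format LLaMA checkpoint.
model_path = "./llama-7b-hf"

# qlora.py takes these ids from model.config; AutoConfig avoids loading the weights here.
config = AutoConfig.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="right", use_fast=True)

# Key off the tokenizer class instead of guessing from the checkpoint path name.
if isinstance(tokenizer, LlamaTokenizerFast):
    # Some LLaMA configs do not declare unk_token_id, so read it defensively.
    unk_token_id = getattr(config, "unk_token_id", None)
    if (
        tokenizer.eos_token_id != config.eos_token_id
        or tokenizer.pad_token_id != config.pad_token_id
        or tokenizer.unk_token_id != unk_token_id
    ):
        tokenizer.add_special_tokens(
            {
                "eos_token": tokenizer.convert_ids_to_tokens(config.eos_token_id),
                "bos_token": tokenizer.convert_ids_to_tokens(config.bos_token_id),
                # pad_token_id is 0 in these checkpoints, which maps to the <unk> token.
                "unk_token": tokenizer.convert_ids_to_tokens(config.pad_token_id),
            }
        )
```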
From 073a4856f933767ea32f557fbed96ec593a5c2fa Mon Sep 17 00:00:00 2001
From: Artidoro Pagnoni
Date: Thu, 25 May 2023 12:13:57 -0700
Subject: [PATCH 3/8] Update README.md

---
 README.md | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 2b2dda8e..478125bf 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ In addition, we release the Guanaco model family for base LLaMA model sizes of 7
 
 ## Demo
 Guanaco is a system purely intended for research purposes and could produce problematic outputs.
-1. Access the [live demo here](https://huggingface.co/spaces/uwnlp/guanaco-playground-tgi).
+1. Access the [live demo here](https://huggingface.co/spaces/uwnlp/guanaco-playground-tgi). Note this is the 33B model, the 65B model demo will come later.
 
 2. Or host your own Guanaco gradio demo directly in Colab with [this notebook](https://colab.research.google.com/drive/17XEqL1JcmVWjHkT-WczdYkJlNINacwG7?usp=sharing). Works with free GPUs for 7B and 13B models.
 
@@ -78,10 +78,14 @@ Quantization parameters are controlled from the `BitsandbytesConfig` ([see HF do
 You can access the paged optimizer with the argument `--optim paged_adamw_32bit`
 
 ## Tutorials and Demonstrations
-Examples are found under the `examples/` folder.
+Here is [a blog](https://huggingface.co/blog/4bit-transformers-bitsandbytes) discussing 4-bit quantization, QLoRA, and how they are integrated in transformers.
 
-### Colab Gradio Demo
 You can host your own gradio Guanaco demo directly in Colab following [this notebook](https://colab.research.google.com/drive/17XEqL1JcmVWjHkT-WczdYkJlNINacwG7?usp=sharing).
+In addition, here are Colab notebooks with examples for inference and finetuning using QLoRA:
+- [Inference notebook](https://colab.research.google.com/drive/1VoYNfYDKcKRQRor98Zbf2-9VQTtGJ24k?usp=sharing)
+- [Finetuning notebook](https://colab.research.google.com/drive/1VoYNfYDKcKRQRor98Zbf2-9VQTtGJ24k?usp=sharing)
+
+Other examples are found under the `examples/` folder.
 
 ## Sample Outputs
 We provide generations for the models described in the paper for both OA and Vicuna queries in the `eval/generations` folder. These are intended to foster further research on model evaluation and analysis.
@@ -118,7 +122,8 @@ Here a list of known issues and bugs. If your issue is not reported here, please
 }
 ```
 
-## Acknoledgements
+## Acknowledgements
 We thank the Huggingface team, in particular Younes Belkada, for their support integrating QLoRA with PEFT and transformers libraries.
+We also thank Meta for releasing the LLaMA models without which this work would not have been possible.
 
 This repo builds on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) and [LMSYS FastChat](https://github.com/lm-sys/FastChat) repos.

From 530382b4301ac4be147fbffb3e19bedcacffd971 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20My=C5=9Bli=C5=84ski?=
Date: Sat, 27 May 2023 10:32:46 +0200
Subject: [PATCH 4/8] Fix link to inference notebook

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 478125bf..ab87c50c 100644
--- a/README.md
+++ b/README.md
@@ -82,7 +82,7 @@ Here is [a blog](https://huggingface.co/blog/4bit-transformers-bitsandbytes) dis
 
 You can host your own gradio Guanaco demo directly in Colab following [this notebook](https://colab.research.google.com/drive/17XEqL1JcmVWjHkT-WczdYkJlNINacwG7?usp=sharing).
 In addition, here are Colab notebooks with examples for inference and finetuning using QLoRA:
-- [Inference notebook](https://colab.research.google.com/drive/1VoYNfYDKcKRQRor98Zbf2-9VQTtGJ24k?usp=sharing)
+- [Inference notebook](https://colab.research.google.com/drive/1ge2F1QSK8Q7h0hn3YKuBCOAS0bK8E0wf?usp=sharing)
 - [Finetuning notebook](https://colab.research.google.com/drive/1VoYNfYDKcKRQRor98Zbf2-9VQTtGJ24k?usp=sharing)
 
 Other examples are found under the `examples/` folder.

From 3926ee5c02449ba7f99c4645b21a07dfefe61dc8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20M=C3=BCller?= <11507045+muelletm@users.noreply.github.com>
Date: Sat, 27 May 2023 20:33:23 +0200
Subject: [PATCH 5/8] Update finetune.sh

Fixes a copy paste error where per_device_train_batch_size was set twice.
---
 scripts/finetune.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/finetune.sh b/scripts/finetune.sh
index ee166287..f7569a1a 100755
--- a/scripts/finetune.sh
+++ b/scripts/finetune.sh
@@ -8,7 +8,7 @@ python qlora.py \
     --source_max_len 384 \
     --target_max_len 128 \
     --per_device_train_batch_size 4 \
-    --per_device_train_batch_size 4 \
+    --per_device_eval_batch_size 4 \
     --gradient_accumulation_steps 4 \
     --logging_steps 10 \
     --max_steps 10000 \

From a3809620eedba9524b01dabc1db560050cac5ca2 Mon Sep 17 00:00:00 2001
From: Artidoro Pagnoni
Date: Sat, 27 May 2023 17:01:08 -0700
Subject: [PATCH 6/8] Adding guanaco openassistant dataset

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 478125bf..3082d3ed 100644
--- a/README.md
+++ b/README.md
@@ -100,6 +100,9 @@ To facilitate the replication of our evaluation and future work in this area, we
 
 More details can be found at `eval/EVAL_README.md`.
 
+## Dataset for Guanaco
+You can find the dataset used to train Guanaco models on HF at [timdettmers/openassistant-guanaco](https://huggingface.co/datasets/timdettmers/openassistant-guanaco).
+
 ## Known Issues and Limitations
 Here a list of known issues and bugs. If your issue is not reported here, please open a new issue and describe the problem.
 
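PATCH 6/8 only documents where the Guanaco training data is hosted; pulling it locally needs nothing beyond the `datasets` dependency added in PATCH 1/8. A minimal sketch, making no assumptions about the dataset's split or column names:

```python
from datasets import load_dataset

# Dataset id taken from the README line added in PATCH 6/8.
guanaco = load_dataset("timdettmers/openassistant-guanaco")

# List the available splits, then peek at the first record of the first split.
print(guanaco)
first_split = next(iter(guanaco.values()))
print(first_split[0])
```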
From e31aedd4ba77c5cfafb60e9eeab6af0690b241e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Piotr=20My=C5=9Bli=C5=84ski?=
Date: Sun, 28 May 2023 14:00:12 +0200
Subject: [PATCH 7/8] Suppress pad_token warning message

---
 qlora.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qlora.py b/qlora.py
index 954fbd09..59648ee8 100644
--- a/qlora.py
+++ b/qlora.py
@@ -603,7 +603,7 @@ def train():
         padding_side="right",
         use_fast=True,
     )
-    if tokenizer.pad_token is None:
+    if tokenizer._pad_token is None:
         smart_tokenizer_and_embedding_resize(
             special_tokens_dict=dict(pad_token=DEFAULT_PAD_TOKEN),
             tokenizer=tokenizer,
             model=model,
         )

From a1c807ff16386e31fe30ad5b3f382ba6fbe11c5b Mon Sep 17 00:00:00 2001
From: Randy Gelhausen
Date: Mon, 29 May 2023 20:38:38 -0400
Subject: [PATCH 8/8] moved added requirements to requirements.txt

---
 README.md        | 1 -
 requirements.txt | 3 +++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 4f84cc92..4710cb50 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,6 @@ pip install -q -U bitsandbytes
 pip install -q -U git+https://github.com/huggingface/transformers.git
 pip install -q -U git+https://github.com/huggingface/peft.git
 pip install -q -U git+https://github.com/huggingface/accelerate.git
-pip install -U datasets evaluate scipy nltk
 ```
 
 ## Getting Started
diff --git a/requirements.txt b/requirements.txt
index 0ab6df72..51c11ef7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,6 @@ rouge-score==0.1.2
 scikit-learn==1.2.2
 sentencepiece==0.1.99
 wandb==0.15.2
+datasets
+evaluate
+scipy
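A closing note on PATCH 7/8: the public `tokenizer.pad_token` property logs a "Using pad_token, but it is not set yet." message whenever it is read while no pad token is configured, so probing the underlying `_pad_token` attribute keeps the check silent while the `smart_tokenizer_and_embedding_resize` call still runs when needed. The sketch below illustrates that flow and is not code from the patches: the checkpoint path is a hypothetical placeholder, `"[PAD]"` stands in for the `DEFAULT_PAD_TOKEN` constant defined in `qlora.py`, and the resize step is a simplified version of what the helper in `qlora.py` does (the real helper also initializes the new embedding rows from the mean of the existing ones).

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical local path to a HuggingFace-format LLaMA checkpoint.
model_path = "./llama-7b-hf"

model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="right", use_fast=True)

# Reading the private attribute avoids the log message that the public
# `pad_token` property emits when no pad token is set.
if tokenizer._pad_token is None:
    # Register a pad token and grow the embedding matrix to cover it.
    tokenizer.add_special_tokens({"pad_token": "[PAD]"})
    model.resize_token_embeddings(len(tokenizer))
```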