From 102bc82b7f4e2d36c1414d19cc4ef22021ce06cc Mon Sep 17 00:00:00 2001
From: Bhushan Kapkar <49440262+bpkapkar@users.noreply.github.com>
Date: Mon, 29 Apr 2024 09:28:18 +0530
Subject: [PATCH] Update clm-prompt-tuning.ipynb

The code snippet needs a correction in the line:

    labels["input_ids"][i] = [-100] * (max_length - len(sample_input_ids)) + label_input_ids

Change it to:

    labels["input_ids"][i] = [-100] * (max_length - len(label_input_ids)) + label_input_ids

This adjustment ensures that the label token ids are padded or truncated
based on their own length, aligning with Hugging Face's recommended
practice and avoiding issues caused by unequal lengths of the input and
label token ids.

The same correction also needs to be made in the documentation, as
mentioned at
https://huggingface.co/docs/peft/main/en/task_guides/prompt_based_methods
and
https://huggingface.co/docs/peft/main/en/task_guides/clm-prompt-tuning
---
 peft_docs/en/clm-prompt-tuning.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/peft_docs/en/clm-prompt-tuning.ipynb b/peft_docs/en/clm-prompt-tuning.ipynb
index 205ccc51..c235d2a1 100644
--- a/peft_docs/en/clm-prompt-tuning.ipynb
+++ b/peft_docs/en/clm-prompt-tuning.ipynb
@@ -197,7 +197,7 @@
     "        model_inputs[\"attention_mask\"][i] = [0] * (max_length - len(sample_input_ids)) + model_inputs[\n",
     "            \"attention_mask\"\n",
     "        ][i]\n",
-    "        labels[\"input_ids\"][i] = [-100] * (max_length - len(sample_input_ids)) + label_input_ids\n",
+    "        labels[\"input_ids\"][i] = [-100] * (max_length - len(label_input_ids)) + label_input_ids\n",
     "        model_inputs[\"input_ids\"][i] = torch.tensor(model_inputs[\"input_ids\"][i][:max_length])\n",
     "        model_inputs[\"attention_mask\"][i] = torch.tensor(model_inputs[\"attention_mask\"][i][:max_length])\n",
     "        labels[\"input_ids\"][i] = torch.tensor(labels[\"input_ids\"][i][:max_length])\n",
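
Note (illustrative, not part of the patch): a minimal sketch of why each
sequence should be left-padded by its own length. The token id values and
max_length below are invented for illustration; -100 is the label index
ignored by PyTorch's cross-entropy loss.

    # Hypothetical values for illustration only.
    max_length = 8
    sample_input_ids = [7, 12, 9, 4, 3, 25]          # prompt + target ids
    label_input_ids = [-100, -100, -100, 4, 3, 25]   # labels for the same positions

    # Left-pad each list to max_length using its *own* current length, so
    # both come out exactly max_length long even if their lengths differ.
    padded_inputs = [0] * (max_length - len(sample_input_ids)) + sample_input_ids
    padded_labels = [-100] * (max_length - len(label_input_ids)) + label_input_ids

    assert len(padded_inputs) == len(padded_labels) == max_length

If the labels were instead padded by (max_length - len(sample_input_ids)),
any mismatch between the two lists' lengths would leave the padded labels a
different length than the padded inputs.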