fix: do not resize embedding layer by default (#310)
Signed-off-by: Mehant Kammakomati <[email protected]>
kmehant authored Aug 20, 2024
1 parent a6d093e commit 822311c
Showing 2 changed files with 5 additions and 3 deletions.
tuning/config/configs.py (4 additions, 2 deletions)
@@ -42,10 +42,12 @@ class ModelArguments:
     )
     torch_dtype: Optional[Union[torch.dtype, str]] = torch.bfloat16
     embedding_size_multiple_of: Optional[int] = field(
-        default=8,
+        default=1,
         metadata={
             "help": "Resize model embedding layer to the nearest multiple of \
-                the given number after tokenizer modifications."
+                the given number after tokenizer modifications. \
+                NOTE: This involves extending \
+                the embedding layer without any corresponding real tokens."
         },
     )
     tokenizer_name_or_path: Optional[str] = field(
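
For intuition on what this default change does, here is a small standalone sketch (illustration only, not code from this repository) of the rounding the flag controls. With multiple_of=8, a 32,003-token vocabulary is padded to a 32,008-row embedding matrix, leaving 5 rows that map to no real token; the new default of 1 keeps the matrix at exactly the vocabulary size.

import math

def padded_embedding_size(vocab_size: int, multiple_of: int) -> int:
    # Round vocab_size up to the nearest multiple of multiple_of.
    return math.ceil(vocab_size / multiple_of) * multiple_of

print(padded_embedding_size(32003, multiple_of=8))  # 32008: 5 padding rows
print(padded_embedding_size(32003, multiple_of=1))  # 32003: no padding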
tuning/data/tokenizer_data_utils.py (1 addition, 1 deletion)
@@ -24,7 +24,7 @@ def tokenizer_and_embedding_resize(
     special_tokens_dict: Dict,
     tokenizer: transformers.PreTrainedTokenizer,
     model: transformers.PreTrainedModel,
-    multiple_of: int = 8,
+    multiple_of: int = 1,
 ):
     """Resize tokenizer and embedding."""
     num_new_tokens = tokenizer.add_special_tokens(special_tokens_dict)
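
For context, a minimal sketch of how such a resize can be implemented (this illustrates the technique, not the exact body of the repository's function; tokenizer.add_special_tokens and model.resize_token_embeddings are standard transformers APIs, while the explicit rounding step is an assumption):

import math
from typing import Dict

import transformers

def tokenizer_and_embedding_resize_sketch(
    special_tokens_dict: Dict,
    tokenizer: transformers.PreTrainedTokenizer,
    model: transformers.PreTrainedModel,
    multiple_of: int = 1,
) -> int:
    """Add special tokens, then resize the embedding layer to match.

    With multiple_of=1 (the new default) the embedding matrix tracks the
    tokenizer vocabulary exactly; larger values pad it upward, creating
    rows that correspond to no real token.
    """
    num_new_tokens = tokenizer.add_special_tokens(special_tokens_dict)
    # Round the vocabulary size up to the nearest multiple of multiple_of.
    new_size = math.ceil(len(tokenizer) / multiple_of) * multiple_of
    model.resize_token_embeddings(new_size)
    return num_new_tokens

Recent transformers releases also expose a pad_to_multiple_of keyword on resize_token_embeddings that performs the same rounding internally.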
