Skip to content

Commit

Permalink
2024-12-10 nightly release (f2bd4bc)
Browse files Browse the repository at this point in the history
  • Loading branch information
pytorchbot committed Dec 10, 2024
1 parent 721c3c9 commit f542770
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 8 deletions.
5 changes: 4 additions & 1 deletion recipes/configs/llama3_2/1B_full_single_device.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@
#
# This config works only for training on a single device.


output_dir: /tmp/torchtune/llama3_2_1B/full_single_device # /tmp may be deleted by your system. Change it to your preference.

# Model Arguments
model:
_component_: torchtune.models.llama3_2.llama3_2_1b

# Tokenizer
tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer
Expand Down
25 changes: 18 additions & 7 deletions torchtune/training/checkpointing/_checkpointer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,7 @@

from torchtune import training
from torchtune.models import convert_weights
from torchtune.models.clip._convert_weights import clip_text_hf_to_tune
from torchtune.models.phi3._convert_weights import phi3_hf_to_tune, phi3_tune_to_hf
from torchtune.models.qwen2._convert_weights import qwen2_hf_to_tune, qwen2_tune_to_hf
from torchtune.rlhf.utils import reward_hf_to_tune, reward_tune_to_hf

from torchtune.training.checkpointing._utils import (
ADAPTER_CONFIG_FNAME,
ADAPTER_MODEL_FNAME,
Expand Down Expand Up @@ -159,7 +156,7 @@ def __init__(
self._resume_from_checkpoint = resume_from_checkpoint
self._model_type = ModelType[model_type]
self._output_dir = Path(output_dir)
self._output_dir.mkdir(exist_ok=True)
self._output_dir.mkdir(parents=True, exist_ok=True)

# save all files in input_dir, except model weights and mapping, to output_dir
# this is useful to preserve the tokenizer, configs, license, etc.
Expand Down Expand Up @@ -394,7 +391,7 @@ def __init__(
self._checkpoint_dir = Path(checkpoint_dir)
self._model_type = ModelType[model_type]
self._output_dir = Path(output_dir)
self._output_dir.mkdir(exist_ok=True)
self._output_dir.mkdir(parents=True, exist_ok=True)

# weight_map contains the state_dict key -> checkpoint file mapping so we can correctly
# partition the state dict into output checkpoint files. This is updated during checkpoint
Expand Down Expand Up @@ -509,17 +506,23 @@ def load_checkpoint(self) -> Dict[str, Any]:
msg="Converting Phi-3 Mini weights from HF format."
"Note that conversion of adapter weights into PEFT format is not supported.",
)
from torchtune.models.phi3._convert_weights import phi3_hf_to_tune

converted_state_dict[training.MODEL_KEY] = phi3_hf_to_tune(
merged_state_dict
)
elif self._model_type == ModelType.REWARD:
from torchtune.rlhf.utils import reward_hf_to_tune

converted_state_dict[training.MODEL_KEY] = reward_hf_to_tune(
merged_state_dict,
num_heads=self._config["num_attention_heads"],
num_kv_heads=self._config["num_key_value_heads"],
dim=self._config["hidden_size"],
)
elif self._model_type == ModelType.QWEN2:
from torchtune.models.qwen2._convert_weights import qwen2_hf_to_tune

converted_state_dict[training.MODEL_KEY] = qwen2_hf_to_tune(
merged_state_dict,
num_heads=self._config["num_attention_heads"],
Expand Down Expand Up @@ -550,6 +553,8 @@ def load_checkpoint(self) -> Dict[str, Any]:
),
)
elif self._model_type == ModelType.CLIP_TEXT:
from torchtune.models.clip._convert_weights import clip_text_hf_to_tune

converted_state_dict[training.MODEL_KEY] = clip_text_hf_to_tune(
merged_state_dict,
)
Expand Down Expand Up @@ -610,17 +615,23 @@ def save_checkpoint(
# convert the state_dict back to hf format; do this inplace
if not adapter_only:
if self._model_type == ModelType.PHI3_MINI:
from torchtune.models.phi3._convert_weights import phi3_tune_to_hf

state_dict[training.MODEL_KEY] = phi3_tune_to_hf(
state_dict[training.MODEL_KEY]
)
elif self._model_type == ModelType.REWARD:
from torchtune.rlhf.utils import reward_tune_to_hf

state_dict[training.MODEL_KEY] = reward_tune_to_hf(
state_dict[training.MODEL_KEY],
num_heads=self._config["num_attention_heads"],
num_kv_heads=self._config["num_key_value_heads"],
dim=self._config["hidden_size"],
)
elif self._model_type == ModelType.QWEN2:
from torchtune.models.qwen2._convert_weights import qwen2_tune_to_hf

state_dict[training.MODEL_KEY] = qwen2_tune_to_hf(
state_dict[training.MODEL_KEY],
num_heads=self._config["num_attention_heads"],
Expand Down Expand Up @@ -913,7 +924,7 @@ def __init__(
self._resume_from_checkpoint = resume_from_checkpoint
self._model_type = ModelType[model_type]
self._output_dir = Path(output_dir)
self._output_dir.mkdir(exist_ok=True)
self._output_dir.mkdir(parents=True, exist_ok=True)

# save all files in input_dir, except model weights and mapping, to output_dir
# this is useful to preserve the tokenizer, configs, license, etc.
Expand Down

0 comments on commit f542770

Please sign in to comment.