diff --git a/main/_modules/torchtune/training/checkpointing/_checkpointer.html b/main/_modules/torchtune/training/checkpointing/_checkpointer.html
index 42f6e4a944..283e29df8a 100644
--- a/main/_modules/torchtune/training/checkpointing/_checkpointer.html
+++ b/main/_modules/torchtune/training/checkpointing/_checkpointer.html
@@ -540,7 +540,7 @@ Source code for torchtune.training.checkpointing._checkpointer
 checkpoint_dir (str): Directory containing the checkpoint files
 checkpoint_files (List[str]): List of checkpoint files to load. Since the checkpointer takes
     care of sorting by file ID, the order in this list does not matter
-model_type (ModelType): Model type of the model for which the checkpointer is being loaded
+model_type (str): Model type of the model for which the checkpointer is being loaded
 output_dir (str): Directory to save the checkpoint files
 adapter_checkpoint (Optional[str]): Path to the adapter weights. Default is None
 recipe_checkpoint (Optional[str]): Path to the recipe state checkpoint file. Default is None
@@ -559,7 +559,7 @@
     self,
     checkpoint_dir: str,
     checkpoint_files: List[str],
-    model_type: ModelType,
+    model_type: str,
     output_dir: str,
     adapter_checkpoint: Optional[str] = None,
     recipe_checkpoint: Optional[str] = None,
@@ -588,7 +588,7 @@
     )
     self._resume_from_checkpoint = resume_from_checkpoint
-    self._model_type = model_type
+    self._model_type = ModelType[model_type]
     self._output_dir = Path(output_dir)

     # recipe_checkpoint contains the recipe state. This should be available if
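The substantive change is in this last hunk: `model_type` now arrives as a plain string and is resolved against the `ModelType` enum by name inside `__init__`. A minimal standalone sketch of that lookup semantics, using a hypothetical two-member enum rather than torchtune's real one:

```python
from enum import Enum

# Hypothetical subset of torchtune's ModelType enum, for illustration only.
class ModelType(Enum):
    LLAMA2 = "llama2"
    LLAMA3 = "llama3"

# ModelType[name] is Python's lookup-by-member-name, which is what
# `self._model_type = ModelType[model_type]` performs on the passed string.
assert ModelType["LLAMA2"] is ModelType.LLAMA2

# An unrecognized string fails at construction time with a KeyError,
# so a typo in a config surfaces immediately rather than deep in loading code.
try:
    ModelType["LLAMA99"]
except KeyError:
    pass
```

Callers that previously passed `ModelType.LLAMA2` now pass `"LLAMA2"`; internally, the stored value is still the enum member.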

@@ -751,7 +751,7 @@
 checkpoint_dir (str): Directory containing the checkpoint files
 checkpoint_files (Union[List[str], Dict[str, str]]): List of checkpoint files to load. Since the
     checkpointer takes care of sorting by file ID, the order in this list does not matter. TODO: update this
-model_type (ModelType): Model type of the model for which the checkpointer is being loaded
+model_type (str): Model type of the model for which the checkpointer is being loaded
 output_dir (str): Directory to save the checkpoint files
 adapter_checkpoint (Optional[str]): Path to the adapter weights. Default is None
 recipe_checkpoint (Optional[str]): Path to the recipe state checkpoint file. Default is None
@@ -767,7 +767,7 @@
     self,
     checkpoint_dir: str,
     checkpoint_files: Union[List[str], Dict[str, str]],
-    model_type: ModelType,
+    model_type: str,
     output_dir: str,
     adapter_checkpoint: Optional[str] = None,
     recipe_checkpoint: Optional[str] = None,
@@ -1152,7 +1152,7 @@
 checkpoint_dir (str): Directory containing the checkpoint files
 checkpoint_files (List[str]): List of checkpoint files to load. Currently this checkpointer only
     supports loading a single checkpoint file.
-model_type (ModelType): Model type of the model for which the checkpointer is being loaded
+model_type (str): Model type of the model for which the checkpointer is being loaded
 output_dir (str): Directory to save the checkpoint files
 adapter_checkpoint (Optional[str]): Path to the adapter weights. Default is None
 recipe_checkpoint (Optional[str]): Path to the recipe state checkpoint file. Default is None
@@ -1168,7 +1168,7 @@
     self,
     checkpoint_dir: str,
     checkpoint_files: List[str],
-    model_type: ModelType,
+    model_type: str,
     output_dir: str,
     adapter_checkpoint: Optional[str] = None,
     recipe_checkpoint: Optional[str] = None,
diff --git a/main/_sources/deep_dives/checkpointer.rst.txt b/main/_sources/deep_dives/checkpointer.rst.txt
index 024e555483..13aac698c6 100644
--- a/main/_sources/deep_dives/checkpointer.rst.txt
+++ b/main/_sources/deep_dives/checkpointer.rst.txt
@@ -443,7 +443,7 @@ For this section we'll use the Llama2 13B model in HF format.
         checkpoint_dir=checkpoint_dir,
         checkpoint_files=pytorch_files,
         output_dir=checkpoint_dir,
-        model_type=ModelType.LLAMA2
+        model_type="LLAMA2"
     )
     torchtune_sd = checkpointer.load_checkpoint()

diff --git a/main/deep_dives/checkpointer.html b/main/deep_dives/checkpointer.html
index 41db71fd97..f08895f2fd 100644
--- a/main/deep_dives/checkpointer.html
+++ b/main/deep_dives/checkpointer.html
@@ -847,7 +847,7 @@ Putting this all together
     checkpoint_dir=checkpoint_dir,
     checkpoint_files=pytorch_files,
     output_dir=checkpoint_dir,
-    model_type=ModelType.LLAMA2
+    model_type="LLAMA2"
 )
 torchtune_sd = checkpointer.load_checkpoint()
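For orientation, the full call from this deep-dive example after the change looks roughly like the following; the directory and shard file names are illustrative stand-ins for the Llama2 13B HF download:

```python
from torchtune.training import FullModelHFCheckpointer

checkpoint_dir = "/tmp/Llama-2-13b-hf"   # illustrative path
pytorch_files = [                        # illustrative shard names
    "pytorch_model-00001-of-00003.bin",
    "pytorch_model-00002-of-00003.bin",
    "pytorch_model-00003-of-00003.bin",
]

checkpointer = FullModelHFCheckpointer(
    checkpoint_dir=checkpoint_dir,
    checkpoint_files=pytorch_files,
    output_dir=checkpoint_dir,
    model_type="LLAMA2",   # was: model_type=ModelType.LLAMA2
)
torchtune_sd = checkpointer.load_checkpoint()
```

The string form also spares callers the `ModelType` import that the enum form required.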

diff --git a/main/generated/torchtune.training.FullModelHFCheckpointer.html b/main/generated/torchtune.training.FullModelHFCheckpointer.html
index e6b4083e3c..1cc1bc9e70 100644
--- a/main/generated/torchtune.training.FullModelHFCheckpointer.html
+++ b/main/generated/torchtune.training.FullModelHFCheckpointer.html
@@ -437,7 +437,7 @@ FullModelHFCheckpointer
-class torchtune.training.FullModelHFCheckpointer(checkpoint_dir: str, checkpoint_files: Union[List[str], Dict[str, str]], model_type: ModelType, output_dir: str, adapter_checkpoint: Optional[str] = None, recipe_checkpoint: Optional[str] = None, resume_from_checkpoint: bool = False, safe_serialization: bool = False)[source]
+class torchtune.training.FullModelHFCheckpointer(checkpoint_dir: str, checkpoint_files: Union[List[str], Dict[str, str]], model_type: str, output_dir: str, adapter_checkpoint: Optional[str] = None, recipe_checkpoint: Optional[str] = None, resume_from_checkpoint: bool = False, safe_serialization: bool = False)[source]
 Checkpointer which reads and writes checkpoints in HF’s format. For LoRA models this includes saving checkpoints in a format that can be loaded into PEFT via e.g. from_pretrained. Examples include the Llama-2-7b-hf model from the meta-llama repo (https://huggingface.co/meta-llama/Llama-2-7b-hf).
@@ -455,7 +455,7 @@

   • checkpoint_dir (str) – Directory containing the checkpoint files
   • checkpoint_files (Union[List[str], Dict[str, str]]) – List of checkpoint files to load. Since the checkpointer takes care of sorting by file ID, the order in this list does not matter. TODO: update this
-  • model_type (ModelType) – Model type of the model for which the checkpointer is being loaded
+  • model_type (str) – Model type of the model for which the checkpointer is being loaded
   • output_dir (str) – Directory to save the checkpoint files
   • adapter_checkpoint (Optional[str]) – Path to the adapter weights. Default is None
   • recipe_checkpoint (Optional[str]) – Path to the recipe state checkpoint file. Default is None

diff --git a/main/generated/torchtune.training.FullModelMetaCheckpointer.html b/main/generated/torchtune.training.FullModelMetaCheckpointer.html
index 5b4093de37..1283e4aee6 100644
--- a/main/generated/torchtune.training.FullModelMetaCheckpointer.html
+++ b/main/generated/torchtune.training.FullModelMetaCheckpointer.html
@@ -437,7 +437,7 @@ FullModelMetaCheckpointer
-class torchtune.training.FullModelMetaCheckpointer(checkpoint_dir: str, checkpoint_files: List[str], model_type: ModelType, output_dir: str, adapter_checkpoint: Optional[str] = None, recipe_checkpoint: Optional[str] = None, resume_from_checkpoint: bool = False)[source]
+class torchtune.training.FullModelMetaCheckpointer(checkpoint_dir: str, checkpoint_files: List[str], model_type: str, output_dir: str, adapter_checkpoint: Optional[str] = None, recipe_checkpoint: Optional[str] = None, resume_from_checkpoint: bool = False)[source]
 Checkpointer which reads and writes checkpoints in Meta’s format. Examples include the Llama-2-7b model from the meta-llama repo (https://huggingface.co/meta-llama/Llama-2-7b)
 Currently we support reading from a single checkpoint file only. Support for reading from
@@ -448,7 +448,7 @@ FullModelMetaCheckpointer

   • checkpoint_dir (str) – Directory containing the checkpoint files
   • checkpoint_files (List[str]) – List of checkpoint files to load. Currently this checkpointer only supports loading a single checkpoint file.
-  • model_type (ModelType) – Model type of the model for which the checkpointer is being loaded
+  • model_type (str) – Model type of the model for which the checkpointer is being loaded
   • output_dir (str) – Directory to save the checkpoint files
   • adapter_checkpoint (Optional[str]) – Path to the adapter weights. Default is None
   • recipe_checkpoint (Optional[str]) – Path to the recipe state checkpoint file. Default is None

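The same switch applies here. A sketch of constructing the Meta-format checkpointer with the new string argument (paths illustrative; Meta-format Llama 2 ships a single consolidated checkpoint, matching the single-file restriction above):

```python
from torchtune.training import FullModelMetaCheckpointer

checkpointer = FullModelMetaCheckpointer(
    checkpoint_dir="/tmp/Llama-2-7b",          # illustrative path
    checkpoint_files=["consolidated.00.pth"],  # single file, per the docstring
    output_dir="/tmp/Llama-2-7b",
    model_type="LLAMA2",  # plain string; resolved to ModelType.LLAMA2 internally
)
state_dict = checkpointer.load_checkpoint()
```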
diff --git a/main/generated/torchtune.training.FullModelTorchTuneCheckpointer.html b/main/generated/torchtune.training.FullModelTorchTuneCheckpointer.html
index 0aa4e1b090..0aca0b529f 100644
--- a/main/generated/torchtune.training.FullModelTorchTuneCheckpointer.html
+++ b/main/generated/torchtune.training.FullModelTorchTuneCheckpointer.html
@@ -437,7 +437,7 @@ FullModelTorchTuneCheckpointer
-class torchtune.training.FullModelTorchTuneCheckpointer(checkpoint_dir: str, checkpoint_files: List[str], model_type: ModelType, output_dir: str, adapter_checkpoint: Optional[str] = None, recipe_checkpoint: Optional[str] = None, resume_from_checkpoint: bool = False)[source]
+class torchtune.training.FullModelTorchTuneCheckpointer(checkpoint_dir: str, checkpoint_files: List[str], model_type: str, output_dir: str, adapter_checkpoint: Optional[str] = None, recipe_checkpoint: Optional[str] = None, resume_from_checkpoint: bool = False)[source]
 Checkpointer which reads and writes checkpoints in a format compatible with torchtune. No conversion of weights is required.
 Currently this supports reading a single checkpoint file only. This will likely change as
@@ -448,7 +448,7 @@ FullModelTorchTuneCheckpointer

   • checkpoint_dir (str) – Directory containing the checkpoint files
   • checkpoint_files (List[str]) – List of checkpoint files to load. Since the checkpointer takes care of sorting by file ID, the order in this list does not matter
-  • model_type (ModelType) – Model type of the model for which the checkpointer is being loaded
+  • model_type (str) – Model type of the model for which the checkpointer is being loaded
   • output_dir (str) – Directory to save the checkpoint files
   • adapter_checkpoint (Optional[str]) – Path to the adapter weights. Default is None
   • recipe_checkpoint (Optional[str]) – Path to the recipe state checkpoint file. Default is None

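Recipe configs already spell this field as a bare YAML string, so the new signature removes the one place where the enum leaked into user code. A sketch of the round trip through `torchtune.config.instantiate` (the `_component_` path is the class documented above; directory and file names are illustrative and must exist for construction to succeed):

```python
from omegaconf import OmegaConf
from torchtune import config

yaml_cfg = """
checkpointer:
  _component_: torchtune.training.FullModelTorchTuneCheckpointer
  checkpoint_dir: /tmp/finetuned_model
  checkpoint_files: [meta_model_0.pt]
  output_dir: /tmp/finetuned_model
  model_type: LLAMA2
"""
cfg = OmegaConf.create(yaml_cfg)

# The YAML scalar LLAMA2 arrives as the Python string "LLAMA2" and is passed
# straight to the constructor, which validates it via ModelType[...].
checkpointer = config.instantiate(cfg.checkpointer)
```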
diff --git a/main/searchindex.js b/main/searchindex.js
index 4806c341c2..2150856747 100644
--- a/main/searchindex.js
+++ b/main/searchindex.js
@@ -1 +1 @@
 [regenerated Sphinx search index; machine-generated single-line payload omitted]
55, 71, 81, 129, 150, 151, 207, 212, 216, 231, 250, 298, 302, 304, 305, 306], "builder": [4, 9, 10, 11, 12, 13, 15, 16, 17, 22, 58, 60, 61, 64, 67, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 93, 96, 97, 98, 99, 102, 103, 104, 105, 108, 109, 110, 111, 112, 113, 114, 115, 117, 118, 121, 122, 123, 124, 126, 127, 128, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 152, 155, 157, 159, 161, 163, 164, 166, 168, 170, 172, 173, 174, 176, 177, 178, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 299, 305, 307], "all": [4, 5, 10, 11, 14, 15, 19, 21, 25, 30, 35, 36, 38, 42, 45, 47, 50, 53, 54, 55, 56, 81, 119, 142, 148, 169, 179, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 193, 194, 195, 196, 197, 198, 200, 201, 202, 203, 204, 208, 212, 214, 216, 217, 218, 219, 220, 224, 225, 226, 228, 232, 243, 254, 258, 260, 264, 270, 278, 284, 285, 289, 291, 293, 294, 295, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306], "7b": [4, 9, 12, 14, 18, 19, 20, 21, 22, 61, 72, 84, 87, 90, 93, 97, 104, 105, 110, 111, 114, 115, 155, 157, 159, 161, 164, 174, 178, 190, 191, 202, 203, 254, 255, 299, 301, 302, 304, 307], "13b": [4, 22, 82, 85, 88, 102, 108, 112], "codellama": 4, "size": [4, 14, 15, 16, 22, 25, 27, 45, 50, 59, 62, 65, 66, 68, 79, 80, 81, 142, 143, 144, 145, 147, 148, 149, 151, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 193, 194, 195, 196, 197, 198, 200, 201, 202, 203, 207, 208, 209, 210, 213, 214, 215, 216, 219, 221, 223, 224, 225, 226, 244, 245, 246, 267, 268, 270, 293, 295, 298, 300, 301, 302, 304, 305, 306], "0": [4, 9, 10, 12, 14, 15, 16, 18, 20, 22, 25, 45, 47, 48, 49, 50, 54, 60, 64, 67, 73, 74, 76, 77, 81, 85, 86, 87, 88, 89, 90, 91, 95, 96, 97, 98, 99, 101, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 143, 144, 146, 147, 149, 150, 151, 152, 154, 155, 156, 157, 158, 160, 163, 164, 165, 166, 167, 170, 171, 172, 173, 174, 175, 176, 177, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 207, 208, 214, 216, 217, 218, 219, 225, 230, 231, 242, 247, 248, 249, 250, 251, 261, 267, 273, 276, 277, 281, 286, 288, 290, 295, 297, 299, 300, 301, 302, 304, 305, 306, 307], "5b": [4, 172, 173, 176, 177, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 305], "14b": [4, 181, 182, 193, 194], "32b": [4, 185, 186, 197, 198], "72b": [4, 188, 189, 200, 201], "qwen2": [4, 10, 171, 172, 173, 174, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 257, 305], "exampl": [4, 10, 19, 21, 22, 23, 24, 25, 26, 27, 29, 33, 35, 37, 38, 42, 44, 45, 46, 47, 48, 49, 50, 53, 54, 56, 59, 60, 61, 62, 64, 65, 66, 67, 68, 69, 71, 72, 73, 75, 76, 77, 81, 143, 144, 147, 151, 207, 208, 216, 217, 218, 219, 221, 223, 224, 225, 226, 228, 229, 232, 237, 238, 239, 240, 242, 243, 247, 249, 250, 251, 252, 253, 254, 255, 257, 258, 266, 267, 273, 276, 277, 280, 283, 286, 287, 288, 289, 290, 292, 294, 295, 297, 298, 299, 300, 302, 303, 304, 305, 306, 307], "qwen2_5": [4, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204], "1_5b": 4, "none": [4, 9, 16, 25, 26, 28, 30, 31, 33, 35, 37, 42, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 
74, 75, 77, 81, 94, 101, 106, 107, 116, 119, 120, 125, 129, 142, 143, 144, 146, 148, 150, 162, 169, 179, 204, 205, 207, 208, 210, 213, 214, 215, 216, 217, 218, 219, 224, 226, 232, 234, 235, 236, 239, 242, 245, 246, 248, 254, 255, 256, 257, 258, 259, 262, 266, 271, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 284, 285, 286, 287, 298, 300, 306], "mini": [4, 21, 166, 167, 168, 169, 170], "4k": [4, 21, 167, 168, 169], "microsoft": [4, 168, 169], "ai": [4, 10, 12, 14, 19, 55, 56, 159, 277, 299, 302], "thi": [4, 9, 10, 12, 13, 14, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 31, 33, 34, 35, 36, 37, 42, 43, 44, 45, 47, 48, 50, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 75, 79, 80, 81, 91, 95, 101, 107, 116, 120, 125, 129, 142, 144, 146, 147, 150, 151, 153, 154, 156, 158, 160, 165, 167, 168, 169, 171, 175, 205, 207, 208, 210, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 224, 225, 226, 228, 229, 232, 235, 236, 237, 238, 239, 240, 242, 243, 244, 246, 247, 248, 250, 252, 253, 254, 255, 256, 258, 261, 262, 265, 267, 270, 272, 273, 274, 276, 277, 278, 279, 281, 283, 285, 286, 291, 292, 293, 294, 295, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307], "v0": [4, 9, 14, 18, 19, 21, 153], "mistralai": [4, 21, 298], "2b": [4, 92, 96], "gemma2": 4, "googl": [4, 92, 93], "gguf": 4, "compon": [4, 6, 14, 21, 22, 25, 30, 48, 55, 56, 65, 66, 230, 293, 296, 301, 303, 304, 307], "multimod": [4, 11, 14, 36, 42, 56, 65, 66, 224, 292], "encod": [4, 5, 15, 21, 50, 56, 73, 74, 81, 142, 143, 145, 146, 147, 149, 150, 151, 208, 213, 214, 215, 219, 224, 225, 226, 228, 237, 239, 240, 242, 244, 247, 250, 299], "perform": [5, 12, 13, 17, 19, 20, 21, 22, 54, 73, 216, 221, 232, 243, 250, 293, 294, 295, 299, 300, 302, 303, 305, 306, 307], "direct": [5, 18, 25, 48, 85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 154, 155, 156, 157, 165, 166, 230, 247, 292, 296, 305], "id": [5, 14, 17, 21, 22, 47, 48, 49, 50, 54, 61, 65, 66, 72, 73, 74, 76, 77, 144, 208, 210, 214, 215, 224, 237, 238, 239, 240, 241, 242, 244, 254, 256, 273, 299, 300], "decod": [5, 9, 12, 14, 15, 16, 18, 20, 21, 60, 64, 67, 73, 91, 95, 101, 107, 116, 120, 125, 129, 143, 144, 145, 146, 147, 149, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 208, 213, 214, 215, 219, 224, 226, 228, 237, 239, 240, 299], "typic": [5, 9, 12, 20, 24, 33, 37, 42, 50, 54, 55, 56, 57, 71, 169, 228, 247, 250, 305, 306, 307], "byte": [5, 21, 240, 305, 307], "pair": [5, 10, 18, 21, 24, 48, 49, 63, 67, 70, 240], "underli": [5, 13, 18, 21, 239, 305, 307], "helper": 5, "method": [5, 13, 14, 15, 19, 21, 22, 24, 25, 26, 29, 46, 55, 57, 59, 60, 61, 62, 63, 64, 67, 68, 69, 70, 71, 72, 144, 214, 220, 221, 224, 227, 228, 229, 233, 235, 237, 238, 258, 266, 292, 293, 304, 307], "two": [5, 15, 18, 19, 22, 24, 35, 50, 52, 65, 66, 73, 74, 79, 216, 225, 228, 230, 244, 251, 253, 293, 295, 300, 301, 302, 304, 305, 306, 307], "pre": [5, 9, 11, 12, 17, 18, 19, 20, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 100, 145, 148, 149, 216, 224, 226, 228, 230, 295, 299, 305], "train": [5, 9, 10, 11, 12, 13, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 31, 33, 35, 50, 53, 54, 55, 56, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 100, 144, 145, 148, 149, 206, 208, 210, 214, 215, 220, 221, 223, 224, 225, 226, 228, 230, 247, 250, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 291, 293, 294, 296, 298, 
299, 300, 302, 303, 304, 305, 306, 307], "function": [5, 10, 22, 24, 25, 27, 29, 45, 46, 47, 48, 60, 64, 67, 73, 79, 80, 81, 147, 151, 205, 208, 216, 217, 220, 232, 235, 236, 247, 248, 252, 254, 268, 281, 283, 285, 286, 293, 303, 307], "preprocess": [5, 54, 216], "imag": [5, 11, 15, 35, 36, 37, 42, 44, 46, 50, 56, 65, 66, 78, 79, 80, 81, 142, 143, 144, 145, 147, 148, 149, 151, 216, 225, 244, 304], "loss": [6, 9, 12, 14, 24, 25, 36, 38, 55, 56, 59, 60, 62, 64, 67, 68, 69, 221, 222, 223, 247, 248, 249, 250, 301, 303, 304, 307], "algorithm": [6, 21, 245, 250, 281], "ppo": [6, 245, 246, 247, 248, 296], "dpo": [6, 18, 48, 55, 232, 247, 249, 250, 296], "offer": 7, "allow": [7, 10, 53, 226, 230, 235, 276, 295, 298, 305, 306, 307], "seamless": 7, "transit": 7, "between": [7, 9, 18, 19, 21, 22, 55, 60, 67, 146, 150, 213, 214, 218, 224, 246, 248, 250, 254, 257, 273, 300, 302, 303, 304, 305, 306, 307], "interoper": [7, 22, 25, 293, 300, 307], "rest": [7, 299, 305, 307], "ecosystem": [7, 22, 25, 293, 300, 302, 307], "comprehens": [7, 305], "deep": [7, 22, 23, 24, 25, 26, 226, 228, 293, 296, 301, 302, 305], "dive": [7, 22, 23, 24, 25, 26, 293, 295, 296, 301, 302, 305], "util": [7, 14, 16, 22, 24, 25, 27, 45, 47, 50, 142, 259, 276, 278, 279, 285, 286, 287, 288, 293, 300, 301, 305, 307], "work": [7, 22, 25, 35, 42, 212, 225, 226, 293, 295, 298, 300, 302, 305, 307], "set": [7, 9, 12, 17, 18, 20, 22, 23, 24, 25, 26, 33, 36, 37, 42, 50, 54, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 71, 72, 101, 107, 116, 120, 125, 129, 144, 146, 149, 150, 154, 156, 158, 160, 165, 167, 171, 175, 208, 210, 213, 214, 217, 218, 219, 224, 232, 234, 252, 258, 263, 270, 272, 273, 279, 280, 281, 282, 285, 286, 293, 296, 298, 299, 300, 301, 302, 303, 304, 305, 306], "enabl": [7, 10, 11, 17, 21, 23, 24, 25, 26, 53, 85, 86, 87, 88, 89, 90, 96, 97, 98, 99, 108, 109, 110, 111, 112, 113, 114, 115, 121, 122, 123, 124, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 152, 155, 157, 163, 164, 166, 170, 172, 173, 174, 176, 177, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 195, 196, 199, 208, 213, 214, 215, 217, 218, 219, 224, 226, 230, 231, 281, 282, 295, 302, 304, 305, 307], "consumpt": [7, 53, 75, 294, 305], "dure": [7, 10, 11, 22, 54, 59, 60, 62, 64, 67, 68, 69, 207, 208, 210, 214, 215, 216, 220, 224, 225, 250, 265, 294, 295, 299, 300, 302, 304, 305, 306, 307], "control": [7, 13, 18, 21, 25, 36, 59, 60, 62, 64, 67, 68, 69, 218, 219, 226, 232, 273, 281, 295, 300, 305], "lr": [7, 24, 258, 261, 264, 303, 305], "process": [7, 11, 14, 15, 17, 25, 26, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 81, 147, 151, 216, 220, 268, 269, 281, 301, 306, 307], "variou": 7, "provid": [7, 10, 11, 12, 14, 22, 24, 25, 27, 32, 33, 35, 37, 42, 46, 47, 51, 53, 54, 73, 75, 81, 208, 212, 214, 216, 224, 232, 242, 247, 256, 263, 273, 277, 282, 286, 293, 294, 295, 298, 299, 300, 301, 302, 305], "debug": [7, 22, 24, 25, 273, 298], "finetun": [7, 10, 22, 24, 25, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 224, 291, 293, 295, 301, 302, 305], "job": [7, 10, 26, 281, 301], "involv": [9, 12, 17, 20, 56, 306], "multi": [9, 18, 25, 208, 302], "turn": [9, 18, 25, 33, 36, 37, 42, 52, 55, 67, 299, 305], "multipl": [9, 16, 17, 18, 22, 24, 25, 33, 36, 37, 42, 48, 53, 56, 67, 142, 143, 208, 214, 215, 216, 224, 231, 273, 274, 275, 276, 277, 282, 301, 302, 303, 305], "back": [9, 21, 22, 52, 232, 254, 304, 305, 307], "forth": [9, 
52], "user": [9, 12, 13, 14, 15, 16, 18, 19, 21, 25, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 52, 55, 56, 60, 64, 67, 94, 101, 106, 107, 116, 119, 120, 125, 129, 146, 148, 150, 154, 156, 158, 160, 162, 165, 167, 169, 171, 175, 179, 204, 208, 242, 296, 299, 301, 306], "assist": [9, 12, 13, 14, 15, 16, 18, 19, 21, 31, 32, 33, 35, 36, 37, 38, 39, 41, 42, 44, 52, 55, 56, 60, 67, 73, 94, 100, 106, 119, 148, 162, 169, 179, 204, 242, 299], "role": [9, 13, 14, 15, 16, 18, 19, 21, 33, 36, 37, 38, 39, 42, 44, 55, 56, 60, 67, 94, 106, 119, 144, 148, 162, 169, 179, 204, 242, 299], "content": [9, 13, 15, 16, 18, 19, 21, 22, 33, 36, 37, 38, 39, 42, 44, 55, 56, 60, 67, 242, 299], "what": [9, 14, 15, 16, 18, 22, 23, 24, 26, 36, 37, 55, 56, 60, 64, 67, 100, 153, 216, 291, 296, 299, 300, 301, 302, 305], "answer": [9, 15, 16, 19, 40, 64, 300, 302], "ultim": [9, 306], "question": [9, 15, 16, 19, 40, 64, 300, 302], "life": 9, "42": [9, 73, 216], "That": [9, 299], "s": [9, 10, 12, 13, 14, 16, 17, 18, 19, 20, 22, 24, 25, 26, 27, 29, 32, 37, 42, 52, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 85, 86, 87, 100, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 142, 143, 144, 149, 150, 151, 153, 154, 155, 156, 157, 165, 166, 169, 171, 174, 175, 181, 182, 185, 186, 188, 189, 190, 191, 207, 208, 210, 214, 215, 216, 220, 224, 227, 228, 229, 230, 233, 235, 236, 240, 247, 249, 250, 251, 252, 254, 255, 258, 263, 265, 267, 272, 273, 276, 279, 280, 283, 285, 286, 293, 298, 299, 301, 303, 304, 305, 306, 307], "ridicul": 9, "oh": 9, "i": [9, 12, 14, 18, 19, 20, 25, 36, 67, 73, 100, 142, 143, 153, 208, 213, 214, 215, 216, 220, 224, 234, 253, 258, 300, 302, 305, 306, 307], "know": [9, 299, 300, 303, 304], "more": [9, 10, 11, 12, 13, 14, 16, 18, 19, 21, 22, 24, 25, 38, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 207, 216, 221, 228, 235, 252, 253, 256, 273, 277, 279, 281, 285, 293, 294, 295, 296, 298, 300, 301, 302, 303, 304, 305, 306, 307], "structur": [9, 12, 13, 14, 19, 25, 37, 39, 42, 60, 119, 144, 148, 169, 179, 204, 244, 299, 300, 306], "than": [9, 10, 12, 16, 18, 24, 50, 52, 73, 75, 207, 208, 216, 247, 252, 256, 257, 284, 285, 288, 299, 300, 301, 302, 303, 304, 305, 307], "freeform": [9, 12, 57, 71], "associ": [9, 10, 11, 12, 22, 24, 25, 73, 74, 81, 91, 101, 116, 125, 146, 150, 158, 175, 273, 300, 304], "where": [9, 10, 12, 14, 16, 18, 19, 20, 36, 38, 45, 48, 59, 73, 75, 76, 79, 105, 142, 143, 161, 205, 208, 214, 216, 218, 221, 223, 224, 231, 239, 244, 245, 247, 248, 251, 263, 267, 272, 303, 305], "thei": [9, 11, 12, 19, 21, 24, 25, 53, 65, 66, 81, 142, 147, 151, 214, 216, 226, 236, 263, 298, 299, 304, 305, 306], "learn": [9, 12, 25, 53, 225, 226, 228, 258, 261, 264, 293, 294, 295, 296, 299, 301, 302, 304, 305, 306, 307], "simpli": [9, 12, 13, 14, 16, 20, 22, 24, 54, 56, 247, 298, 299, 300, 302, 303, 305, 307], "predict": [9, 12, 73, 74, 77, 245, 246, 248, 294], "next": [9, 12, 22, 54, 71, 73, 74, 81, 216, 244, 294, 302, 307], "respond": 9, "accur": 9, "primari": [9, 12, 16, 18, 20, 22, 24, 25, 55, 56, 296, 301], "entri": [9, 12, 16, 18, 20, 24, 25, 47, 50, 296, 301, 305], "point": [9, 10, 12, 16, 18, 20, 21, 24, 25, 46, 60, 242, 296, 300, 301, 302, 304, 306, 307], "torchtun": [9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 292, 294, 295, 296, 299, 301, 305], "chat_dataset": [9, 12, 13, 18, 299], "let": [9, 10, 11, 12, 16, 18, 22, 24, 26, 298, 299, 300, 301, 302, 303, 304, 305, 307], "follow": [9, 10, 11, 12, 15, 16, 19, 22, 25, 36, 37, 38, 42, 50, 54, 55, 56, 64, 67, 144, 208, 213, 244, 248, 256, 257, 258, 261, 270, 277, 282, 291, 292, 295, 296, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307], "data": [9, 10, 12, 13, 14, 15, 16, 19, 21, 23, 31, 32, 33, 35, 36, 37, 38, 39, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 100, 148, 216, 243, 247, 249, 265, 273, 274, 275, 276, 277, 285, 294, 295, 300, 305, 306, 307], "directli": [9, 10, 12, 13, 14, 16, 22, 24, 25, 27, 31, 55, 56, 60, 64, 65, 67, 71, 247, 252, 254, 298, 300, 301, 302, 304, 305, 306, 307], "llm": [9, 10, 11, 12, 21, 25, 224, 226, 291, 292, 293, 294, 296, 300, 302, 303, 304], "my_data": [9, 12, 13, 16, 299], "human": [9, 16, 18, 36, 42, 60, 100, 247, 248, 249, 299], "valu": [9, 16, 22, 24, 33, 35, 37, 42, 45, 47, 48, 50, 59, 60, 62, 63, 64, 67, 68, 69, 70, 73, 74, 76, 77, 82, 83, 84, 91, 92, 93, 95, 101, 102, 103, 104, 105, 107, 116, 117, 118, 120, 125, 126, 127, 128, 129, 136, 137, 144, 146, 150, 154, 156, 158, 159, 160, 161, 165, 167, 171, 175, 176, 177, 178, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 207, 208, 209, 213, 214, 215, 222, 223, 224, 226, 235, 245, 246, 248, 251, 254, 257, 258, 261, 267, 273, 274, 275, 276, 277, 281, 295, 298, 299, 301, 302, 304, 305, 306], "gpt": [9, 16, 42, 60, 74, 299, 300], "mistral": [9, 14, 18, 19, 21, 144, 153, 154, 155, 156, 157, 159, 160, 161, 162, 163, 164, 257, 298, 299, 300, 301], "mistral_token": [9, 14, 18, 19, 21], "m_token": [9, 14, 18, 19, 20, 21], "path": [9, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 35, 42, 46, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 94, 106, 119, 144, 148, 162, 169, 179, 204, 239, 240, 241, 254, 255, 256, 282, 298, 299, 300, 302, 304], "1": [9, 14, 16, 18, 19, 20, 21, 22, 25, 35, 42, 45, 47, 48, 49, 50, 54, 69, 73, 74, 76, 77, 78, 79, 101, 107, 116, 120, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 143, 144, 146, 150, 153, 154, 156, 158, 160, 165, 167, 171, 172, 173, 175, 176, 177, 183, 184, 195, 196, 207, 208, 214, 216, 217, 218, 219, 221, 222, 223, 239, 240, 242, 247, 248, 249, 250, 255, 257, 261, 267, 270, 273, 276, 277, 280, 281, 293, 294, 298, 299, 300, 301, 304, 305, 306, 307], "prompt_templ": [9, 12, 14, 16, 18, 19, 94, 106, 119, 144, 148, 162, 
169, 179, 204], "mistralchattempl": [9, 14, 18, 19, 162, 299], "max_seq_len": [9, 10, 12, 14, 16, 17, 18, 20, 21, 24, 27, 47, 50, 51, 54, 59, 60, 61, 62, 64, 65, 66, 68, 69, 71, 72, 91, 94, 95, 101, 106, 107, 116, 119, 120, 125, 129, 144, 146, 148, 150, 154, 156, 158, 160, 162, 165, 167, 169, 171, 175, 179, 204, 207, 208, 210, 214, 219, 306], "8192": [9, 12, 14, 16, 17, 18, 20, 21, 148, 304, 306], "ds": [9, 10, 12, 15, 16, 18, 20, 54, 69, 299], "sourc": [9, 10, 12, 13, 16, 18, 20, 22, 24, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 91, 92, 93, 94, 95, 96, 97, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 116, 117, 118, 119, 120, 121, 122, 125, 126, 127, 128, 129, 130, 131, 132, 136, 137, 138, 139, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 165, 166, 167, 168, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 299, 300, 306], "data_fil": [9, 12, 13, 16, 18, 20, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 299], "split": [9, 10, 12, 13, 14, 16, 18, 20, 22, 44, 53, 54, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 240, 299, 300, 306], "conversation_column": [9, 60, 299], "conversation_styl": [9, 60, 299], "By": [9, 12, 22, 230, 295, 298, 303, 304, 305, 306, 307], "default": [9, 10, 12, 16, 22, 24, 31, 32, 33, 35, 36, 37, 42, 45, 48, 49, 50, 51, 54, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 81, 82, 83, 84, 85, 86, 87, 91, 92, 93, 94, 95, 96, 97, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 116, 117, 118, 119, 120, 121, 122, 125, 126, 127, 128, 129, 130, 131, 132, 136, 137, 138, 139, 144, 145, 148, 149, 150, 151, 154, 155, 156, 157, 158, 159, 160, 161, 162, 165, 166, 167, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 208, 209, 210, 213, 214, 215, 220, 222, 223, 224, 230, 231, 235, 239, 240, 242, 245, 246, 247, 250, 254, 255, 256, 258, 261, 262, 268, 272, 273, 274, 277, 280, 281, 282, 292, 295, 298, 299, 300, 302, 303, 304, 305, 306, 307], "true": [9, 10, 12, 13, 14, 15, 16, 17, 22, 24, 31, 36, 45, 53, 54, 57, 58, 59, 60, 62, 64, 65, 66, 67, 68, 69, 71, 72, 75, 76, 81, 88, 89, 90, 91, 95, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 144, 145, 152, 163, 164, 170, 208, 213, 214, 215, 217, 218, 219, 220, 221, 222, 224, 226, 232, 239, 240, 242, 244, 245, 248, 251, 252, 254, 255, 256, 263, 264, 265, 267, 269, 270, 273, 276, 282, 288, 294, 298, 299, 300, 302, 304, 305, 306, 307], "train_on_input": [9, 12, 13, 18, 24, 31, 33, 35, 37, 42, 53, 58, 59, 60, 62, 63, 64, 67, 68, 69, 70], "new_system_prompt": [9, 12, 13, 33, 35, 37, 42, 60, 62, 63, 64, 65, 66, 67, 68, 69], "tokenized_dict": [9, 12, 
15, 16, 18, 20], "label": [9, 12, 20, 25, 47, 48, 49, 50, 54, 61, 69, 72, 221, 222, 223, 247, 250, 303], "print": [9, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 26, 44, 50, 53, 59, 62, 65, 66, 68, 69, 73, 144, 216, 217, 218, 219, 239, 240, 242, 288, 299, 301, 304, 306, 307], "inst": [9, 14, 19, 21, 100, 144, 153, 299], "733": [9, 14, 21], "16289": [9, 14, 21], "28793": [9, 14, 21], "1824": 9, "349": 9, "272": 9, "4372": 9, "In": [9, 10, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 24, 25, 55, 79, 80, 81, 147, 151, 210, 214, 216, 231, 252, 272, 276, 277, 295, 299, 300, 302, 303, 304, 305, 306, 307], "_component_": [9, 10, 12, 13, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 53, 60, 64, 67, 71, 282, 295, 299, 300, 302, 303, 304, 305, 306], "null": [9, 22, 24, 306], "have": [9, 10, 13, 14, 18, 21, 22, 24, 27, 35, 36, 55, 60, 67, 75, 79, 80, 81, 142, 147, 151, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 193, 194, 195, 196, 197, 198, 200, 201, 202, 203, 206, 207, 208, 209, 212, 214, 216, 217, 218, 219, 221, 223, 224, 229, 236, 244, 250, 253, 256, 258, 263, 264, 276, 284, 292, 299, 300, 301, 302, 303, 304, 305, 306, 307], "singl": [9, 10, 16, 17, 18, 19, 22, 24, 27, 33, 35, 37, 42, 47, 53, 54, 55, 56, 57, 60, 67, 71, 79, 80, 81, 94, 105, 106, 119, 142, 143, 144, 147, 148, 151, 161, 162, 169, 208, 214, 216, 224, 254, 255, 256, 257, 258, 260, 296, 298, 299, 300, 301, 302, 303, 304, 305, 307], "name": [9, 12, 13, 14, 16, 18, 20, 22, 23, 24, 26, 28, 31, 33, 35, 37, 42, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 229, 234, 236, 240, 254, 255, 256, 257, 258, 260, 273, 274, 275, 276, 277, 283, 284, 286, 298, 299, 300, 302, 305, 306], "messag": [9, 11, 12, 15, 16, 18, 19, 21, 31, 32, 33, 35, 37, 38, 39, 42, 44, 52, 55, 56, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 94, 106, 119, 144, 148, 162, 169, 238, 242, 292, 298, 299], "contain": [9, 11, 13, 14, 15, 16, 18, 20, 22, 33, 35, 36, 42, 47, 48, 49, 50, 54, 55, 56, 57, 60, 65, 71, 119, 144, 148, 169, 179, 204, 207, 208, 210, 214, 215, 224, 227, 229, 233, 234, 235, 240, 242, 245, 251, 254, 255, 256, 258, 260, 265, 271, 276, 282, 283, 285, 299, 300, 302, 304], "topic": [9, 291], "per": [9, 16, 47, 88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 143, 144, 152, 163, 164, 170, 207, 216, 220, 244, 246, 247, 298, 305, 306, 307], "could": [9, 18, 19, 264, 303, 304], "system": [9, 12, 13, 18, 19, 32, 33, 35, 36, 37, 38, 39, 41, 42, 44, 52, 55, 56, 60, 62, 63, 64, 65, 66, 67, 68, 69, 94, 100, 106, 119, 148, 153, 162, 169, 179, 204, 242, 299], "tool": [9, 18, 19, 22, 36, 38, 56, 153, 273, 300, 301], "call": [9, 14, 18, 21, 22, 27, 36, 38, 56, 65, 66, 153, 208, 214, 216, 217, 218, 220, 224, 235, 273, 274, 275, 276, 277, 278, 282, 283, 299, 304, 307], "return": [9, 10, 13, 15, 18, 19, 21, 27, 29, 36, 38, 44, 45, 46, 47, 48, 49, 50, 51, 54, 55, 56, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 91, 92, 93, 94, 95, 96, 97, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 116, 117, 118, 119, 120, 121, 122, 125, 126, 127, 128, 129, 130, 131, 132, 136, 137, 138, 139, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 154, 155, 156, 157, 158, 159, 160, 161, 162, 165, 166, 167, 168, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 213, 214, 215, 216, 218, 219, 221, 222, 223, 224, 225, 226, 227, 229, 230, 
231, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 256, 258, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 280, 281, 282, 286, 287, 288, 303, 304, 307], "dai": [9, 20], "todai": 9, "It": [9, 10, 14, 16, 32, 36, 38, 55, 56, 60, 62, 64, 65, 66, 68, 70, 144, 147, 151, 153, 212, 214, 216, 224, 247, 250, 273, 298, 299, 303, 307], "tuesdai": 9, "about": [9, 10, 13, 14, 18, 22, 25, 65, 66, 216, 247, 250, 273, 277, 293, 294, 295, 296, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307], "tomorrow": 9, "wednesdai": 9, "As": [9, 12, 16, 22, 24, 25, 26, 231, 293, 300, 305, 307], "an": [9, 10, 12, 14, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 46, 50, 52, 53, 57, 60, 62, 64, 65, 66, 67, 68, 71, 72, 78, 79, 80, 107, 120, 129, 144, 147, 149, 151, 154, 156, 160, 165, 171, 172, 173, 176, 177, 208, 212, 214, 216, 224, 225, 226, 228, 229, 232, 233, 234, 238, 243, 244, 247, 252, 253, 254, 255, 256, 258, 259, 263, 264, 273, 277, 282, 286, 293, 294, 295, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307], "slimorca": [9, 69], "pass": [9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 24, 27, 36, 38, 53, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 91, 95, 101, 107, 116, 120, 125, 129, 149, 154, 156, 158, 160, 165, 167, 171, 175, 208, 212, 214, 218, 219, 220, 224, 232, 236, 240, 248, 252, 256, 262, 263, 265, 269, 272, 273, 276, 277, 279, 282, 298, 299, 304, 306, 307], "repo": [9, 10, 12, 16, 18, 20, 22, 65, 254, 255, 257, 298, 300], "select": [9, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 193, 194, 195, 196, 197, 198, 200, 201, 202, 203, 259], "one": [9, 10, 11, 12, 13, 16, 18, 22, 25, 33, 35, 37, 42, 47, 50, 52, 60, 66, 67, 216, 221, 223, 242, 256, 273, 300, 301, 302, 305, 307], "most": [9, 12, 13, 16, 18, 20, 22, 24, 36, 38, 299, 301, 304, 305, 307], "gemma": [9, 12, 18, 20, 92, 93, 94, 95, 96, 97, 98, 99, 212, 257, 305], "gemma_token": [9, 12, 18, 20], "g_token": [9, 12, 18, 20], "open": [9, 20, 46, 69, 92, 93, 300], "orca": [9, 69], "dedup": [9, 69], "recip": [9, 11, 12, 16, 18, 20, 22, 23, 24, 26, 27, 28, 29, 144, 214, 224, 254, 255, 256, 293, 294, 295, 299, 300, 302, 305, 307], "via": [9, 12, 14, 16, 17, 18, 20, 23, 24, 26, 55, 60, 64, 67, 71, 208, 214, 215, 230, 231, 254, 304, 307], "http": [9, 12, 16, 27, 46, 57, 61, 63, 65, 71, 72, 74, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 93, 95, 96, 97, 98, 99, 102, 103, 104, 105, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 152, 154, 155, 156, 157, 159, 161, 163, 164, 165, 166, 168, 169, 170, 172, 173, 174, 176, 177, 178, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 208, 209, 210, 216, 221, 222, 244, 245, 247, 248, 249, 250, 252, 254, 255, 261, 270, 273, 276, 277, 279, 281, 287, 292, 300, 302, 303], "ha": [9, 18, 22, 64, 73, 143, 211, 213, 214, 216, 219, 221, 223, 224, 227, 229, 232, 233, 236, 251, 256, 258, 283, 284, 299, 300, 301, 302, 303, 304, 305, 307], "addition": [9, 22, 239, 240, 250, 281, 299, 304, 305], "argument": [9, 10, 12, 16, 22, 24, 27, 34, 40, 43, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170, 252, 263, 269, 273, 274, 276, 277, 279, 298, 299, 304, 305, 306], "load_dataset": [9, 12, 16, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 299], "document": [9, 12, 
16, 17, 78, 79, 80, 81, 208, 214, 215, 252, 263, 272, 294, 296, 298, 305], "file": [9, 10, 11, 12, 16, 22, 23, 24, 25, 26, 27, 28, 29, 46, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 119, 144, 148, 169, 179, 204, 239, 240, 241, 254, 255, 256, 274, 277, 282, 290, 293, 295, 297, 298, 299, 300, 301, 302, 304, 305, 306, 307], "raw": [9, 11, 13, 14, 16, 21, 44], "vari": [9, 50, 54, 214], "field": [9, 10, 14, 15, 27, 31, 35, 36, 42, 44, 54, 55, 56, 59, 65, 66, 271], "indic": [9, 14, 16, 18, 19, 50, 53, 54, 75, 76, 81, 147, 151, 208, 210, 214, 215, 216, 224, 225, 244, 245, 248, 251, 252, 267, 270, 299], "There": [9, 24, 52, 79, 299, 301, 302, 303, 304, 305], "few": [9, 10, 226, 302, 304, 307], "standard": [9, 12, 14, 15, 17, 19, 22, 34, 55, 56, 60, 63, 101, 107, 116, 120, 125, 129, 144, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 208, 230, 275, 293, 299, 300, 302, 303], "across": [9, 22, 25, 50, 53, 230, 254, 276, 281, 300, 302, 303, 306], "mani": [9, 14, 16, 19, 24, 54, 294, 295, 300, 303], "we": [9, 10, 11, 12, 18, 19, 20, 21, 22, 23, 24, 25, 26, 47, 50, 54, 55, 56, 60, 61, 67, 72, 73, 77, 207, 208, 210, 212, 214, 215, 216, 218, 221, 223, 224, 231, 247, 250, 254, 255, 256, 262, 266, 272, 278, 283, 293, 294, 295, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307], "ipython": [9, 14, 19, 36, 38, 41, 55, 56, 94, 106, 119, 148, 162, 169, 179, 204], "transform": [9, 10, 11, 16, 22, 25, 31, 33, 35, 55, 56, 59, 60, 62, 63, 65, 66, 67, 68, 69, 81, 85, 86, 87, 91, 95, 96, 97, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 143, 144, 146, 147, 148, 149, 150, 151, 154, 155, 156, 157, 158, 160, 165, 166, 167, 171, 172, 173, 174, 175, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 213, 214, 215, 216, 226, 244, 261, 279, 304, 305, 306], "sharegpttomessag": [9, 13, 60, 69], "expect": [9, 12, 13, 15, 16, 18, 19, 20, 22, 24, 27, 31, 33, 35, 36, 37, 42, 46, 50, 55, 56, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 143, 144, 210, 224, 236, 258, 273, 277, 284, 299, 304, 305, 306], "code": [9, 10, 12, 13, 16, 19, 21, 22, 25, 82, 83, 84, 85, 86, 87, 88, 89, 90, 214, 273, 289, 293, 301, 305], "openaitomessag": [9, 13, 60, 67], "If": [9, 10, 13, 14, 16, 17, 19, 21, 22, 24, 30, 33, 35, 36, 37, 42, 44, 46, 47, 50, 51, 52, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 71, 73, 75, 77, 81, 94, 101, 106, 107, 116, 119, 120, 125, 129, 142, 144, 146, 148, 150, 162, 169, 171, 175, 179, 204, 207, 208, 210, 212, 214, 215, 216, 218, 219, 220, 221, 223, 224, 230, 231, 236, 242, 254, 255, 256, 257, 258, 259, 262, 263, 264, 265, 266, 269, 273, 276, 277, 281, 282, 284, 286, 292, 298, 299, 300, 301, 302, 303, 304, 305, 306], "doe": [9, 17, 22, 44, 50, 54, 67, 71, 91, 153, 158, 168, 208, 212, 214, 215, 217, 218, 219, 222, 223, 224, 229, 242, 254, 256, 258, 283, 298, 300, 306], "fit": [9, 25, 54, 61, 71, 72, 216, 247, 299], "creat": [9, 10, 13, 16, 19, 22, 24, 27, 38, 54, 56, 60, 67, 75, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 93, 96, 97, 98, 99, 102, 103, 104, 105, 108, 109, 110, 111, 112, 113, 114, 115, 117, 118, 121, 122, 123, 124, 126, 127, 128, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 152, 155, 157, 159, 161, 163, 164, 166, 168, 170, 172, 173, 174, 176, 177, 178, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 207, 208, 214, 215, 216, 224, 252, 254, 255, 256, 260, 261, 273, 274, 276, 298, 300, 307], "custom": [9, 15, 16, 21, 24, 25, 31, 38, 55, 56, 60, 64, 65, 66, 67, 71, 94, 
106, 119, 148, 162, 169, 179, 204, 279, 293, 294, 295, 298, 301, 302, 304, 305], "dialogu": [9, 16, 43, 68, 299], "defin": [9, 10, 17, 22, 24, 25, 38, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 208, 213, 214, 224, 227, 229, 231, 233, 246, 301, 304], "same": [9, 10, 11, 15, 18, 22, 24, 38, 45, 78, 79, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 143, 166, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 206, 207, 209, 211, 212, 213, 215, 216, 219, 224, 226, 242, 248, 250, 251, 258, 263, 264, 277, 283, 285, 295, 298, 299, 300, 302, 303, 304, 305, 306, 307], "wai": [9, 14, 19, 22, 24, 55, 56, 235, 253, 298, 300, 301, 302, 303], "instruct_dataset": [9, 12, 13, 53], "info": [9, 287, 301], "slimorca_dataset": [9, 24], "command": [10, 12, 17, 21, 23, 25, 26, 292, 295, 296, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307], "line": [10, 17, 22, 23, 25, 296, 298, 301, 302, 305], "both": [10, 14, 15, 21, 22, 37, 50, 53, 63, 67, 205, 224, 226, 228, 236, 298, 300, 303, 304, 305, 306, 307], "built": [10, 11, 13, 23, 24, 26, 63, 67, 70, 292, 299, 301, 307], "done": [10, 17, 54, 214, 235, 262, 272, 283, 304, 306, 307], "run": [10, 17, 22, 23, 24, 26, 29, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 207, 208, 214, 220, 221, 254, 255, 256, 258, 259, 260, 270, 273, 276, 277, 278, 292, 293, 294, 295, 296, 299, 301, 302, 303, 304, 305, 306, 307], "cli": [10, 24, 26, 28, 29, 292, 294, 300, 301, 305], "which": [10, 11, 12, 14, 16, 17, 18, 19, 20, 21, 22, 24, 25, 46, 47, 53, 54, 57, 59, 60, 62, 64, 67, 68, 69, 71, 76, 77, 85, 86, 87, 94, 95, 96, 97, 106, 107, 108, 109, 110, 111, 119, 120, 121, 122, 129, 130, 131, 132, 138, 139, 144, 147, 148, 149, 150, 151, 153, 154, 155, 156, 157, 162, 165, 166, 169, 171, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 207, 208, 210, 214, 215, 216, 217, 218, 219, 224, 226, 235, 236, 239, 254, 255, 256, 258, 261, 262, 274, 277, 279, 283, 293, 294, 295, 296, 298, 299, 300, 301, 303, 304, 305, 306, 307], "folder": [10, 22], "first": [10, 17, 22, 24, 27, 42, 52, 54, 65, 76, 81, 147, 151, 214, 216, 217, 218, 224, 251, 254, 291, 293, 294, 299, 300, 302, 303, 304, 306, 307], "ensur": [10, 19, 21, 22, 24, 30, 52, 55, 56, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 208, 217, 254, 256, 262, 293, 301], "instal": [10, 23, 24, 26, 270, 273, 276, 277, 291, 298, 300, 301, 302, 303, 304, 305, 306, 307], "environ": [10, 25, 270, 273, 286, 292, 296, 298, 300, 301, 306], "so": [10, 13, 18, 19, 20, 22, 24, 54, 65, 212, 216, 254, 292, 293, 299, 300, 301, 302, 304, 305, 306, 307], "directori": [10, 22, 24, 35, 42, 65, 254, 255, 256, 274, 276, 277, 282, 298, 300, 301, 302], "new": [10, 14, 15, 16, 19, 21, 25, 37, 42, 59, 61, 62, 63, 65, 67, 68, 69, 159, 207, 225, 226, 257, 273, 274, 276, 299, 300, 301, 302, 303, 304, 307], "librari": [10, 247, 249, 262, 281, 287, 291, 292, 293, 298, 305, 307], "mkdir": 10, "my_project": [10, 273, 277], "cd": [10, 21, 292, 300], "llama": [10, 15, 16, 17, 20, 21, 22, 100, 142, 144, 145, 146, 147, 148, 150, 151, 209, 210, 254, 255, 294, 295, 298, 299, 300, 301, 302, 303, 304], "3": [10, 15, 16, 17, 20, 21, 22, 45, 47, 48, 49, 50, 54, 76, 77, 81, 142, 144, 145, 146, 147, 148, 150, 151, 153, 166, 168, 169, 216, 257, 267, 280, 287, 294, 295, 298, 299, 300, 301, 302, 303, 306, 307], "2": [10, 14, 15, 17, 21, 22, 26, 45, 47, 48, 49, 50, 52, 54, 69, 76, 77, 78, 79, 136, 137, 138, 139, 140, 141, 
142, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 207, 208, 216, 239, 240, 242, 248, 250, 251, 254, 255, 257, 267, 280, 281, 282, 288, 295, 299, 300, 301, 302, 304, 305, 306], "lora": [10, 24, 85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 230, 231, 232, 235, 236, 254, 272, 291, 293, 296, 299, 301, 302, 303], "devic": [10, 17, 24, 25, 219, 258, 262, 265, 285, 286, 296, 298, 299, 300, 301, 302, 304, 305, 307], "lora_finetune_single_devic": [10, 24, 294, 298, 299, 300, 301, 302, 303, 304, 305, 307], "llama3_2": [10, 17, 136, 137, 138, 139, 140, 141, 212, 217, 218, 219, 257, 303], "1b_lora_single_devic": 10, "often": [10, 304, 305], "ll": [10, 18, 20, 22, 24, 25, 73, 266, 293, 295, 299, 300, 301, 302, 303, 305, 306, 307], "want": [10, 12, 19, 22, 24, 25, 26, 27, 50, 55, 56, 73, 212, 228, 292, 298, 299, 300, 301, 302, 303, 304, 305], "start": [10, 23, 25, 26, 46, 76, 242, 257, 273, 292, 293, 299, 300, 301, 303, 305, 306], "our": [10, 12, 13, 20, 22, 25, 293, 294, 295, 296, 299, 300, 301, 303, 304, 305, 306, 307], "particular": [10, 11, 13, 19, 21, 24, 53, 144, 252, 304, 307], "adjust": [10, 230, 294, 295, 303, 305, 306], "hyperparamet": [10, 23, 250, 258, 293, 301, 304, 307], "cp": [10, 24, 292, 298, 299, 300, 301, 302, 306], "copi": [10, 299, 300, 301, 302, 305, 306, 307], "make": [10, 17, 19, 22, 23, 24, 25, 26, 145, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 193, 194, 195, 196, 197, 198, 200, 201, 202, 203, 216, 293, 298, 300, 301, 302, 303, 304, 305, 306, 307], "modif": [10, 306], "show": [10, 144, 244, 292, 294, 295, 298, 299, 303, 304], "each": [10, 12, 15, 18, 19, 20, 22, 25, 38, 39, 42, 47, 48, 50, 53, 54, 55, 56, 78, 79, 80, 81, 85, 86, 87, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 143, 144, 147, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 208, 210, 214, 215, 216, 221, 223, 224, 226, 230, 235, 236, 242, 244, 245, 246, 247, 249, 250, 267, 281, 282, 293, 295, 296, 298, 300, 301, 304, 305, 306], "ls": [10, 21, 292, 296, 298, 300, 301, 302], "full": [10, 11, 13, 16, 22, 24, 25, 34, 40, 43, 55, 72, 88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 149, 152, 163, 164, 170, 224, 235, 236, 242, 259, 292, 293, 296, 298, 300, 302, 304, 305, 306], "5b_full_single_devic": 10, "qwen_config": 10, "now": [10, 19, 22, 207, 218, 258, 260, 295, 299, 300, 301, 302, 303, 304, 306, 307], "sure": [10, 17, 22, 24, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 193, 194, 195, 196, 197, 198, 200, 201, 202, 203, 300, 301, 302, 303, 304, 305, 306, 307], "correct": [10, 12, 14, 19, 25, 34, 62, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 193, 194, 195, 196, 197, 198, 200, 201, 202, 203, 209, 210, 214, 286, 293, 299], "ve": [10, 18, 21, 24, 207, 295, 298, 299, 300, 302, 303, 304, 305], "even": [10, 216, 283, 292, 298, 299, 302, 303, 304, 305, 307], "didn": 10, "t": [10, 13, 14, 18, 19, 20, 22, 24, 25, 45, 142, 143, 221, 226, 262, 277, 281, 298, 299, 300, 301, 303, 305, 307], "complet": [10, 11, 12, 18, 22, 25, 37, 54, 61, 71, 169, 299, 300, 301, 302, 305], "note": [10, 16, 21, 22, 24, 95, 224, 229, 258, 278, 281, 283, 295, 299, 300, 303, 304, 305, 306, 307], "must": [10, 13, 17, 27, 38, 
53, 65, 66, 208, 218, 229, 253, 273, 307], "extens": [10, 25, 256, 293], "full_finetune_single_devic": [10, 17, 264, 298, 300, 301], "Or": [10, 224, 292], "rel": [10, 16, 17, 54, 208, 210, 214, 215, 224, 247, 265, 303, 304], "discuss": [10, 14, 19, 21, 24, 300, 301, 302, 304], "workflow": [10, 11, 291, 301, 304], "write": [10, 16, 22, 25, 254, 255, 256, 274, 301], "own": [10, 13, 18, 21, 22, 38, 272, 281, 298, 299, 300, 302, 303, 304], "loop": 10, "logic": [10, 15, 25, 31, 56, 238, 257, 293, 296, 301, 304], "case": [10, 14, 16, 22, 25, 26, 36, 38, 55, 79, 80, 81, 147, 151, 216, 218, 254, 258, 262, 266, 272, 274, 279, 293, 298, 299, 300, 302, 304, 305, 307], "similar": [10, 13, 16, 60, 61, 63, 65, 66, 67, 70, 71, 72, 235, 247, 300, 302, 303, 304, 305, 307], "scratch": 10, "local": [10, 11, 14, 46, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 119, 148, 169, 179, 204, 273, 277, 281, 292, 298, 299, 300, 301], "single_devic": 10, "py": [10, 13, 24, 27, 74, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 207, 209, 210, 222, 247, 248, 249, 250, 261, 298, 300, 302], "recommend": [10, 60, 61, 62, 67, 68, 70, 72, 153, 214, 221, 273, 276, 299, 300, 305, 307], "python": [10, 24, 273, 277, 281, 287, 289, 298, 300, 306], "convent": [10, 213], "main": [10, 27, 29, 169, 209, 210, 292, 295, 300, 302], "script": [10, 22, 26, 296, 298, 300, 301, 302], "decor": [10, 25, 29], "pars": [10, 24, 27, 28, 241, 296, 301], "omegaconf": [10, 27], "dictconfig": [10, 24, 25, 27, 28, 29, 30, 273, 277, 282], "def": [10, 13, 15, 19, 21, 24, 25, 26, 29, 65, 66, 252, 257, 303, 304, 307], "cfg": [10, 24, 25, 28, 29, 30], "add": [10, 12, 13, 14, 16, 19, 21, 23, 24, 26, 50, 54, 57, 71, 144, 153, 216, 228, 240, 242, 256, 257, 299, 300, 302, 304, 305, 307], "here": [10, 12, 14, 15, 16, 18, 20, 21, 22, 23, 24, 26, 32, 62, 65, 66, 209, 210, 264, 294, 295, 296, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307], "attribut": [10, 212, 232, 242, 250, 260], "__name__": 10, "__main__": 10, "don": [10, 13, 14, 18, 19, 20, 22, 24, 25, 277, 281, 298, 299, 300, 301, 303, 305, 307], "experiment": [10, 21, 24], "optim": [10, 18, 19, 22, 24, 25, 48, 53, 55, 91, 158, 168, 247, 248, 249, 250, 256, 258, 260, 261, 264, 265, 278, 282, 294, 295, 296, 299, 300, 301, 302, 303, 304, 307], "them": [10, 12, 15, 18, 19, 22, 24, 53, 67, 216, 220, 226, 242, 285, 295, 298, 299, 300, 304, 305, 306, 307], "when": [10, 16, 17, 18, 20, 21, 22, 24, 25, 29, 53, 54, 55, 56, 57, 67, 71, 73, 75, 207, 208, 210, 212, 214, 215, 216, 218, 219, 220, 221, 223, 224, 225, 232, 235, 246, 261, 263, 276, 278, 283, 294, 298, 300, 302, 303, 304, 305, 306, 307], "mean": [10, 24, 144, 208, 209, 213, 214, 215, 224, 245, 272, 298, 299, 301, 304, 306], "high": [10, 53, 55, 56, 293, 303, 304, 305], "level": [10, 25, 55, 56, 221, 223, 243, 260, 272, 287, 293, 303, 307], "paramet": [10, 13, 14, 15, 16, 25, 27, 28, 29, 30, 31, 33, 35, 36, 37, 38, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 91, 92, 93, 94, 95, 96, 97, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 116, 117, 118, 119, 120, 121, 122, 125, 126, 127, 128, 129, 130, 131, 132, 136, 137, 138, 139, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 154, 155, 156, 157, 158, 159, 160, 161, 162, 165, 166, 167, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 
183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 269, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 291, 293, 294, 295, 296, 298, 299, 300, 301, 302, 303, 304, 306, 307], "easili": [10, 16, 22, 24, 293, 303, 304, 306, 307], "custom_decod": 10, "customtransformerdecod": 10, "nn": [10, 27, 45, 47, 50, 81, 142, 143, 205, 207, 208, 212, 213, 214, 215, 216, 217, 218, 219, 220, 224, 225, 226, 227, 228, 229, 232, 233, 234, 252, 259, 260, 272, 278, 279, 283, 284, 303, 304, 307], "modul": [10, 13, 15, 21, 24, 27, 65, 66, 78, 79, 80, 81, 142, 143, 144, 147, 151, 156, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 247, 248, 249, 252, 257, 259, 260, 263, 272, 278, 279, 281, 301, 303, 304, 305, 307], "A": [10, 13, 15, 19, 25, 26, 33, 34, 37, 40, 42, 43, 47, 48, 49, 50, 53, 54, 67, 81, 179, 204, 208, 212, 213, 214, 215, 216, 220, 224, 231, 235, 239, 240, 242, 244, 245, 246, 247, 248, 249, 250, 251, 252, 257, 258, 264, 265, 266, 271, 272, 290, 291, 297, 298, 299, 304, 305, 306, 307], "architectur": [10, 25, 100, 153, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 193, 194, 195, 196, 197, 198, 200, 201, 202, 203, 214, 216, 224, 226, 257, 298], "present": [10, 33, 37, 42, 62, 63, 65, 66, 67, 68, 69, 240, 256, 283], "custom_model": 10, "num_lay": [10, 27, 81, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 214, 216, 224, 226], "int": [10, 15, 21, 24, 26, 47, 48, 49, 50, 51, 54, 61, 65, 66, 72, 73, 74, 75, 77, 78, 79, 80, 81, 85, 86, 87, 88, 89, 90, 91, 94, 95, 96, 97, 98, 99, 101, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 154, 155, 156, 157, 158, 160, 162, 163, 164, 165, 166, 167, 169, 170, 171, 172, 173, 174, 175, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 204, 207, 208, 209, 210, 213, 214, 215, 216, 219, 221, 222, 223, 224, 225, 226, 230, 231, 237, 238, 239, 240, 241, 242, 244, 251, 252, 254, 255, 256, 258, 259, 261, 263, 268, 272, 273, 274, 275, 276, 277, 279, 281, 282, 298, 303, 304, 305, 307], "classification_head": 10, "bool": [10, 15, 19, 21, 24, 31, 33, 35, 36, 37, 42, 45, 54, 57, 58, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 81, 85, 86, 87, 88, 89, 90, 91, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 144, 145, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 175, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 208, 213, 214, 215, 220, 221, 222, 224, 226, 230, 231, 235, 236, 238, 239, 240, 242, 245, 251, 252, 254, 255, 256, 263, 265, 269, 270, 272, 273, 276, 279, 282, 283, 288, 305, 307], "fals": [10, 13, 14, 15, 16, 18, 19, 22, 24, 33, 35, 36, 37, 42, 45, 53, 54, 58, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 75, 76, 81, 85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 
108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 145, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 175, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 208, 214, 215, 217, 218, 219, 224, 225, 226, 230, 231, 232, 235, 239, 251, 254, 255, 256, 267, 270, 282, 283, 298, 299, 300, 302, 304, 306, 307], "setup": [10, 22, 24, 25, 75, 207, 208, 213, 214, 215, 217, 218, 219, 224, 226, 259, 282, 298, 300, 304, 307], "expos": [10, 13, 24, 25, 256, 296, 301], "friendli": [10, 60, 64, 67, 71, 73, 299], "manner": [10, 20], "rather": [10, 247, 305], "everi": [10, 12, 22, 25, 62, 63, 67, 68, 69, 78, 79, 80, 146, 150, 216, 218, 276, 282, 292, 298, 305, 307], "construct": [10, 36, 63, 244, 296, 304], "care": [10, 22, 254, 256, 300, 302, 304], "how": [10, 13, 14, 18, 22, 23, 24, 25, 26, 216, 252, 273, 279, 291, 294, 295, 298, 299, 300, 301, 302, 305, 306, 307], "implement": [10, 19, 21, 22, 25, 55, 57, 59, 60, 61, 62, 63, 64, 67, 68, 69, 70, 71, 72, 205, 209, 210, 211, 216, 222, 229, 231, 237, 238, 243, 247, 248, 249, 250, 254, 261, 266, 276, 293, 295, 303, 304, 305, 306, 307], "llama3_2_vision_11b": 10, "custom_dataset": [10, 13], "sftdataset": [10, 13, 24, 55, 58, 59, 60, 62, 64, 65, 66, 68, 69], "packeddataset": [10, 17, 53, 58, 59, 60, 62, 64, 68, 69, 71, 72], "inputoutputtomessag": [10, 13, 14, 62, 68], "modeltoken": [10, 15, 21, 24, 36, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 67, 68, 69, 70, 71, 72, 242], "build": [10, 25, 71, 81, 91, 101, 116, 125, 146, 147, 150, 151, 158, 160, 175, 253, 293, 302, 304, 305], "block": [10, 25, 54, 85, 86, 87, 91, 95, 96, 97, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 146, 149, 150, 151, 154, 155, 156, 157, 158, 165, 166, 171, 172, 173, 174, 175, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 208, 214, 215, 235, 236, 293], "tiny_cod": 10, "pack": [10, 54, 58, 59, 60, 62, 64, 65, 66, 68, 69, 71, 72, 208, 210, 214, 215, 224, 306], "subset": [10, 15, 16, 47, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 95, 107, 120, 129, 149, 154, 156, 165, 171, 227, 233], "nampdn": 10, "tini": 10, "respons": [10, 12, 13, 18, 19, 21, 32, 33, 35, 36, 37, 42, 55, 56, 60, 62, 63, 64, 65, 66, 67, 68, 69, 242, 245, 246, 247, 249, 250, 300, 301, 302], "model_transform": [10, 13, 15, 16, 55, 56, 62, 65, 66, 68, 69, 144], "message_transform": [10, 13, 55, 56], "column_map": [10, 12, 13, 16, 18, 31, 33, 35, 37, 42, 53, 58, 59, 62, 63, 64, 65, 66, 67, 68, 69, 70], "input": [10, 11, 12, 13, 14, 15, 20, 21, 22, 31, 35, 47, 48, 49, 50, 54, 55, 56, 59, 61, 62, 64, 65, 66, 68, 69, 72, 78, 79, 80, 81, 94, 106, 119, 142, 143, 144, 147, 148, 151, 162, 169, 171, 175, 205, 206, 208, 209, 210, 211, 212, 213, 214, 215, 216, 222, 223, 224, 225, 226, 230, 231, 239, 240, 244, 254, 256, 264, 281, 284, 299, 304, 307], "filter_fn": [10, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72], "lambda": [10, 245], "x": [10, 22, 45, 73, 74, 75, 78, 79, 80, 142, 143, 205, 206, 208, 209, 210, 211, 213, 214, 215, 216, 224, 225, 226, 230, 231, 267, 280, 303, 304, 306, 307], "split_across_pack": [10, 54, 71], "els": [10, 11, 12, 19, 25, 277, 293, 307], "posit": [10, 17, 24, 27, 54, 74, 76, 78, 79, 80, 81, 91, 95, 125, 129, 142, 147, 151, 154, 156, 158, 160, 165, 167, 207, 208, 210, 213, 214, 215, 216, 224, 225, 302], "automat": [10, 12, 16, 17, 19, 21, 23, 24, 26, 27, 59, 60, 298, 300, 307], "instanti": [10, 30, 38, 82, 83, 
171, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 230, 231, 294, 300, 303, 304, 307], "approxim": [85, 86, 87, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 230, 231, 304], "factor": [85, 86, 87, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 230, 231, 245, 300], "dropout": [85, 86, 87, 91, 95, 96, 97, 101, 107, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 149, 150, 151, 154, 155, 156, 157, 158, 160, 165, 166, 167, 171, 172, 173, 174, 175, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 208, 230, 231, 304, 305, 307], "decompos": [85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 154, 155, 156, 157, 165, 166, 230, 294], "magnitud": [85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 154, 155, 156, 157, 165, 166, 230, 305], "dora": [85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 129, 131, 132, 138, 139, 150, 151, 154, 155, 156, 157, 165, 166, 230, 294], "ab": [85, 86, 88, 89, 90, 95, 96, 97, 98, 99, 102, 103, 104, 105, 107, 108, 109, 110, 112, 113, 114, 115, 120, 121, 122, 123, 124, 131, 132, 133, 134, 135, 138, 139, 140, 141, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 208, 209, 210, 216, 247, 248, 249, 250], "2402": [85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 154, 155, 156, 157, 165, 166], "09353": [85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 154, 155, 156, 157, 165, 166], "code_llama2_70b": 86, "code_llama2_7b": 87, "qlora": [88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170, 220, 291, 293, 294, 302, 304], "paper": [88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170, 244, 247, 249, 250, 303, 304, 307], "2305": [88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170, 208, 247, 249], "14314": [88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170], "lora_code_llama2_13b": 88, "lora_code_llama2_70b": 89, "lora_code_llama2_7b": 90, "head_dim": [91, 95, 207, 208, 214], "intermediate_dim": [91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175], "attn_dropout": [91, 95, 101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175, 208, 214], "norm_ep": [91, 95, 101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175], "1e": [91, 95, 101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175, 209, 303, 305], "06": [91, 95, 209, 304], "rope_bas": [91, 95, 101, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175], "10000": [91, 95, 101, 154, 156, 158, 160, 165, 167, 210], "norm_embed": [91, 95], "transformerselfattentionlay": [91, 101, 116, 125, 158, 175, 213, 214, 224, 226], "rm": [91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175], "norm": [91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 214], "space": [91, 101, 116, 125, 146, 150, 158, 175, 214, 228, 305], "slide": [91, 158, 168], "window": [91, 158, 168], "vocabulari": [91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 
154, 156, 158, 160, 165, 167, 171, 175, 221, 223, 304, 305], "mha": [91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 208, 214], "onto": [91, 95, 101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175, 208, 228], "epsilon": [91, 95, 101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175, 248], "rotari": [91, 95, 101, 125, 129, 154, 156, 158, 160, 165, 167, 210, 302], "10_000": [91, 95, 154, 156, 158, 160, 167], "blog": [92, 93], "technolog": [92, 93], "develop": [92, 93, 292, 307], "gemmatoken": 94, "_templatetyp": [94, 106, 119, 148, 162, 169, 179, 204], "gemma_2b": 96, "gemma_7b": 97, "lora_gemma_2b": 98, "lora_gemma_7b": 99, "taken": [100, 304, 307], "sy": [100, 299], "honest": [100, 299], "pari": [100, 153], "capit": [100, 153], "franc": [100, 153], "known": [100, 153, 266, 306], "stun": [100, 153], "05": [101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175], "gqa": [101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 208], "mqa": [101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 208], "kvcach": [101, 107, 116, 120, 125, 129, 146, 150, 165, 171, 175, 208, 214, 217, 218, 219, 224], "scale_hidden_dim_for_mlp": [101, 107, 116, 120, 125, 129, 146, 150, 171, 175], "2307": [102, 103, 104, 105], "09288": [102, 103, 104, 105], "classif": [105, 156, 160, 161, 257], "llama2_70b": 109, "llama2_7b": [110, 304], "classifi": [111, 156, 160, 161, 283, 305], "llama2_reward_7b": [111, 257], "lora_llama2_13b": 112, "lora_llama2_70b": 113, "lora_llama2_7b": [114, 304], "lora_llama2_reward_7b": 115, "500000": [116, 120, 125, 129, 146, 150], "llama3token": [119, 144, 238], "regist": [119, 144, 148, 169, 179, 204, 220, 278, 307], "canon": [119, 144, 148, 169, 179, 204], "llama3_70b": 121, "lora_llama3_70b": 123, "lora_llama3_8b": [124, 305], "scale_factor": [125, 129], "500_000": [125, 129], "rope": [125, 129, 171, 175, 208, 210], "llama3_1": [126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 294, 303], "llama3_1_70b": 131, "llama3_1_8b": 132, "lora_llama3_1_405b": 133, "lora_llama3_1_70b": 134, "lora_llama3_1_8b": 135, "llama3_2_1b": [138, 217, 218, 219], "llama3_2_3b": 139, "lora_llama3_2_1b": 140, "lora_llama3_2_3b": 141, "projection_head": [142, 224, 228], "combin": [142, 144, 147, 151, 214, 224, 226, 228, 246, 303], "learnabl": [142, 211, 224, 226, 230, 300], "fusion": [142, 145, 146, 147, 149, 150, 151, 224, 225, 226, 227, 228], "encoder_dim": [142, 143], "decoder_dim": [142, 143], "num_img": [142, 143], "num_emb": [142, 143], "broken": [142, 143, 216, 226], "width": [142, 306], "clip_embeds_per_til": 142, "emb": [142, 143, 208, 213, 214, 224], "num_hidden_input": 143, "sequenti": [143, 224, 228], "num_hidden": 143, "hidden_st": [143, 216], "image_mean": 144, "image_std": 144, "tranform": 144, "possible_resolut": 144, "448": [144, 145, 148, 149], "deviat": 144, "still": [144, 221, 223, 225, 226, 294, 304, 306, 307], "transformed_data": 144, "img1": [144, 244], "img2": [144, 244], "31587": [144, 239, 240], "29644": [144, 239, 240], "102": [144, 239, 240], "truncate_at_eo": [144, 240], "skip": [144, 208], "tokenize_head": 144, "tokenize_end": 144, "header": 144, "eom": 144, "wether": 144, "decoder_train": [145, 149, 152, 224], "encoder_train": [145, 149, 152, 224], "fusion_train": [145, 149, 152, 224], "deepfusionmodel": [145, 149, 152], "trainabl": [145, 149, 226, 231, 234, 272, 304, 305, 307], "resiz": [145, 148, 149], "fusion_interv": [146, 150], 
"num_special_token": [146, 150], "encoder_max_seq_len": [146, 150, 213, 214, 215, 219, 224, 226], "causalselfattent": [146, 150], "interv": [146, 150, 301], "clip_embed_dim": [147, 151], "clip_num_lay": [147, 151], "clip_hidden_st": [147, 151], "num_layers_project": [147, 151], "decoder_embed_dim": [147, 151], "llama3visionencod": [147, 151], "spatial": [147, 151], "backbon": [147, 151], "trainbl": 149, "decoder_lora": 150, "fusion_lora": [150, 151], "encoder_lora": 151, "lora_llama3_2_vision_11b": 152, "num_class": [156, 160, 283], "announc": 159, "ray2333": 161, "feedback": [161, 247], "lora_mistral_7b": 163, "lora_mistral_reward_7b": 164, "phi3_mini": [166, 257], "128k": 168, "nor": 168, "phi3minitoken": 169, "tokenizer_config": 169, "spm": 169, "lm": [169, 248, 303], "bo": [169, 239, 242, 299], "unk": 169, "augment": [169, 307], "endoftext": 169, "phi3minisentencepiecebasetoken": 169, "lora_phi3_mini": 170, "1000000": [171, 175], "tie_word_embed": [171, 172, 173, 175, 176, 177, 180, 183, 184, 187, 192, 195, 196, 199], "qwen2transformerdecod": 171, "period": [171, 175], "word": [171, 175, 305, 306], "qwen2_0_5b": [172, 212], "qwen2_1_5b": [173, 212], "qwen2_7b": 174, "qwen": [176, 177, 178, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203], "merges_fil": [179, 204], "qwen2token": 179, "qwen2_5_0_5b": 180, "qwen2_5_14b_bas": 181, "slightli": [181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 193, 194, 195, 196, 197, 198, 200, 201, 202, 203, 303], "qwen2_5_14b_instruct": 182, "qwen2_5_1_5b_bas": 183, "qwen2_5_1_5b_instruct": 184, "qwen2_5_32b_bas": 185, "qwen2_5_32b_instruct": 186, "qwen2_5_3b": 187, "qwen2_5_72b_bas": 188, "qwen2_5_72b_instruct": 189, "qwen2_5_7b_bas": 190, "qwen2_5_7b_instruct": 191, "qwen2_5token": 204, "gate_proj": 205, "down_proj": 205, "up_proj": 205, "feed": [205, 213, 215], "network": [205, 232, 304, 307], "fed": [205, 299], "multipli": [205, 305], "in_dim": [205, 229, 230, 231, 304, 305, 307], "out_dim": [205, 214, 229, 230, 231, 304, 305, 307], "layernorm": 206, "past": 207, "expand": 207, "dpython": [207, 208, 213, 214, 215, 219, 220, 224, 226, 280, 284], "reset": [207, 208, 213, 214, 215, 224, 226, 265], "k_val": 207, "v_val": 207, "fill": 207, "bfloat16": [207, 220, 280, 300, 301, 302, 304, 305, 306], "greater": [207, 216, 288], "pos_embed": [208, 213, 304, 306], "q_norm": 208, "k_norm": 208, "kv_cach": [208, 217, 218, 219], "is_caus": 208, "13245v1": 208, "multihead": 208, "extrem": 208, "credit": 208, "litgpt": 208, "v": [208, 214, 224, 304], "n_kv_head": 208, "rotarypositionalembed": [208, 304, 306], "rmsnorm": 208, "vice": [208, 298], "versa": [208, 298], "y": 208, "s_x": 208, "s_y": 208, "_masktyp": [208, 214, 215], "score": [208, 214, 215, 246], "encoder_max_cache_seq_len": [208, 214, 215], "j": [208, 213, 214, 215, 224], "blockmask": [208, 214, 215], "create_block_mask": [208, 214, 215], "flex_attent": [208, 214, 215], "n_h": [208, 210], "num": [208, 210], "n_kv": 208, "h_d": [208, 210], "reset_cach": [208, 213, 214, 215, 224, 226], "setup_cach": [208, 213, 214, 215, 217, 218, 224, 226], "ep": 209, "squar": 209, "1910": 209, "07467": 209, "propos": [210, 305], "2104": 210, "09864": 210, "verfic": 210, "l80": 210, "init": [210, 265, 277, 307], "exceed": 210, "freq": 210, "recomput": [210, 305], "geometr": 210, "progress": [210, 296, 301, 305], "rotat": 210, "angl": 210, "basic": [211, 302], "tied_modul": 212, "pointer": [212, 293], "why": [212, 299, 301, 304], "whose": [212, 232, 273, 278], "attributeerror": [212, 285], "attn": [213, 215, 217, 
218, 219, 304, 306, 307], "multiheadattent": [213, 215, 304, 306], "ca_norm": 213, "mlp_norm": [213, 215], "ca_scal": 213, "mlp_scale": [213, 215], "ff": [213, 215], "caches_are_en": [213, 214, 215, 217, 218, 219, 224, 226], "func": [213, 215, 226], "caches_are_setup": [213, 214, 215, 217, 218, 219, 224, 226], "token_sequ": 213, "embed_sequ": 213, "decoder_max_seq_len": [213, 214, 215, 217, 218, 219, 224, 226], "modulelist": 214, "output_hidden_st": [214, 224], "belong": [214, 260], "reduc": [214, 247, 293, 294, 295, 303, 304, 305, 306, 307], "statement": 214, "improv": [214, 240, 249, 263, 295, 302, 303, 304, 305], "readabl": [214, 300], "behaviour": [214, 224, 283], "alter": [214, 224], "common_util": [214, 217, 218, 219, 220], "disable_kv_cach": [214, 224], "chunked_output": 214, "last_hidden_st": 214, "chunk": [214, 221, 223, 240], "cewithchunkedoutputloss": [214, 224], "upcast": [214, 221, 223], "set_num_output_chunk": [214, 224], "num_chunk": [214, 221, 223], "s_e": [214, 224], "d_e": [214, 224], "arang": [214, 224], "prompt_length": [214, 224], "correspondingli": 214, "padded_prompt_length": 214, "m_": [214, 224], "realloc": [214, 224], "runtimeerror": [214, 242, 258, 262, 264, 269], "num_output_chunk": [214, 221, 223, 224], "transformercrossattentionlay": [214, 224, 226], "fusionlay": [214, 224], "sa_norm": 215, "sa_scal": 215, "token_pos_embed": 216, "pre_tile_pos_emb": 216, "post_tile_pos_emb": 216, "cls_project": 216, "vit": 216, "11929": 216, "convolut": 216, "flatten": 216, "downscal": 216, "800x400": 216, "400x400": 216, "_transform": 216, "whole": [216, 303], "n_token": 216, "101": 216, "pool": 216, "tiledtokenpositionalembed": 216, "tilepositionalembed": 216, "tile_pos_emb": 216, "8x8": 216, "21": 216, "22": 216, "23": [216, 261], "24": [216, 301, 302], "25": [216, 300, 303], "26": 216, "27": [216, 300], "28": [216, 300], "29": [216, 307], "30": [216, 251, 306], "31": [216, 302], "33": 216, "34": 216, "35": [216, 307], "36": 216, "37": 216, "38": [216, 300], "39": 216, "41": 216, "43": 216, "44": 216, "45": 216, "46": 216, "47": 216, "48": [216, 300, 307], "49": 216, "50": [216, 251, 273, 300], "51": 216, "52": [216, 301], "53": 216, "54": 216, "55": [216, 301], "56": 216, "57": [216, 304, 307], "58": 216, "59": [216, 307], "60": 216, "61": [216, 300], "62": 216, "63": 216, "64": [216, 294, 304, 305], "num_patches_per_til": 216, "emb_dim": 216, "constain": 216, "anim": 216, "max_n_img": 216, "n_channel": 216, "vision_util": 216, "tile_crop": 216, "800": 216, "patch_grid_s": 216, "rand": 216, "nch": 216, "tile_cropped_imag": 216, "batch_imag": 216, "unsqueez": 216, "batch_aspect_ratio": 216, "clip_vision_encod": 216, "cache_en": 217, "float32": [217, 218, 219, 262, 303], "1024": [217, 218, 219, 306], "temporarili": [218, 219, 232, 305], "enter": [218, 219], "overhead": [218, 247, 295, 305, 306], "untouch": [218, 299], "yield": [218, 219, 232], "caller": [218, 219, 232], "delete_kv_cach": 219, "offload_to_cpu": 220, "hook": [220, 278, 305, 307], "nf4": [220, 305, 307], "restor": 220, "higher": [220, 302, 303, 305, 306, 307], "offload": [220, 307], "increas": [220, 247, 261, 302, 303, 304, 305, 306], "peak": [220, 265, 271, 300, 302, 304, 307], "gpu": [220, 295, 298, 300, 301, 302, 303, 304, 305, 306, 307], "_register_state_dict_hook": 220, "mymodul": 220, "_after_": 220, "nf4tensor": [220, 307], "unquant": [220, 306, 307], "unus": 220, "ignore_index": [221, 222, 223, 303], "entropi": [221, 223, 303], "bf16": [221, 223, 262, 305, 307], "ce": [221, 303], "better": [221, 223, 
250, 293, 299, 300, 303, 305, 306], "accuraci": [221, 223, 295, 300, 302, 303, 304, 305, 306, 307], "doubl": [221, 223, 307], "therefor": [221, 223, 305, 307], "num_token": [221, 222, 223], "consider": [221, 223], "compute_cross_entropi": 221, "gain": [221, 295, 302], "won": [221, 299], "realiz": 221, "pull": [221, 294, 295, 298], "1390": 221, "loss_fn": [221, 223], "chunkedcrossentropyloss": 221, "output_chunk": [221, 223], "kullback": [222, 303], "leibler": [222, 303], "diverg": [222, 223, 246, 303], "jongwooko": [222, 303], "distillm": [222, 303], "17c0f98bc263b1861a02d5df578c84aea652ee65": 222, "student_logit": [222, 223, 303], "teacher_logit": [222, 223, 303], "student": [222, 223], "teacher": [222, 223, 300], "kl": [222, 223, 246, 303], "teacher_chunk": 223, "teacher_model": 223, "model_fus": [224, 225, 226, 227, 228], "deepfus": 224, "evolut": 224, "signatur": 224, "interchang": 224, "fusion_param": [224, 225, 226, 227, 228], "fusionembed": 224, "fusion_lay": [224, 226], "clip_vit_224": [224, 228], "feedforward": [224, 228], "register_fusion_modul": 224, "flamingo": [224, 226, 244], "strict": [224, 225, 226, 235, 304], "freez": [224, 300, 304], "fusion_vocab_s": 225, "necessit": 225, "rout": 225, "128": [225, 294, 302, 304, 305], "fusion_first": 226, "shot": [226, 300, 302, 306], "infus": 226, "interpret": 226, "enocd": 226, "isn": [226, 262, 298], "fused_lay": 226, "mark": [228, 299], "earli": 228, "peft": [229, 230, 231, 232, 233, 234, 235, 236, 254, 294, 304, 307], "adapter_param": [229, 230, 231, 232, 233, 234], "proj": 229, "loralinear": [229, 304, 305, 307], "alpha": [230, 231, 304, 305, 307], "use_bia": [230, 231], "scalar": [230, 273, 274, 275, 276, 277, 305], "orient": [230, 305], "bax": [230, 231], "distinct": [230, 307], "lora_a": [230, 231, 304, 307], "lora_b": [230, 231, 304, 307], "initialize_dora_magnitud": 230, "perturb": 231, "decomposit": [231, 304, 305], "matric": [231, 304, 307], "mapsto": 231, "w_0x": 231, "r": [231, 304], "polici": [232, 246, 247, 248, 249, 250, 252, 263, 272, 279, 296], "neural": [232, 304, 307], "get_adapter_param": [234, 304], "base_miss": 235, "base_unexpect": 235, "lora_miss": 235, "lora_unexpect": 235, "validate_state_dict_for_lora": [235, 304], "unlik": 235, "reli": [235, 242, 300, 302], "unexpect": 235, "nonempti": 235, "full_model_state_dict_kei": 236, "lora_state_dict_kei": 236, "base_model_state_dict_kei": 236, "confirm": [236, 292], "lora_modul": 236, "complement": 236, "disjoint": 236, "overlap": [236, 305], "tiktokenbasetoken": 237, "light": 239, "sentencepieceprocessor": 239, "trim": 239, "whitespac": 239, "spm_model": [239, 299], "tokenized_text": [239, 240], "add_bo": [239, 240, 299], "trim_leading_whitespac": 239, "prefix": [239, 305], "unbatch": 239, "bos_id": [240, 242], "lightweight": [240, 299], "substr": 240, "repetit": 240, "speed": [240, 282, 302, 305, 306, 307], "identif": 240, "regex": 240, "absent": 240, "tt_model": 240, "tokenizer_json_path": 241, "heavili": 242, "concat": 242, "1788": 242, "2643": 242, "465": 242, "22137": 242, "join": 242, "satisfi": [242, 300], "loos": 243, "image_token_id": 244, "particip": [244, 245], "laid": 244, "fig": 244, "2204": 244, "14198": 244, "immedi": [244, 305], "until": [244, 305], "img3": 244, "equal": [244, 288], "gamma": [245, 249, 250], "lmbda": 245, "estim": [245, 246], "1506": 245, "02438": 245, "response_len": [245, 246], "receiv": 245, "discount": 245, "gae": 245, "logprob": [246, 250], "ref_logprob": 246, "kl_coeff": 246, "valid_score_idx": 246, "coeffici": [246, 248], 
"total_reward": 246, "kl_reward": 246, "beta": [247, 250], "label_smooth": [247, 250], "18290": 247, "intuit": [247, 249, 250], "dispref": 247, "dynam": [247, 306], "degener": 247, "occur": [247, 295], "naiv": 247, "trl": [247, 249, 250], "5d1deb1445828cfd0e947cb3a7925b1c03a283fc": 247, "dpo_train": [247, 249], "l844": 247, "2009": 247, "01325": 247, "regular": [247, 250, 305, 306, 307], "baselin": [247, 248, 300, 303, 304], "uncertainti": [247, 250], "policy_chosen_logp": [247, 249, 250], "policy_rejected_logp": [247, 249, 250], "reference_chosen_logp": [247, 249], "reference_rejected_logp": [247, 249], "chosen_reward": [247, 249, 250], "rejected_reward": [247, 249, 250], "value_clip_rang": 248, "value_coeff": 248, "proxim": [248, 296], "1707": 248, "06347": 248, "eqn": 248, "vwxyzjn": 248, "ccc19538e817e98a60d3253242ac15e2a562cb49": 248, "lm_human_preference_detail": 248, "train_policy_acceler": 248, "l719": 248, "ea25b9e8b234e6ee1bca43083f8f3cf974143998": 248, "ppo2": 248, "l68": 248, "l75": 248, "pi_old_logprob": 248, "pi_logprob": 248, "phi_old_valu": 248, "phi_valu": 248, "value_padding_mask": 248, "old": 248, "participag": 248, "five": 248, "policy_loss": 248, "value_loss": 248, "clipfrac": 248, "fraction": 248, "statist": [249, 305], "rso": 249, "hing": 249, "2309": 249, "06657": 249, "logist": 249, "regress": 249, "slic": 249, "10425": 249, "almost": [249, 304], "svm": 249, "counter": 249, "4dce042a3863db1d375358e8c8092b874b02934b": 249, "l1141": 249, "simpo": 250, "2405": 250, "14734": 250, "averag": [250, 303], "implicit": 250, "margin": 250, "bradlei": 250, "terri": 250, "larger": [250, 256, 300, 302, 303, 305], "win": 250, "lose": 250, "98ad01ddfd1e1b67ec018014b83cba40e0caea66": 250, "cpo_train": 250, "l603": 250, "pretti": [250, 300], "identitc": 250, "elimin": 250, "kind": 250, "ipoloss": 250, "fill_valu": 251, "sequence_length": 251, "stop_token_id": 251, "869": 251, "eos_mask": 251, "truncated_sequ": 251, "datatyp": [252, 305, 307], "denot": 252, "auto_wrap_polici": [252, 263, 279], "submodul": [252, 272], "obei": 252, "contract": 252, "get_fsdp_polici": 252, "modules_to_wrap": [252, 263, 272], "min_num_param": 252, "my_fsdp_polici": 252, "recurs": [252, 272, 276], "sum": [252, 303, 304], "p": [252, 258, 304, 306, 307], "numel": [252, 304], "1000": [252, 306], "stabl": [252, 270, 276, 281, 292, 305], "html": [252, 270, 276, 279, 281, 287, 291], "filename_format": 253, "max_filenam": 253, "concis": 253, "filenam": [253, 274], "file_": 253, "_of_": 253, "n_file": 253, "build_checkpoint_filenam": 253, "file_00001_of_00003": 253, "file_00002_of_00003": 253, "file_00003_of_00003": 253, "safe_seri": 254, "from_pretrain": 254, "0001_of_0003": 254, "0002_of_0003": 254, "todo": 254, "preserv": [254, 307], "weight_map": [254, 300], "convert_weight": 254, "_model_typ": [254, 257], "intermediate_checkpoint": [254, 255, 256], "adapter_onli": [254, 255, 256], "_weight_map": 254, "shard": [255, 302], "wip": 255, "qualnam": 257, "boundari": 257, "distinguish": 257, "llama3_vis": 257, "llama3_2_vision_decod": 257, "mistral_reward_7b": 257, "my_new_model": 257, "my_custom_state_dict_map": 257, "optim_map": 258, "bare": 258, "bone": 258, "optim_dict": [258, 260, 278], "cfg_optim": 258, "ckpt": 258, "optim_ckpt": 258, "placeholder_optim_dict": 258, "optiminbackwardwrapp": 258, "get_last_lr": 258, "rate": [258, 261, 264, 293, 301, 305], "schedul": [258, 261, 282, 301, 305], "get_optim_kei": 258, "arbitrari": [258, 304, 305], "optim_ckpt_map": 258, "set_lr_schedul": 258, "lr_schedul": [258, 
261], "lrschedul": 258, "loadabl": 258, "step_lr_schedul": 258, "ac_mod": 259, "ac_opt": 259, "op": [259, 306], "ac": [259, 263], "optimizerinbackwardwrapp": [260, 264], "named_paramet": [260, 283], "num_warmup_step": 261, "num_training_step": 261, "num_cycl": [261, 282], "last_epoch": 261, "lambdalr": 261, "linearli": 261, "decreas": [261, 304, 305, 306, 307], "cosin": 261, "v4": 261, "src": 261, "l104": 261, "warmup": [261, 282], "phase": 261, "wave": 261, "half": [261, 305], "kernel": 262, "memory_efficient_fsdp_wrap": [263, 306], "maxim": [263, 272, 291, 293], "workload": [263, 295, 305, 306], "fullyshardeddataparallel": [263, 272, 305], "fsdppolicytyp": [263, 272], "warpper": 264, "optimizer_in_backward": 264, "reset_stat": 265, "track": [265, 273], "alloc": [265, 271, 272, 302, 305, 307], "reserv": [265, 271, 299, 307], "stat": [265, 271, 307], "int4": [266, 306], "4w": 266, "recogn": 266, "int8dynactint4weightquant": [266, 295, 306], "8da4w": [266, 306], "int4weightonlyquant": [266, 306], "int8dynactint4weightqatquant": [266, 295, 306], "qat": [266, 291, 296], "int4weightonlyqatquant": 266, "exclud": 267, "aka": 268, "master": 270, "port": [270, 298], "address": [270, 303, 305], "hold": [270, 301], "peak_memory_act": 271, "peak_memory_alloc": 271, "peak_memory_reserv": 271, "get_memory_stat": 271, "hierarch": 272, "api_kei": 273, "experiment_kei": 273, "onlin": 273, "log_cod": 273, "comet": 273, "site": [273, 300], "ml": 273, "team": 273, "compar": [273, 276, 288, 300, 302, 303, 304, 306, 307], "sdk": 273, "uncategor": 273, "alphanumer": 273, "charact": 273, "get_or_cr": 273, "fresh": 273, "persist": 273, "hpo": 273, "sweep": 273, "server": 273, "offlin": 273, "auto": [273, 298], "creation": 273, "experimentconfig": 273, "project_nam": 273, "my_workspac": 273, "my_metr": [273, 276, 277], "importerror": [273, 277], "termin": [273, 276, 277], "comet_api_kei": 273, "flush": [273, 274, 275, 276, 277], "ndarrai": [273, 274, 275, 276, 277], "record": [273, 274, 275, 276, 277, 282], "log_config": [273, 277], "payload": [273, 274, 275, 276, 277], "log_": 274, "unixtimestamp": 274, "thread": 274, "safe": 274, "organize_log": 276, "tensorboard": 276, "subdirectori": 276, "logdir": 276, "startup": 276, "tree": [276, 300, 302], "tfevent": 276, "encount": 276, "frontend": 276, "organ": [276, 298], "accordingli": [276, 306], "my_log_dir": 276, "view": [276, 303], "entiti": 277, "bias": [277, 304, 307], "usernam": 277, "my_ent": 277, "my_group": 277, "account": [277, 304, 307], "link": [277, 300, 302], "capecap": 277, "6053ofw0": 277, "torchtune_config_j67sb73v": 277, "soon": [278, 305], "readi": [278, 291, 299, 306], "grad": 278, "acwrappolicytyp": 279, "author": [279, 293, 301, 305, 307], "fsdp_adavnced_tutori": 279, "insid": 280, "contextmanag": 280, "debug_mod": 281, "pseudo": 281, "commonli": [281, 304, 305, 307], "numpi": 281, "determinist": 281, "global": [281, 305], "warn": 281, "nondeterminist": 281, "cudnn": 281, "set_deterministic_debug_mod": 281, "profile_memori": 282, "with_stack": 282, "record_shap": 282, "with_flop": 282, "wait_step": 282, "warmup_step": 282, "active_step": 282, "profil": 282, "layout": 282, "trace": 282, "profileract": 282, "gradient_accumul": 282, "sensibl": 282, "default_schedul": 282, "reduct": [282, 295, 304], "scope": 282, "flop": 282, "cycl": 282, "repeat": [282, 305], "model_named_paramet": 283, "force_overrid": 283, "concret": [283, 305], "vocab_dim": 283, "named_param": 284, "inplac": [285, 304], "too": [285, 295, 302], "handler": 287, "_log": 287, 
"__version__": 288, "generated_examples_python": 289, "galleri": [289, 297], "sphinx": 289, "000": [290, 297, 302], "execut": [290, 297], "generated_exampl": 290, "mem": [290, 297], "mb": [290, 297], "gentl": 291, "introduct": 291, "first_finetune_tutori": 291, "kd": 291, "torchvis": 292, "torchao": [292, 295, 300, 302, 305, 306, 307], "latest": [292, 295, 301, 305, 307], "whl": 292, "cu121": 292, "cu118": 292, "cu124": 292, "And": [292, 300], "welcom": [292, 298], "greatest": [292, 301], "contributor": 292, "dev": 292, "commit": 292, "branch": 292, "therebi": [292, 305, 306, 307], "forc": [292, 303], "reinstal": 292, "opt": [292, 301], "suffix": 292, "On": [293, 304], "emphas": 293, "simplic": 293, "component": 293, "prove": 293, "democrat": 293, "zoo": 293, "varieti": [293, 304], "integr": [293, 300, 301, 302, 304, 306, 307], "fsdp2": 293, "excit": 293, "checkout": 293, "quickstart": 293, "attain": 293, "embodi": 293, "philosophi": 293, "usabl": 293, "composit": 293, "hard": 293, "outlin": 293, "unecessari": 293, "never": 293, "thoroughli": 293, "competit": 294, "grant": [294, 295, 301], "interest": [294, 295, 300, 303], "8b_lora_single_devic": [294, 298, 299, 302, 303, 305], "lever": [294, 295], "action": [294, 295], "degrad": [295, 305, 306, 307], "simul": [295, 305, 306], "compromis": 295, "blogpost": [295, 305], "qat_distribut": [295, 306], "8b_qat_ful": [295, 306], "least": [295, 302, 304, 306], "vram": [295, 302, 304, 305, 306], "80gb": [295, 306], "a100": 295, "h100": 295, "delai": 295, "fake": [295, 306], "empir": [295, 306], "potenti": [295, 304, 305], "fake_quant_after_n_step": [295, 306], "idea": [295, 303, 307], "roughli": 295, "total_step": 295, "futur": [295, 306], "plan": [295, 300], "un": 295, "groupsiz": [295, 306], "256": [295, 302, 306], "hackabl": [296, 301], "singularli": [296, 301], "technic": [296, 301], "awar": [296, 305, 306], "tracker": 296, "short": 298, "subcommand": 298, "anytim": 298, "symlink": 298, "wrote": 298, "readm": [298, 300, 302], "md": 298, "lot": [298, 300, 305], "recent": 298, "releas": [298, 302], "agre": 298, "term": [298, 305], "perman": 298, "eat": 298, "bandwith": 298, "storag": [298, 307], "00030": 298, "ootb": 298, "7b_full_low_memori": [298, 300, 301], "8b_full_single_devic": 298, "mini_full_low_memori": 298, "7b_full": [298, 300, 301], "13b_full": [298, 300, 301], "70b_full": 298, "edit": 298, "clobber": 298, "destin": 298, "lora_finetune_distribut": [298, 302, 304], "torchrun": 298, "launch": [298, 299, 301], "nproc": 298, "node": 298, "worker": 298, "nnode": [298, 304, 306], "minimum_nod": 298, "maximum_nod": 298, "fail": 298, "rdzv": 298, "rendezv": 298, "endpoint": 298, "8b_lora": [298, 302], "bypass": 298, "fancy_lora": 298, "8b_fancy_lora": 298, "nice": 299, "meet": 299, "overhaul": 299, "multiturn": 299, "untrain": 299, "accompani": 299, "who": 299, "influenti": 299, "hip": 299, "hop": 299, "artist": 299, "2pac": 299, "rakim": 299, "flavor": 299, "formatted_messag": 299, "nyou": 299, "nwho": 299, "518": 299, "25580": 299, "29962": 299, "3532": 299, "14816": 299, "29903": 299, "6778": 299, "_spm_model": 299, "piece_to_id": 299, "manual": [299, 307], "529": 299, "29879": 299, "29958": 299, "nhere": 299, "pure": 299, "mess": 299, "prime": 299, "strictli": 299, "ask": [299, 305], "though": 299, "robust": 299, "pretend": 299, "zuckerberg": 299, "seem": [299, 300], "good": [299, 304, 305], "altogeth": 299, "honor": 299, "custom_8b_lora_single_devic": 299, "favorit": [300, 304], "seemlessli": 300, "connect": [300, 306], "amount": 300, 
"natur": 300, "export": 300, "leverag": [300, 302, 307], "percentag": 300, "16gb": [300, 304], "rtx": 300, "3090": 300, "4090": 300, "hour": 300, "7b_qlora_single_devic": [300, 301, 307], "473": 300, "98": [300, 307], "gb": [300, 302, 304, 306, 307], "484": 300, "01": [300, 301], "fact": [300, 302, 304, 305], "third": 300, "But": [300, 304], "realli": 300, "eleuther_ev": [300, 302, 306], "eleuther_evalu": [300, 302, 306], "lm_eval": [300, 302], "custom_eval_config": [300, 302], "truthfulqa_mc2": [300, 302, 303, 304], "measur": [300, 302], "propens": [300, 302], "324": 300, "loglikelihood": 300, "195": 300, "121": 300, "197": 300, "acc": [300, 306], "388": 300, "489": 300, "great": [300, 305], "custom_generation_config": [300, 302], "kick": 300, "300": 300, "bai": 300, "area": 300, "92": 300, "exploratorium": 300, "san": 300, "francisco": 300, "magazin": 300, "awesom": 300, "bridg": 300, "cool": 300, "96": [300, 307], "sec": [300, 302], "83": 300, "99": [300, 304], "72": 300, "littl": 300, "int8_weight_onli": [300, 302], "int8_dynamic_activation_int8_weight": [300, 302], "ao": [300, 302], "quant_api": [300, 302], "quantize_": [300, 302], "int4_weight_onli": [300, 302], "previous": [300, 302, 304], "benefit": 300, "clone": [300, 304, 306, 307], "assumpt": 300, "new_dir": 300, "output_dict": 300, "sd_1": 300, "sd_2": 300, "dump": 300, "convert_hf_checkpoint": 300, "checkpoint_path": 300, "justin": 300, "school": 300, "math": 300, "ws": 300, "94": [300, 302], "bandwidth": [300, 302], "1391": 300, "84": 300, "thats": 300, "seamlessli": 300, "authent": [300, 301], "hopefulli": 300, "gave": 300, "minut": 301, "agreement": 301, "depth": 301, "principl": 301, "boilerpl": 301, "substanti": [301, 304], "custom_config": 301, "replic": 301, "lorafinetunerecipesingledevic": 301, "lora_finetune_output": 301, "log_1713194212": 301, "3697006702423096": 301, "25880": [301, 307], "83it": 301, "monitor": 301, "tqdm": 301, "e2": 301, "focu": 302, "theta": 302, "observ": [302, 306], "consum": [302, 307], "overal": [302, 303], "8b_qlora_single_devic": [302, 305], "coupl": [302, 304, 307], "meta_model_0": [302, 306], "122": 302, "sarah": 302, "busi": 302, "mum": 302, "young": 302, "children": 302, "live": 302, "north": 302, "east": 302, "england": 302, "135": 302, "88": 302, "138": 302, "346": 302, "09": 302, "139": 302, "broader": 302, "teach": [303, 304], "straight": [303, 304], "jump": [303, 304], "compress": 303, "transfer": 303, "capac": 303, "computation": 303, "expens": 303, "deploi": 303, "imit": 303, "diagram": 303, "aim": [303, 305], "minillm": 303, "forwardklloss": 303, "super": 303, "teacher_prob": 303, "student_logprob": 303, "log_softmax": 303, "prod_prob": 303, "forwardklwithchunkedoutputloss": 303, "knowledge_distillation_single_devic": 303, "bit": [303, 304, 305, 306, 307], "alpaca_cleaned_dataset": 303, "hellaswag": [303, 306], "commonsense_qa": 303, "kd_ratio": 303, "teacher_checkpoint": 303, "00004": 303, "truthfulqa": [303, 304], "commonsens": 303, "constant": 303, "boost": 303, "graph": [303, 305], "irrespect": 303, "3e": 303, "truthful_qa": 303, "wherea": 303, "unfamiliar": 304, "oppos": [304, 307], "momentum": [304, 305], "aghajanyan": 304, "et": 304, "al": 304, "hypothes": 304, "intrins": 304, "eight": 304, "practic": 304, "blue": 304, "although": [304, 306], "rememb": 304, "approx": 304, "15m": 304, "65k": 304, "requires_grad": [304, 307], "frozen_out": [304, 307], "lora_out": [304, 307], "base_model": 304, "lora_model": 304, "lora_llama_2_7b": [304, 307], "alon": 304, "in_featur": [304, 
306], "out_featur": [304, 306], "validate_missing_and_unexpected_for_lora": 304, "peft_util": 304, "set_trainable_param": 304, "lora_param": 304, "total_param": 304, "trainable_param": 304, "2f": 304, "6742609920": 304, "4194304": 304, "7b_lora": 304, "my_model_checkpoint_path": [304, 306, 307], "tokenizer_checkpoint": [304, 306, 307], "my_tokenizer_checkpoint_path": [304, 306, 307], "constraint": 304, "factori": 304, "benefici": 304, "impact": [304, 305], "minor": 304, "lora_experiment_1": 304, "smooth": [304, 307], "curv": [304, 307], "500": 304, "ran": 304, "footprint": [304, 306], "commod": 304, "cogniz": 304, "ax": 304, "parallel": 304, "475": 304, "87": 304, "508": 304, "86": 304, "504": 304, "04": 304, "514": 304, "lowest": 304, "absolut": 304, "4gb": 304, "tradeoff": 304, "salman": 305, "mohammadi": 305, "brief": 305, "glossari": 305, "struggl": 305, "constrain": [305, 306], "cost": 305, "particularli": 305, "gradient_accumulation_step": 305, "throughput": 305, "ram": 305, "bottleneck": 305, "sebastian": 305, "raschka": 305, "fp16": 305, "sound": 305, "quot": 305, "aliv": 305, "region": 305, "enable_activation_checkpoint": 305, "bring": 305, "autograd": [305, 307], "saved_tensors_hook": 305, "cours": 305, "runtim": 305, "hide": 305, "later": 305, "brought": 305, "enable_activation_offload": 305, "total_batch_s": 305, "count": 305, "suppos": 305, "log_every_n_step": 305, "translat": 305, "frequent": 305, "slowli": 305, "num_devic": 305, "adamw8bit": 305, "pagedadamw": 305, "modern": 305, "converg": 305, "stateless": 305, "stochast": 305, "descent": 305, "sacrif": 305, "remov": 305, "optimizer_in_bwd": 305, "cpuoffloadoptim": 305, "offload_gradi": 305, "prototyp": 305, "low_bit_optim": 305, "4e": 305, "adam": 305, "hint": 305, "slowdown": 305, "4x": 305, "fsdp_cpu_offload": 305, "greatli": 305, "lora_": 305, "lora_llama3": 305, "_lora": 305, "firstli": 305, "secondli": 305, "affect": 305, "fashion": 305, "slower": [305, 307], "jointli": 305, "sens": 305, "novel": 305, "normalfloat": [305, 307], "8x": [305, 307], "worth": 305, "cast": [305, 306], "incur": [305, 306, 307], "penalti": 305, "qlora_": 305, "qlora_llama3_8b": 305, "_qlora": 305, "reap": 305, "hood": [305, 307], "doralinear": 305, "swap": [305, 306], "perplex": 306, "goal": 306, "ptq": 306, "kept": 306, "nois": 306, "henc": 306, "x_q": 306, "int8": 306, "zp": 306, "x_float": 306, "qmin": 306, "qmax": 306, "clamp": 306, "x_fq": 306, "dequant": 306, "proce": 306, "prepared_model": 306, "int8dynactint4weightqatlinear": 306, "int8dynactint4weightlinear": 306, "train_loop": 306, "converted_model": 306, "recov": 306, "custom_8b_qat_ful": 306, "2000": 306, "led": 306, "presum": 306, "mutat": 306, "5gb": 306, "custom_quant": 306, "poorli": 306, "custom_eleuther_evalu": 306, "fullmodeltorchtunecheckpoint": 306, "max_seq_length": 306, "my_eleuther_evalu": 306, "stderr": 306, "word_perplex": 306, "9148": 306, "byte_perplex": 306, "5357": 306, "bits_per_byt": 306, "6189": 306, "5687": 306, "0049": 306, "acc_norm": 306, "7536": 306, "0043": 306, "portion": [306, 307], "74": 306, "048": 306, "190": 306, "7735": 306, "5598": 306, "6413": 306, "5481": 306, "0050": 306, "7390": 306, "0044": 306, "7251": 306, "4994": 306, "5844": 306, "5740": 306, "7610": 306, "outperform": 306, "importantli": 306, "characterist": 306, "187": 306, "958": 306, "halv": 306, "motiv": 306, "edg": 306, "smartphon": 306, "executorch": 306, "xnnpack": 306, "export_llama": 306, "use_sdpa_with_kv_cach": 306, "qmode": 306, "group_siz": 306, "get_bos_id": 306, 
"get_eos_id": 306, "output_nam": 306, "llama3_8da4w": 306, "pte": 306, "881": 306, "oneplu": 306, "709": 306, "tok": 306, "815": 306, "316": 306, "364": 306, "highli": 307, "vanilla": 307, "held": 307, "bespok": 307, "vast": 307, "major": 307, "normatfloat": 307, "deepdiv": 307, "de": 307, "counterpart": 307, "set_default_devic": 307, "qlora_linear": 307, "memory_alloc": 307, "177": 307, "152": 307, "del": 307, "empty_cach": 307, "lora_linear": 307, "081": 307, "344": 307, "qlora_llama2_7b": 307, "qlora_model": 307, "essenti": 307, "reparametrize_as_dtype_state_dict_post_hook": 307, "149": 307, "9157477021217346": 307, "02": 307, "08": 307, "15it": 307, "nightli": 307, "200": 307, "hundr": 307, "228": 307, "8158286809921265": 307, "95it": 307, "exercis": 307, "linear_nf4": 307, "to_nf4": 307, "linear_weight": 307, "incom": 307}, "objects": {"torchtune.config": [[27, 0, 1, "", "instantiate"], [28, 0, 1, "", "log_config"], [29, 0, 1, "", "parse"], [30, 0, 1, "", "validate"]], "torchtune.data": [[31, 1, 1, "", "AlpacaToMessages"], [32, 1, 1, "", "ChatMLTemplate"], [33, 1, 1, "", "ChosenRejectedToMessages"], [34, 2, 1, "", "GrammarErrorCorrectionTemplate"], [35, 1, 1, "", "InputOutputToMessages"], [36, 1, 1, "", "Message"], [37, 1, 1, "", "OpenAIToMessages"], [38, 1, 1, "", "PromptTemplate"], [39, 1, 1, "", "PromptTemplateInterface"], [40, 2, 1, "", "QuestionAnswerTemplate"], [41, 2, 1, "", "Role"], [42, 1, 1, "", "ShareGPTToMessages"], [43, 2, 1, "", "SummarizeTemplate"], [44, 0, 1, "", "format_content_with_images"], [45, 0, 1, "", "left_pad_sequence"], [46, 0, 1, "", "load_image"], [47, 0, 1, "", "padded_collate"], [48, 0, 1, "", "padded_collate_dpo"], [49, 0, 1, "", "padded_collate_sft"], [50, 0, 1, "", "padded_collate_tiled_images_and_mask"], [51, 0, 1, "", "truncate"], [52, 0, 1, "", "validate_messages"]], "torchtune.data.Message": [[36, 3, 1, "", "contains_media"], [36, 4, 1, "", "from_dict"], [36, 4, 1, "", "get_media"], [36, 3, 1, "", "text_content"]], "torchtune.datasets": [[53, 1, 1, "", "ConcatDataset"], [54, 1, 1, "", "PackedDataset"], [55, 1, 1, "", "PreferenceDataset"], [56, 1, 1, "", "SFTDataset"], [57, 1, 1, "", "TextCompletionDataset"], [58, 0, 1, "", "alpaca_cleaned_dataset"], [59, 0, 1, "", "alpaca_dataset"], [60, 0, 1, "", "chat_dataset"], [61, 0, 1, "", "cnn_dailymail_articles_dataset"], [62, 0, 1, "", "grammar_dataset"], [63, 0, 1, "", "hh_rlhf_helpful_dataset"], [64, 0, 1, "", "instruct_dataset"], [67, 0, 1, "", "preference_dataset"], [68, 0, 1, "", "samsum_dataset"], [69, 0, 1, "", "slimorca_dataset"], [70, 0, 1, "", "stack_exchange_paired_dataset"], [71, 0, 1, "", "text_completion_dataset"], [72, 0, 1, "", "wikitext_dataset"]], "torchtune.datasets.multimodal": [[65, 0, 1, "", "llava_instruct_dataset"], [66, 0, 1, "", "the_cauldron_dataset"]], "torchtune.generation": [[73, 0, 1, "", "generate"], [74, 0, 1, "", "generate_next_token"], [75, 0, 1, "", "get_causal_mask_from_padding_mask"], [76, 0, 1, "", "get_position_ids_from_padding_mask"], [77, 0, 1, "", "sample"]], "torchtune.models.clip": [[78, 1, 1, "", "TilePositionalEmbedding"], [79, 1, 1, "", "TiledTokenPositionalEmbedding"], [80, 1, 1, "", "TokenPositionalEmbedding"], [81, 0, 1, "", "clip_vision_encoder"]], "torchtune.models.clip.TilePositionalEmbedding": [[78, 4, 1, "", "forward"]], "torchtune.models.clip.TiledTokenPositionalEmbedding": [[79, 4, 1, "", "forward"]], "torchtune.models.clip.TokenPositionalEmbedding": [[80, 4, 1, "", "forward"]], "torchtune.models.code_llama2": [[82, 0, 1, "", "code_llama2_13b"], 
[83, 0, 1, "", "code_llama2_70b"], [84, 0, 1, "", "code_llama2_7b"], [85, 0, 1, "", "lora_code_llama2_13b"], [86, 0, 1, "", "lora_code_llama2_70b"], [87, 0, 1, "", "lora_code_llama2_7b"], [88, 0, 1, "", "qlora_code_llama2_13b"], [89, 0, 1, "", "qlora_code_llama2_70b"], [90, 0, 1, "", "qlora_code_llama2_7b"]], "torchtune.models.gemma": [[91, 0, 1, "", "gemma"], [92, 0, 1, "", "gemma_2b"], [93, 0, 1, "", "gemma_7b"], [94, 0, 1, "", "gemma_tokenizer"], [95, 0, 1, "", "lora_gemma"], [96, 0, 1, "", "lora_gemma_2b"], [97, 0, 1, "", "lora_gemma_7b"], [98, 0, 1, "", "qlora_gemma_2b"], [99, 0, 1, "", "qlora_gemma_7b"]], "torchtune.models.llama2": [[100, 1, 1, "", "Llama2ChatTemplate"], [101, 0, 1, "", "llama2"], [102, 0, 1, "", "llama2_13b"], [103, 0, 1, "", "llama2_70b"], [104, 0, 1, "", "llama2_7b"], [105, 0, 1, "", "llama2_reward_7b"], [106, 0, 1, "", "llama2_tokenizer"], [107, 0, 1, "", "lora_llama2"], [108, 0, 1, "", "lora_llama2_13b"], [109, 0, 1, "", "lora_llama2_70b"], [110, 0, 1, "", "lora_llama2_7b"], [111, 0, 1, "", "lora_llama2_reward_7b"], [112, 0, 1, "", "qlora_llama2_13b"], [113, 0, 1, "", "qlora_llama2_70b"], [114, 0, 1, "", "qlora_llama2_7b"], [115, 0, 1, "", "qlora_llama2_reward_7b"]], "torchtune.models.llama3": [[116, 0, 1, "", "llama3"], [117, 0, 1, "", "llama3_70b"], [118, 0, 1, "", "llama3_8b"], [119, 0, 1, "", "llama3_tokenizer"], [120, 0, 1, "", "lora_llama3"], [121, 0, 1, "", "lora_llama3_70b"], [122, 0, 1, "", "lora_llama3_8b"], [123, 0, 1, "", "qlora_llama3_70b"], [124, 0, 1, "", "qlora_llama3_8b"]], "torchtune.models.llama3_1": [[125, 0, 1, "", "llama3_1"], [126, 0, 1, "", "llama3_1_405b"], [127, 0, 1, "", "llama3_1_70b"], [128, 0, 1, "", "llama3_1_8b"], [129, 0, 1, "", "lora_llama3_1"], [130, 0, 1, "", "lora_llama3_1_405b"], [131, 0, 1, "", "lora_llama3_1_70b"], [132, 0, 1, "", "lora_llama3_1_8b"], [133, 0, 1, "", "qlora_llama3_1_405b"], [134, 0, 1, "", "qlora_llama3_1_70b"], [135, 0, 1, "", "qlora_llama3_1_8b"]], "torchtune.models.llama3_2": [[136, 0, 1, "", "llama3_2_1b"], [137, 0, 1, "", "llama3_2_3b"], [138, 0, 1, "", "lora_llama3_2_1b"], [139, 0, 1, "", "lora_llama3_2_3b"], [140, 0, 1, "", "qlora_llama3_2_1b"], [141, 0, 1, "", "qlora_llama3_2_3b"]], "torchtune.models.llama3_2_vision": [[142, 1, 1, "", "Llama3VisionEncoder"], [143, 1, 1, "", "Llama3VisionProjectionHead"], [144, 1, 1, "", "Llama3VisionTransform"], [145, 0, 1, "", "llama3_2_vision_11b"], [146, 0, 1, "", "llama3_2_vision_decoder"], [147, 0, 1, "", "llama3_2_vision_encoder"], [148, 0, 1, "", "llama3_2_vision_transform"], [149, 0, 1, "", "lora_llama3_2_vision_11b"], [150, 0, 1, "", "lora_llama3_2_vision_decoder"], [151, 0, 1, "", "lora_llama3_2_vision_encoder"], [152, 0, 1, "", "qlora_llama3_2_vision_11b"]], "torchtune.models.llama3_2_vision.Llama3VisionEncoder": [[142, 4, 1, "", "forward"]], "torchtune.models.llama3_2_vision.Llama3VisionProjectionHead": [[143, 4, 1, "", "forward"]], "torchtune.models.llama3_2_vision.Llama3VisionTransform": [[144, 4, 1, "", "decode"], [144, 4, 1, "", "tokenize_message"], [144, 4, 1, "", "tokenize_messages"]], "torchtune.models.mistral": [[153, 1, 1, "", "MistralChatTemplate"], [154, 0, 1, "", "lora_mistral"], [155, 0, 1, "", "lora_mistral_7b"], [156, 0, 1, "", "lora_mistral_classifier"], [157, 0, 1, "", "lora_mistral_reward_7b"], [158, 0, 1, "", "mistral"], [159, 0, 1, "", "mistral_7b"], [160, 0, 1, "", "mistral_classifier"], [161, 0, 1, "", "mistral_reward_7b"], [162, 0, 1, "", "mistral_tokenizer"], [163, 0, 1, "", "qlora_mistral_7b"], [164, 0, 1, "", 
"qlora_mistral_reward_7b"]], "torchtune.models.phi3": [[165, 0, 1, "", "lora_phi3"], [166, 0, 1, "", "lora_phi3_mini"], [167, 0, 1, "", "phi3"], [168, 0, 1, "", "phi3_mini"], [169, 0, 1, "", "phi3_mini_tokenizer"], [170, 0, 1, "", "qlora_phi3_mini"]], "torchtune.models.qwen2": [[171, 0, 1, "", "lora_qwen2"], [172, 0, 1, "", "lora_qwen2_0_5b"], [173, 0, 1, "", "lora_qwen2_1_5b"], [174, 0, 1, "", "lora_qwen2_7b"], [175, 0, 1, "", "qwen2"], [176, 0, 1, "", "qwen2_0_5b"], [177, 0, 1, "", "qwen2_1_5b"], [178, 0, 1, "", "qwen2_7b"], [179, 0, 1, "", "qwen2_tokenizer"]], "torchtune.models.qwen2_5": [[180, 0, 1, "", "lora_qwen2_5_0_5b"], [181, 0, 1, "", "lora_qwen2_5_14b_base"], [182, 0, 1, "", "lora_qwen2_5_14b_instruct"], [183, 0, 1, "", "lora_qwen2_5_1_5b_base"], [184, 0, 1, "", "lora_qwen2_5_1_5b_instruct"], [185, 0, 1, "", "lora_qwen2_5_32b_base"], [186, 0, 1, "", "lora_qwen2_5_32b_instruct"], [187, 0, 1, "", "lora_qwen2_5_3b"], [188, 0, 1, "", "lora_qwen2_5_72b_base"], [189, 0, 1, "", "lora_qwen2_5_72b_instruct"], [190, 0, 1, "", "lora_qwen2_5_7b_base"], [191, 0, 1, "", "lora_qwen2_5_7b_instruct"], [192, 0, 1, "", "qwen2_5_0_5b"], [193, 0, 1, "", "qwen2_5_14b_base"], [194, 0, 1, "", "qwen2_5_14b_instruct"], [195, 0, 1, "", "qwen2_5_1_5b_base"], [196, 0, 1, "", "qwen2_5_1_5b_instruct"], [197, 0, 1, "", "qwen2_5_32b_base"], [198, 0, 1, "", "qwen2_5_32b_instruct"], [199, 0, 1, "", "qwen2_5_3b"], [200, 0, 1, "", "qwen2_5_72b_base"], [201, 0, 1, "", "qwen2_5_72b_instruct"], [202, 0, 1, "", "qwen2_5_7b_base"], [203, 0, 1, "", "qwen2_5_7b_instruct"], [204, 0, 1, "", "qwen2_5_tokenizer"]], "torchtune.modules": [[205, 1, 1, "", "FeedForward"], [206, 1, 1, "", "Fp32LayerNorm"], [207, 1, 1, "", "KVCache"], [208, 1, 1, "", "MultiHeadAttention"], [209, 1, 1, "", "RMSNorm"], [210, 1, 1, "", "RotaryPositionalEmbeddings"], [211, 1, 1, "", "TanhGate"], [212, 1, 1, "", "TiedLinear"], [213, 1, 1, "", "TransformerCrossAttentionLayer"], [214, 1, 1, "", "TransformerDecoder"], [215, 1, 1, "", "TransformerSelfAttentionLayer"], [216, 1, 1, "", "VisionTransformer"]], "torchtune.modules.FeedForward": [[205, 4, 1, "", "forward"]], "torchtune.modules.Fp32LayerNorm": [[206, 4, 1, "", "forward"]], "torchtune.modules.KVCache": [[207, 4, 1, "", "reset"], [207, 4, 1, "", "update"]], "torchtune.modules.MultiHeadAttention": [[208, 4, 1, "", "forward"], [208, 4, 1, "", "reset_cache"], [208, 4, 1, "", "setup_cache"]], "torchtune.modules.RMSNorm": [[209, 4, 1, "", "forward"]], "torchtune.modules.RotaryPositionalEmbeddings": [[210, 4, 1, "", "forward"]], "torchtune.modules.TanhGate": [[211, 4, 1, "", "forward"]], "torchtune.modules.TransformerCrossAttentionLayer": [[213, 4, 1, "", "caches_are_enabled"], [213, 4, 1, "", "caches_are_setup"], [213, 4, 1, "", "forward"], [213, 4, 1, "", "reset_cache"], [213, 4, 1, "", "setup_caches"]], "torchtune.modules.TransformerDecoder": [[214, 4, 1, "", "caches_are_enabled"], [214, 4, 1, "", "caches_are_setup"], [214, 4, 1, "", "chunked_output"], [214, 4, 1, "", "forward"], [214, 4, 1, "", "reset_caches"], [214, 4, 1, "", "set_num_output_chunks"], [214, 4, 1, "", "setup_caches"]], "torchtune.modules.TransformerSelfAttentionLayer": [[215, 4, 1, "", "caches_are_enabled"], [215, 4, 1, "", "caches_are_setup"], [215, 4, 1, "", "forward"], [215, 4, 1, "", "reset_cache"], [215, 4, 1, "", "setup_caches"]], "torchtune.modules.VisionTransformer": [[216, 4, 1, "", "forward"]], "torchtune.modules.common_utils": [[217, 0, 1, "", "delete_kv_caches"], [218, 0, 1, "", "disable_kv_cache"], [219, 0, 1, "", 
"local_kv_cache"], [220, 0, 1, "", "reparametrize_as_dtype_state_dict_post_hook"]], "torchtune.modules.loss": [[221, 1, 1, "", "CEWithChunkedOutputLoss"], [222, 1, 1, "", "ForwardKLLoss"], [223, 1, 1, "", "ForwardKLWithChunkedOutputLoss"]], "torchtune.modules.loss.CEWithChunkedOutputLoss": [[221, 4, 1, "", "compute_cross_entropy"], [221, 4, 1, "", "forward"]], "torchtune.modules.loss.ForwardKLLoss": [[222, 4, 1, "", "forward"]], "torchtune.modules.loss.ForwardKLWithChunkedOutputLoss": [[223, 4, 1, "", "forward"]], "torchtune.modules.model_fusion": [[224, 1, 1, "", "DeepFusionModel"], [225, 1, 1, "", "FusionEmbedding"], [226, 1, 1, "", "FusionLayer"], [227, 0, 1, "", "get_fusion_params"], [228, 0, 1, "", "register_fusion_module"]], "torchtune.modules.model_fusion.DeepFusionModel": [[224, 4, 1, "", "caches_are_enabled"], [224, 4, 1, "", "caches_are_setup"], [224, 4, 1, "", "forward"], [224, 4, 1, "", "reset_caches"], [224, 4, 1, "", "set_num_output_chunks"], [224, 4, 1, "", "setup_caches"]], "torchtune.modules.model_fusion.FusionEmbedding": [[225, 4, 1, "", "forward"], [225, 4, 1, "", "fusion_params"]], "torchtune.modules.model_fusion.FusionLayer": [[226, 4, 1, "", "caches_are_enabled"], [226, 4, 1, "", "caches_are_setup"], [226, 4, 1, "", "forward"], [226, 4, 1, "", "fusion_params"], [226, 4, 1, "", "reset_cache"], [226, 4, 1, "", "setup_caches"]], "torchtune.modules.peft": [[229, 1, 1, "", "AdapterModule"], [230, 1, 1, "", "DoRALinear"], [231, 1, 1, "", "LoRALinear"], [232, 0, 1, "", "disable_adapter"], [233, 0, 1, "", "get_adapter_params"], [234, 0, 1, "", "set_trainable_params"], [235, 0, 1, "", "validate_missing_and_unexpected_for_lora"], [236, 0, 1, "", "validate_state_dict_for_lora"]], "torchtune.modules.peft.AdapterModule": [[229, 4, 1, "", "adapter_params"]], "torchtune.modules.peft.DoRALinear": [[230, 4, 1, "", "adapter_params"], [230, 4, 1, "", "forward"], [230, 4, 1, "", "initialize_dora_magnitude"]], "torchtune.modules.peft.LoRALinear": [[231, 4, 1, "", "adapter_params"], [231, 4, 1, "", "forward"]], "torchtune.modules.tokenizers": [[237, 1, 1, "", "BaseTokenizer"], [238, 1, 1, "", "ModelTokenizer"], [239, 1, 1, "", "SentencePieceBaseTokenizer"], [240, 1, 1, "", "TikTokenBaseTokenizer"], [241, 0, 1, "", "parse_hf_tokenizer_json"], [242, 0, 1, "", "tokenize_messages_no_special_tokens"]], "torchtune.modules.tokenizers.BaseTokenizer": [[237, 4, 1, "", "decode"], [237, 4, 1, "", "encode"]], "torchtune.modules.tokenizers.ModelTokenizer": [[238, 4, 1, "", "tokenize_messages"]], "torchtune.modules.tokenizers.SentencePieceBaseTokenizer": [[239, 4, 1, "", "decode"], [239, 4, 1, "", "encode"]], "torchtune.modules.tokenizers.TikTokenBaseTokenizer": [[240, 4, 1, "", "decode"], [240, 4, 1, "", "encode"]], "torchtune.modules.transforms": [[243, 1, 1, "", "Transform"], [244, 1, 1, "", "VisionCrossAttentionMask"]], "torchtune.rlhf": [[245, 0, 1, "", "estimate_advantages"], [246, 0, 1, "", "get_rewards_ppo"], [251, 0, 1, "", "truncate_sequence_at_first_stop_token"]], "torchtune.rlhf.loss": [[247, 1, 1, "", "DPOLoss"], [248, 1, 1, "", "PPOLoss"], [249, 1, 1, "", "RSOLoss"], [250, 1, 1, "", "SimPOLoss"]], "torchtune.rlhf.loss.DPOLoss": [[247, 4, 1, "", "forward"]], "torchtune.rlhf.loss.PPOLoss": [[248, 4, 1, "", "forward"]], "torchtune.rlhf.loss.RSOLoss": [[249, 4, 1, "", "forward"]], "torchtune.rlhf.loss.SimPOLoss": [[250, 4, 1, "", "forward"]], "torchtune.training": [[252, 2, 1, "", "FSDPPolicyType"], [253, 1, 1, "", "FormattedCheckpointFiles"], [254, 1, 1, "", "FullModelHFCheckpointer"], [255, 
1, 1, "", "FullModelMetaCheckpointer"], [256, 1, 1, "", "FullModelTorchTuneCheckpointer"], [257, 1, 1, "", "ModelType"], [258, 1, 1, "", "OptimizerInBackwardWrapper"], [259, 0, 1, "", "apply_selective_activation_checkpointing"], [260, 0, 1, "", "create_optim_in_bwd_wrapper"], [261, 0, 1, "", "get_cosine_schedule_with_warmup"], [262, 0, 1, "", "get_dtype"], [263, 0, 1, "", "get_full_finetune_fsdp_wrap_policy"], [264, 0, 1, "", "get_lr"], [265, 0, 1, "", "get_memory_stats"], [266, 0, 1, "", "get_quantizer_mode"], [267, 0, 1, "", "get_unmasked_sequence_lengths"], [268, 0, 1, "", "get_world_size_and_rank"], [269, 0, 1, "", "init_distributed"], [270, 0, 1, "", "is_distributed"], [271, 0, 1, "", "log_memory_stats"], [272, 0, 1, "", "lora_fsdp_wrap_policy"], [278, 0, 1, "", "register_optim_in_bwd_hooks"], [279, 0, 1, "", "set_activation_checkpointing"], [280, 0, 1, "", "set_default_dtype"], [281, 0, 1, "", "set_seed"], [282, 0, 1, "", "setup_torch_profiler"], [283, 0, 1, "", "update_state_dict_for_classifier"], [284, 0, 1, "", "validate_expected_param_dtype"]], "torchtune.training.FormattedCheckpointFiles": [[253, 4, 1, "", "build_checkpoint_filenames"]], "torchtune.training.FullModelHFCheckpointer": [[254, 4, 1, "", "load_checkpoint"], [254, 4, 1, "", "save_checkpoint"]], "torchtune.training.FullModelMetaCheckpointer": [[255, 4, 1, "", "load_checkpoint"], [255, 4, 1, "", "save_checkpoint"]], "torchtune.training.FullModelTorchTuneCheckpointer": [[256, 4, 1, "", "load_checkpoint"], [256, 4, 1, "", "save_checkpoint"]], "torchtune.training.OptimizerInBackwardWrapper": [[258, 4, 1, "", "get_last_lr"], [258, 4, 1, "", "get_optim_key"], [258, 4, 1, "", "load_state_dict"], [258, 4, 1, "", "set_lr_scheduler"], [258, 4, 1, "", "state_dict"], [258, 4, 1, "", "step_lr_scheduler"]], "torchtune.training.metric_logging": [[273, 1, 1, "", "CometLogger"], [274, 1, 1, "", "DiskLogger"], [275, 1, 1, "", "StdoutLogger"], [276, 1, 1, "", "TensorBoardLogger"], [277, 1, 1, "", "WandBLogger"]], "torchtune.training.metric_logging.CometLogger": [[273, 4, 1, "", "close"], [273, 4, 1, "", "log"], [273, 4, 1, "", "log_config"], [273, 4, 1, "", "log_dict"]], "torchtune.training.metric_logging.DiskLogger": [[274, 4, 1, "", "close"], [274, 4, 1, "", "log"], [274, 4, 1, "", "log_dict"]], "torchtune.training.metric_logging.StdoutLogger": [[275, 4, 1, "", "close"], [275, 4, 1, "", "log"], [275, 4, 1, "", "log_dict"]], "torchtune.training.metric_logging.TensorBoardLogger": [[276, 4, 1, "", "close"], [276, 4, 1, "", "log"], [276, 4, 1, "", "log_dict"]], "torchtune.training.metric_logging.WandBLogger": [[277, 4, 1, "", "close"], [277, 4, 1, "", "log"], [277, 4, 1, "", "log_config"], [277, 4, 1, "", "log_dict"]], "torchtune.utils": [[285, 0, 1, "", "batch_to_device"], [286, 0, 1, "", "get_device"], [287, 0, 1, "", "get_logger"], [288, 0, 1, "", "torch_version_ge"]]}, "objtypes": {"0": "py:function", "1": "py:class", "2": "py:data", "3": "py:property", "4": "py:method"}, "objnames": {"0": ["py", "function", "Python function"], "1": ["py", "class", "Python class"], "2": ["py", "data", "Python data"], "3": ["py", "property", "Python property"], "4": ["py", "method", "Python method"]}, "titleterms": {"torchtun": [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 22, 34, 40, 41, 43, 252, 291, 293, 298, 300, 302, 303, 304, 306, 307], "config": [0, 10, 24, 25, 298, 301], "data": [1, 11, 34, 40, 41, 43, 299], "text": [1, 2, 14, 16, 20, 302], "templat": [1, 9, 12, 14, 19, 21, 299], "type": 1, "messag": [1, 13, 14, 36], "transform": [1, 5, 13, 14, 15, 243], 
"collat": 1, "helper": 1, "function": 1, "dataset": [2, 9, 11, 12, 16, 18, 20, 299], "imag": [2, 14, 16], "gener": [2, 3, 73, 300, 302], "builder": 2, "class": [2, 19, 25], "model": [4, 5, 15, 21, 26, 298, 300, 301, 302, 303, 304, 305, 306], "llama3": [4, 116, 299, 302, 303, 306], "2": [4, 303], "vision": [4, 5], "1": [4, 303], "llama2": [4, 101, 299, 300, 304, 307], "code": 4, "llama": 4, "qwen": 4, "5": 4, "phi": 4, "3": 4, "mistral": [4, 158], "gemma": [4, 91], "clip": 4, "modul": 5, "compon": [5, 10, 24, 305], "build": [5, 292, 307], "block": 5, "loss": 5, "base": [5, 21], "token": [5, 14, 21, 299], "util": [5, 8], "peft": [5, 305], "fusion": 5, "rlhf": 6, "train": [7, 252, 295, 301], "checkpoint": [7, 22, 26, 300, 305], "reduc": 7, "precis": [7, 305], "distribut": [7, 295], "memori": [7, 304, 305, 307], "manag": 7, "schedul": 7, "metric": [7, 23, 26], "log": [7, 23, 26], "perform": [7, 304], "profil": 7, "miscellan": [7, 8], "chat": [9, 299], "exampl": [9, 12, 13, 15, 16, 18, 20], "format": [9, 12, 14, 16, 18, 20, 22], "load": [9, 12, 16, 18, 20, 21], "from": [9, 12, 16, 18, 20, 21, 299, 307], "hug": [9, 12, 16, 18, 20, 21, 300], "face": [9, 12, 16, 18, 20, 21, 300], "local": [9, 12, 16, 18, 20], "remot": [9, 12, 16], "specifi": 9, "convers": 9, "style": 9, "sharegpt": 9, "openai": 9, "renam": [9, 12], "column": [9, 12], "built": [9, 12, 16, 18, 19, 20, 298], "custom": [10, 13, 19, 299], "recip": [10, 25, 296, 298, 301, 303, 304, 306], "set": [10, 21], "up": [10, 300], "your": [10, 24, 25, 300, 301], "project": 10, "launch": 10, "overview": [11, 22, 293, 296, 300, 305], "pipelin": 11, "instruct": [12, 292, 302], "configur": [13, 24], "creat": [14, 15], "prompt": [14, 19, 21, 299], "access": [14, 302], "content": 14, "multimod": [15, 16], "us": [15, 19, 24, 25, 299, 300, 303, 307], "interleav": 16, "sampl": [17, 77], "pack": 17, "prefer": 18, "defin": 19, "via": [19, 292, 302], "dotpath": 19, "string": 19, "dictionari": 19, "prompttempl": [19, 38], "complet": 20, "json": 20, "txt": 20, "download": [21, 298, 300, 301], "file": 21, "max": 21, "sequenc": 21, "length": 21, "special": [21, 299], "handl": 22, "differ": 22, "hfcheckpoint": 22, "metacheckpoint": 22, "torchtunecheckpoint": 22, "intermedi": 22, "vs": 22, "final": 22, "lora": [22, 294, 300, 304, 305, 307], "put": [22, 307], "thi": 22, "all": [22, 24, 307], "togeth": [22, 307], "comet": 23, "logger": [23, 26], "about": 24, "where": 24, "do": 24, "paramet": [24, 305], "live": 24, "write": 24, "instanti": [24, 27], "referenc": 24, "other": [24, 300], "field": 24, "interpol": 24, "valid": [24, 30, 298], "best": 24, "practic": 24, "airtight": 24, "public": 24, "api": 24, "onli": 24, "command": 24, "line": 24, "overrid": 24, "remov": 24, "what": [25, 293, 303, 304, 306, 307], "ar": 25, "script": 25, "run": [25, 298, 300], "cli": [25, 298], "pars": [25, 29], "weight": [26, 305], "bias": 26, "w": 26, "b": 26, "log_config": 28, "alpacatomessag": 31, "chatmltempl": 32, "chosenrejectedtomessag": 33, "grammarerrorcorrectiontempl": 34, "inputoutputtomessag": 35, "openaitomessag": 37, "prompttemplateinterfac": 39, "questionanswertempl": 40, "role": 41, "sharegpttomessag": 42, "summarizetempl": 43, "format_content_with_imag": 44, "left_pad_sequ": 45, "load_imag": 46, "padded_col": 47, "padded_collate_dpo": 48, "padded_collate_sft": 49, "padded_collate_tiled_images_and_mask": 50, "truncat": 51, "validate_messag": 52, "concatdataset": 53, "packeddataset": 54, "preferencedataset": 55, "sftdataset": 56, "textcompletiondataset": 57, 
"alpaca_cleaned_dataset": 58, "alpaca_dataset": 59, "chat_dataset": 60, "cnn_dailymail_articles_dataset": 61, "grammar_dataset": 62, "hh_rlhf_helpful_dataset": 63, "instruct_dataset": 64, "llava_instruct_dataset": 65, "the_cauldron_dataset": 66, "preference_dataset": 67, "samsum_dataset": 68, "slimorca_dataset": 69, "stack_exchange_paired_dataset": 70, "text_completion_dataset": 71, "wikitext_dataset": 72, "generate_next_token": 74, "get_causal_mask_from_padding_mask": 75, "get_position_ids_from_padding_mask": 76, "tilepositionalembed": 78, "tiledtokenpositionalembed": 79, "tokenpositionalembed": 80, "clip_vision_encod": 81, "code_llama2_13b": 82, "code_llama2_70b": 83, "code_llama2_7b": 84, "lora_code_llama2_13b": 85, "lora_code_llama2_70b": 86, "lora_code_llama2_7b": 87, "qlora_code_llama2_13b": 88, "qlora_code_llama2_70b": 89, "qlora_code_llama2_7b": 90, "gemma_2b": 92, "gemma_7b": 93, "gemma_token": 94, "lora_gemma": 95, "lora_gemma_2b": 96, "lora_gemma_7b": 97, "qlora_gemma_2b": 98, "qlora_gemma_7b": 99, "llama2chattempl": 100, "llama2_13b": 102, "llama2_70b": 103, "llama2_7b": 104, "llama2_reward_7b": 105, "llama2_token": 106, "lora_llama2": 107, "lora_llama2_13b": 108, "lora_llama2_70b": 109, "lora_llama2_7b": 110, "lora_llama2_reward_7b": 111, "qlora_llama2_13b": 112, "qlora_llama2_70b": 113, "qlora_llama2_7b": 114, "qlora_llama2_reward_7b": 115, "llama3_70b": 117, "llama3_8b": 118, "llama3_token": 119, "lora_llama3": 120, "lora_llama3_70b": 121, "lora_llama3_8b": 122, "qlora_llama3_70b": 123, "qlora_llama3_8b": 124, "llama3_1": 125, "llama3_1_405b": 126, "llama3_1_70b": 127, "llama3_1_8b": 128, "lora_llama3_1": 129, "lora_llama3_1_405b": 130, "lora_llama3_1_70b": 131, "lora_llama3_1_8b": 132, "qlora_llama3_1_405b": 133, "qlora_llama3_1_70b": 134, "qlora_llama3_1_8b": 135, "llama3_2_1b": 136, "llama3_2_3b": 137, "lora_llama3_2_1b": 138, "lora_llama3_2_3b": 139, "qlora_llama3_2_1b": 140, "qlora_llama3_2_3b": 141, "llama3visionencod": 142, "llama3visionprojectionhead": 143, "llama3visiontransform": 144, "llama3_2_vision_11b": 145, "llama3_2_vision_decod": 146, "llama3_2_vision_encod": 147, "llama3_2_vision_transform": 148, "lora_llama3_2_vision_11b": 149, "lora_llama3_2_vision_decod": 150, "lora_llama3_2_vision_encod": 151, "qlora_llama3_2_vision_11b": 152, "mistralchattempl": 153, "lora_mistr": 154, "lora_mistral_7b": 155, "lora_mistral_classifi": 156, "lora_mistral_reward_7b": 157, "mistral_7b": 159, "mistral_classifi": 160, "mistral_reward_7b": 161, "mistral_token": 162, "qlora_mistral_7b": 163, "qlora_mistral_reward_7b": 164, "lora_phi3": 165, "lora_phi3_mini": 166, "phi3": 167, "phi3_mini": 168, "phi3_mini_token": 169, "qlora_phi3_mini": 170, "lora_qwen2": 171, "lora_qwen2_0_5b": 172, "lora_qwen2_1_5b": 173, "lora_qwen2_7b": 174, "qwen2": [175, 303], "qwen2_0_5b": 176, "qwen2_1_5b": 177, "qwen2_7b": 178, "qwen2_token": 179, "lora_qwen2_5_0_5b": 180, "lora_qwen2_5_14b_bas": 181, "lora_qwen2_5_14b_instruct": 182, "lora_qwen2_5_1_5b_bas": 183, "lora_qwen2_5_1_5b_instruct": 184, "lora_qwen2_5_32b_bas": 185, "lora_qwen2_5_32b_instruct": 186, "lora_qwen2_5_3b": 187, "lora_qwen2_5_72b_bas": 188, "lora_qwen2_5_72b_instruct": 189, "lora_qwen2_5_7b_bas": 190, "lora_qwen2_5_7b_instruct": 191, "qwen2_5_0_5b": 192, "qwen2_5_14b_bas": 193, "qwen2_5_14b_instruct": 194, "qwen2_5_1_5b_bas": 195, "qwen2_5_1_5b_instruct": 196, "qwen2_5_32b_bas": 197, "qwen2_5_32b_instruct": 198, "qwen2_5_3b": 199, "qwen2_5_72b_bas": 200, "qwen2_5_72b_instruct": 201, "qwen2_5_7b_bas": 202, "qwen2_5_7b_instruct": 
203, "qwen2_5_token": 204, "feedforward": 205, "fp32layernorm": 206, "kvcach": 207, "multiheadattent": 208, "rmsnorm": 209, "rotarypositionalembed": 210, "tanhgat": 211, "tiedlinear": 212, "transformercrossattentionlay": 213, "transformerdecod": 214, "transformerselfattentionlay": 215, "visiontransform": 216, "delete_kv_cach": 217, "disable_kv_cach": 218, "local_kv_cach": 219, "reparametrize_as_dtype_state_dict_post_hook": 220, "cewithchunkedoutputloss": 221, "forwardklloss": 222, "forwardklwithchunkedoutputloss": 223, "deepfusionmodel": 224, "fusionembed": 225, "fusionlay": 226, "get_fusion_param": 227, "register_fusion_modul": 228, "adaptermodul": 229, "doralinear": 230, "loralinear": 231, "disable_adapt": 232, "get_adapter_param": 233, "set_trainable_param": 234, "validate_missing_and_unexpected_for_lora": 235, "validate_state_dict_for_lora": 236, "basetoken": 237, "modeltoken": 238, "sentencepiecebasetoken": 239, "tiktokenbasetoken": 240, "parse_hf_tokenizer_json": 241, "tokenize_messages_no_special_token": 242, "visioncrossattentionmask": 244, "estimate_advantag": 245, "get_rewards_ppo": 246, "dpoloss": 247, "ppoloss": 248, "rsoloss": 249, "simpoloss": 250, "truncate_sequence_at_first_stop_token": 251, "fsdppolicytyp": 252, "formattedcheckpointfil": 253, "fullmodelhfcheckpoint": 254, "fullmodelmetacheckpoint": 255, "fullmodeltorchtunecheckpoint": 256, "modeltyp": 257, "optimizerinbackwardwrapp": 258, "apply_selective_activation_checkpoint": 259, "create_optim_in_bwd_wrapp": 260, "get_cosine_schedule_with_warmup": 261, "get_dtyp": 262, "get_full_finetune_fsdp_wrap_polici": 263, "get_lr": 264, "get_memory_stat": 265, "get_quantizer_mod": 266, "get_unmasked_sequence_length": 267, "get_world_size_and_rank": 268, "init_distribut": 269, "is_distribut": 270, "log_memory_stat": 271, "lora_fsdp_wrap_polici": 272, "cometlogg": 273, "disklogg": 274, "stdoutlogg": 275, "tensorboardlogg": 276, "wandblogg": 277, "register_optim_in_bwd_hook": 278, "set_activation_checkpoint": 279, "set_default_dtyp": 280, "set_se": 281, "setup_torch_profil": 282, "update_state_dict_for_classifi": 283, "validate_expected_param_dtyp": 284, "batch_to_devic": 285, "get_devic": 286, "get_logg": 287, "torch_version_g": 288, "comput": [290, 297], "time": [290, 297], "welcom": 291, "document": 291, "get": [291, 298, 302], "start": [291, 298], "tutori": 291, "instal": 292, "pre": 292, "requisit": 292, "pypi": 292, "git": 292, "clone": 292, "nightli": 292, "kei": 293, "concept": 293, "design": 293, "principl": 293, "singl": 294, "devic": [294, 306], "finetun": [294, 296, 300, 304, 306, 307], "quantiz": [295, 300, 302, 305, 306], "awar": 295, "qat": [295, 306], "list": 298, "copi": 298, "fine": [299, 301, 302, 303, 304, 305, 306, 307], "tune": [299, 301, 302, 303, 304, 305, 306, 307], "chang": 299, "when": 299, "should": 299, "i": 299, "end": 300, "workflow": 300, "7b": 300, "evalu": [300, 302, 306], "eleutherai": [300, 302], "s": [300, 302], "eval": [300, 302], "har": [300, 302], "speed": 300, "librari": 300, "upload": 300, "hub": 300, "first": 301, "llm": 301, "select": 301, "modifi": 301, "next": 301, "step": [301, 305], "meta": 302, "8b": [302, 303], "our": 302, "faster": 302, "distil": 303, "1b": 303, "knowledg": 303, "how": [303, 304], "doe": [303, 304], "work": [303, 304], "kd": 303, "ablat": 303, "studi": 303, "teacher": 303, "student": 303, "hyperparamet": 303, "learn": 303, "rate": 303, "ratio": 303, "5b": 303, "0": 303, "appli": [304, 306], "trade": 304, "off": 304, "optim": 305, "activ": 305, "offload": 305, 
"gradient": 305, "accumul": 305, "lower": [305, 306], "fuse": 305, "backward": 305, "pass": 305, "state": 305, "cpu": 305, "effici": 305, "low": 305, "rank": 305, "adapt": 305, "qlora": [305, 307], "decompos": 305, "dora": 305, "option": 306, "save": 307, "deep": 307, "dive": 307}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 56}}) \ No newline at end of file +Search.setIndex({"docnames": ["api_ref_config", "api_ref_data", "api_ref_datasets", "api_ref_generation", "api_ref_models", "api_ref_modules", "api_ref_rlhf", "api_ref_training", "api_ref_utilities", "basics/chat_datasets", "basics/custom_components", "basics/datasets_overview", "basics/instruct_datasets", "basics/message_transforms", "basics/messages", "basics/model_transforms", "basics/multimodal_datasets", "basics/packing", "basics/preference_datasets", "basics/prompt_templates", "basics/text_completion_datasets", "basics/tokenizers", "deep_dives/checkpointer", "deep_dives/comet_logging", "deep_dives/configs", "deep_dives/recipe_deepdive", "deep_dives/wandb_logging", "generated/torchtune.config.instantiate", "generated/torchtune.config.log_config", "generated/torchtune.config.parse", "generated/torchtune.config.validate", "generated/torchtune.data.AlpacaToMessages", "generated/torchtune.data.ChatMLTemplate", "generated/torchtune.data.ChosenRejectedToMessages", "generated/torchtune.data.GrammarErrorCorrectionTemplate", "generated/torchtune.data.InputOutputToMessages", "generated/torchtune.data.Message", "generated/torchtune.data.OpenAIToMessages", "generated/torchtune.data.PromptTemplate", "generated/torchtune.data.PromptTemplateInterface", "generated/torchtune.data.QuestionAnswerTemplate", "generated/torchtune.data.Role", "generated/torchtune.data.ShareGPTToMessages", "generated/torchtune.data.SummarizeTemplate", "generated/torchtune.data.format_content_with_images", "generated/torchtune.data.left_pad_sequence", "generated/torchtune.data.load_image", "generated/torchtune.data.padded_collate", "generated/torchtune.data.padded_collate_dpo", "generated/torchtune.data.padded_collate_sft", "generated/torchtune.data.padded_collate_tiled_images_and_mask", "generated/torchtune.data.truncate", "generated/torchtune.data.validate_messages", "generated/torchtune.datasets.ConcatDataset", "generated/torchtune.datasets.PackedDataset", "generated/torchtune.datasets.PreferenceDataset", "generated/torchtune.datasets.SFTDataset", "generated/torchtune.datasets.TextCompletionDataset", "generated/torchtune.datasets.alpaca_cleaned_dataset", "generated/torchtune.datasets.alpaca_dataset", "generated/torchtune.datasets.chat_dataset", "generated/torchtune.datasets.cnn_dailymail_articles_dataset", "generated/torchtune.datasets.grammar_dataset", "generated/torchtune.datasets.hh_rlhf_helpful_dataset", "generated/torchtune.datasets.instruct_dataset", "generated/torchtune.datasets.multimodal.llava_instruct_dataset", "generated/torchtune.datasets.multimodal.the_cauldron_dataset", "generated/torchtune.datasets.preference_dataset", "generated/torchtune.datasets.samsum_dataset", "generated/torchtune.datasets.slimorca_dataset", "generated/torchtune.datasets.stack_exchange_paired_dataset", "generated/torchtune.datasets.text_completion_dataset", 
"generated/torchtune.datasets.wikitext_dataset", "generated/torchtune.generation.generate", "generated/torchtune.generation.generate_next_token", "generated/torchtune.generation.get_causal_mask_from_padding_mask", "generated/torchtune.generation.get_position_ids_from_padding_mask", "generated/torchtune.generation.sample", "generated/torchtune.models.clip.TilePositionalEmbedding", "generated/torchtune.models.clip.TiledTokenPositionalEmbedding", "generated/torchtune.models.clip.TokenPositionalEmbedding", "generated/torchtune.models.clip.clip_vision_encoder", "generated/torchtune.models.code_llama2.code_llama2_13b", "generated/torchtune.models.code_llama2.code_llama2_70b", "generated/torchtune.models.code_llama2.code_llama2_7b", "generated/torchtune.models.code_llama2.lora_code_llama2_13b", "generated/torchtune.models.code_llama2.lora_code_llama2_70b", "generated/torchtune.models.code_llama2.lora_code_llama2_7b", "generated/torchtune.models.code_llama2.qlora_code_llama2_13b", "generated/torchtune.models.code_llama2.qlora_code_llama2_70b", "generated/torchtune.models.code_llama2.qlora_code_llama2_7b", "generated/torchtune.models.gemma.gemma", "generated/torchtune.models.gemma.gemma_2b", "generated/torchtune.models.gemma.gemma_7b", "generated/torchtune.models.gemma.gemma_tokenizer", "generated/torchtune.models.gemma.lora_gemma", "generated/torchtune.models.gemma.lora_gemma_2b", "generated/torchtune.models.gemma.lora_gemma_7b", "generated/torchtune.models.gemma.qlora_gemma_2b", "generated/torchtune.models.gemma.qlora_gemma_7b", "generated/torchtune.models.llama2.Llama2ChatTemplate", "generated/torchtune.models.llama2.llama2", "generated/torchtune.models.llama2.llama2_13b", "generated/torchtune.models.llama2.llama2_70b", "generated/torchtune.models.llama2.llama2_7b", "generated/torchtune.models.llama2.llama2_reward_7b", "generated/torchtune.models.llama2.llama2_tokenizer", "generated/torchtune.models.llama2.lora_llama2", "generated/torchtune.models.llama2.lora_llama2_13b", "generated/torchtune.models.llama2.lora_llama2_70b", "generated/torchtune.models.llama2.lora_llama2_7b", "generated/torchtune.models.llama2.lora_llama2_reward_7b", "generated/torchtune.models.llama2.qlora_llama2_13b", "generated/torchtune.models.llama2.qlora_llama2_70b", "generated/torchtune.models.llama2.qlora_llama2_7b", "generated/torchtune.models.llama2.qlora_llama2_reward_7b", "generated/torchtune.models.llama3.llama3", "generated/torchtune.models.llama3.llama3_70b", "generated/torchtune.models.llama3.llama3_8b", "generated/torchtune.models.llama3.llama3_tokenizer", "generated/torchtune.models.llama3.lora_llama3", "generated/torchtune.models.llama3.lora_llama3_70b", "generated/torchtune.models.llama3.lora_llama3_8b", "generated/torchtune.models.llama3.qlora_llama3_70b", "generated/torchtune.models.llama3.qlora_llama3_8b", "generated/torchtune.models.llama3_1.llama3_1", "generated/torchtune.models.llama3_1.llama3_1_405b", "generated/torchtune.models.llama3_1.llama3_1_70b", "generated/torchtune.models.llama3_1.llama3_1_8b", "generated/torchtune.models.llama3_1.lora_llama3_1", "generated/torchtune.models.llama3_1.lora_llama3_1_405b", "generated/torchtune.models.llama3_1.lora_llama3_1_70b", "generated/torchtune.models.llama3_1.lora_llama3_1_8b", "generated/torchtune.models.llama3_1.qlora_llama3_1_405b", "generated/torchtune.models.llama3_1.qlora_llama3_1_70b", "generated/torchtune.models.llama3_1.qlora_llama3_1_8b", "generated/torchtune.models.llama3_2.llama3_2_1b", "generated/torchtune.models.llama3_2.llama3_2_3b", 
"generated/torchtune.models.llama3_2.lora_llama3_2_1b", "generated/torchtune.models.llama3_2.lora_llama3_2_3b", "generated/torchtune.models.llama3_2.qlora_llama3_2_1b", "generated/torchtune.models.llama3_2.qlora_llama3_2_3b", "generated/torchtune.models.llama3_2_vision.Llama3VisionEncoder", "generated/torchtune.models.llama3_2_vision.Llama3VisionProjectionHead", "generated/torchtune.models.llama3_2_vision.Llama3VisionTransform", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_11b", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_decoder", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_encoder", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_transform", "generated/torchtune.models.llama3_2_vision.lora_llama3_2_vision_11b", "generated/torchtune.models.llama3_2_vision.lora_llama3_2_vision_decoder", "generated/torchtune.models.llama3_2_vision.lora_llama3_2_vision_encoder", "generated/torchtune.models.llama3_2_vision.qlora_llama3_2_vision_11b", "generated/torchtune.models.mistral.MistralChatTemplate", "generated/torchtune.models.mistral.lora_mistral", "generated/torchtune.models.mistral.lora_mistral_7b", "generated/torchtune.models.mistral.lora_mistral_classifier", "generated/torchtune.models.mistral.lora_mistral_reward_7b", "generated/torchtune.models.mistral.mistral", "generated/torchtune.models.mistral.mistral_7b", "generated/torchtune.models.mistral.mistral_classifier", "generated/torchtune.models.mistral.mistral_reward_7b", "generated/torchtune.models.mistral.mistral_tokenizer", "generated/torchtune.models.mistral.qlora_mistral_7b", "generated/torchtune.models.mistral.qlora_mistral_reward_7b", "generated/torchtune.models.phi3.lora_phi3", "generated/torchtune.models.phi3.lora_phi3_mini", "generated/torchtune.models.phi3.phi3", "generated/torchtune.models.phi3.phi3_mini", "generated/torchtune.models.phi3.phi3_mini_tokenizer", "generated/torchtune.models.phi3.qlora_phi3_mini", "generated/torchtune.models.qwen2.lora_qwen2", "generated/torchtune.models.qwen2.lora_qwen2_0_5b", "generated/torchtune.models.qwen2.lora_qwen2_1_5b", "generated/torchtune.models.qwen2.lora_qwen2_7b", "generated/torchtune.models.qwen2.qwen2", "generated/torchtune.models.qwen2.qwen2_0_5b", "generated/torchtune.models.qwen2.qwen2_1_5b", "generated/torchtune.models.qwen2.qwen2_7b", "generated/torchtune.models.qwen2.qwen2_tokenizer", "generated/torchtune.models.qwen2_5.lora_qwen2_5_0_5b", "generated/torchtune.models.qwen2_5.lora_qwen2_5_14b_base", "generated/torchtune.models.qwen2_5.lora_qwen2_5_14b_instruct", "generated/torchtune.models.qwen2_5.lora_qwen2_5_1_5b_base", "generated/torchtune.models.qwen2_5.lora_qwen2_5_1_5b_instruct", "generated/torchtune.models.qwen2_5.lora_qwen2_5_32b_base", "generated/torchtune.models.qwen2_5.lora_qwen2_5_32b_instruct", "generated/torchtune.models.qwen2_5.lora_qwen2_5_3b", "generated/torchtune.models.qwen2_5.lora_qwen2_5_72b_base", "generated/torchtune.models.qwen2_5.lora_qwen2_5_72b_instruct", "generated/torchtune.models.qwen2_5.lora_qwen2_5_7b_base", "generated/torchtune.models.qwen2_5.lora_qwen2_5_7b_instruct", "generated/torchtune.models.qwen2_5.qwen2_5_0_5b", "generated/torchtune.models.qwen2_5.qwen2_5_14b_base", "generated/torchtune.models.qwen2_5.qwen2_5_14b_instruct", "generated/torchtune.models.qwen2_5.qwen2_5_1_5b_base", "generated/torchtune.models.qwen2_5.qwen2_5_1_5b_instruct", "generated/torchtune.models.qwen2_5.qwen2_5_32b_base", "generated/torchtune.models.qwen2_5.qwen2_5_32b_instruct", 
"generated/torchtune.models.qwen2_5.qwen2_5_3b", "generated/torchtune.models.qwen2_5.qwen2_5_72b_base", "generated/torchtune.models.qwen2_5.qwen2_5_72b_instruct", "generated/torchtune.models.qwen2_5.qwen2_5_7b_base", "generated/torchtune.models.qwen2_5.qwen2_5_7b_instruct", "generated/torchtune.models.qwen2_5.qwen2_5_tokenizer", "generated/torchtune.modules.FeedForward", "generated/torchtune.modules.Fp32LayerNorm", "generated/torchtune.modules.KVCache", "generated/torchtune.modules.MultiHeadAttention", "generated/torchtune.modules.RMSNorm", "generated/torchtune.modules.RotaryPositionalEmbeddings", "generated/torchtune.modules.TanhGate", "generated/torchtune.modules.TiedLinear", "generated/torchtune.modules.TransformerCrossAttentionLayer", "generated/torchtune.modules.TransformerDecoder", "generated/torchtune.modules.TransformerSelfAttentionLayer", "generated/torchtune.modules.VisionTransformer", "generated/torchtune.modules.common_utils.delete_kv_caches", "generated/torchtune.modules.common_utils.disable_kv_cache", "generated/torchtune.modules.common_utils.local_kv_cache", "generated/torchtune.modules.common_utils.reparametrize_as_dtype_state_dict_post_hook", "generated/torchtune.modules.loss.CEWithChunkedOutputLoss", "generated/torchtune.modules.loss.ForwardKLLoss", "generated/torchtune.modules.loss.ForwardKLWithChunkedOutputLoss", "generated/torchtune.modules.model_fusion.DeepFusionModel", "generated/torchtune.modules.model_fusion.FusionEmbedding", "generated/torchtune.modules.model_fusion.FusionLayer", "generated/torchtune.modules.model_fusion.get_fusion_params", "generated/torchtune.modules.model_fusion.register_fusion_module", "generated/torchtune.modules.peft.AdapterModule", "generated/torchtune.modules.peft.DoRALinear", "generated/torchtune.modules.peft.LoRALinear", "generated/torchtune.modules.peft.disable_adapter", "generated/torchtune.modules.peft.get_adapter_params", "generated/torchtune.modules.peft.set_trainable_params", "generated/torchtune.modules.peft.validate_missing_and_unexpected_for_lora", "generated/torchtune.modules.peft.validate_state_dict_for_lora", "generated/torchtune.modules.tokenizers.BaseTokenizer", "generated/torchtune.modules.tokenizers.ModelTokenizer", "generated/torchtune.modules.tokenizers.SentencePieceBaseTokenizer", "generated/torchtune.modules.tokenizers.TikTokenBaseTokenizer", "generated/torchtune.modules.tokenizers.parse_hf_tokenizer_json", "generated/torchtune.modules.tokenizers.tokenize_messages_no_special_tokens", "generated/torchtune.modules.transforms.Transform", "generated/torchtune.modules.transforms.VisionCrossAttentionMask", "generated/torchtune.rlhf.estimate_advantages", "generated/torchtune.rlhf.get_rewards_ppo", "generated/torchtune.rlhf.loss.DPOLoss", "generated/torchtune.rlhf.loss.PPOLoss", "generated/torchtune.rlhf.loss.RSOLoss", "generated/torchtune.rlhf.loss.SimPOLoss", "generated/torchtune.rlhf.truncate_sequence_at_first_stop_token", "generated/torchtune.training.FSDPPolicyType", "generated/torchtune.training.FormattedCheckpointFiles", "generated/torchtune.training.FullModelHFCheckpointer", "generated/torchtune.training.FullModelMetaCheckpointer", "generated/torchtune.training.FullModelTorchTuneCheckpointer", "generated/torchtune.training.ModelType", "generated/torchtune.training.OptimizerInBackwardWrapper", "generated/torchtune.training.apply_selective_activation_checkpointing", "generated/torchtune.training.create_optim_in_bwd_wrapper", "generated/torchtune.training.get_cosine_schedule_with_warmup", 
"generated/torchtune.training.get_dtype", "generated/torchtune.training.get_full_finetune_fsdp_wrap_policy", "generated/torchtune.training.get_lr", "generated/torchtune.training.get_memory_stats", "generated/torchtune.training.get_quantizer_mode", "generated/torchtune.training.get_unmasked_sequence_lengths", "generated/torchtune.training.get_world_size_and_rank", "generated/torchtune.training.init_distributed", "generated/torchtune.training.is_distributed", "generated/torchtune.training.log_memory_stats", "generated/torchtune.training.lora_fsdp_wrap_policy", "generated/torchtune.training.metric_logging.CometLogger", "generated/torchtune.training.metric_logging.DiskLogger", "generated/torchtune.training.metric_logging.StdoutLogger", "generated/torchtune.training.metric_logging.TensorBoardLogger", "generated/torchtune.training.metric_logging.WandBLogger", "generated/torchtune.training.register_optim_in_bwd_hooks", "generated/torchtune.training.set_activation_checkpointing", "generated/torchtune.training.set_default_dtype", "generated/torchtune.training.set_seed", "generated/torchtune.training.setup_torch_profiler", "generated/torchtune.training.update_state_dict_for_classifier", "generated/torchtune.training.validate_expected_param_dtype", "generated/torchtune.utils.batch_to_device", "generated/torchtune.utils.get_device", "generated/torchtune.utils.get_logger", "generated/torchtune.utils.torch_version_ge", "generated_examples/index", "generated_examples/sg_execution_times", "index", "install", "overview", "recipes/lora_finetune_single_device", "recipes/qat_distributed", "recipes/recipes_overview", "sg_execution_times", "tune_cli", "tutorials/chat", "tutorials/e2e_flow", "tutorials/first_finetune_tutorial", "tutorials/llama3", "tutorials/llama_kd_tutorial", "tutorials/lora_finetune", "tutorials/memory_optimizations", "tutorials/qat_finetune", "tutorials/qlora_finetune"], "filenames": ["api_ref_config.rst", "api_ref_data.rst", "api_ref_datasets.rst", "api_ref_generation.rst", "api_ref_models.rst", "api_ref_modules.rst", "api_ref_rlhf.rst", "api_ref_training.rst", "api_ref_utilities.rst", "basics/chat_datasets.rst", "basics/custom_components.rst", "basics/datasets_overview.rst", "basics/instruct_datasets.rst", "basics/message_transforms.rst", "basics/messages.rst", "basics/model_transforms.rst", "basics/multimodal_datasets.rst", "basics/packing.rst", "basics/preference_datasets.rst", "basics/prompt_templates.rst", "basics/text_completion_datasets.rst", "basics/tokenizers.rst", "deep_dives/checkpointer.rst", "deep_dives/comet_logging.rst", "deep_dives/configs.rst", "deep_dives/recipe_deepdive.rst", "deep_dives/wandb_logging.rst", "generated/torchtune.config.instantiate.rst", "generated/torchtune.config.log_config.rst", "generated/torchtune.config.parse.rst", "generated/torchtune.config.validate.rst", "generated/torchtune.data.AlpacaToMessages.rst", "generated/torchtune.data.ChatMLTemplate.rst", "generated/torchtune.data.ChosenRejectedToMessages.rst", "generated/torchtune.data.GrammarErrorCorrectionTemplate.rst", "generated/torchtune.data.InputOutputToMessages.rst", "generated/torchtune.data.Message.rst", "generated/torchtune.data.OpenAIToMessages.rst", "generated/torchtune.data.PromptTemplate.rst", "generated/torchtune.data.PromptTemplateInterface.rst", "generated/torchtune.data.QuestionAnswerTemplate.rst", "generated/torchtune.data.Role.rst", "generated/torchtune.data.ShareGPTToMessages.rst", "generated/torchtune.data.SummarizeTemplate.rst", 
"generated/torchtune.data.format_content_with_images.rst", "generated/torchtune.data.left_pad_sequence.rst", "generated/torchtune.data.load_image.rst", "generated/torchtune.data.padded_collate.rst", "generated/torchtune.data.padded_collate_dpo.rst", "generated/torchtune.data.padded_collate_sft.rst", "generated/torchtune.data.padded_collate_tiled_images_and_mask.rst", "generated/torchtune.data.truncate.rst", "generated/torchtune.data.validate_messages.rst", "generated/torchtune.datasets.ConcatDataset.rst", "generated/torchtune.datasets.PackedDataset.rst", "generated/torchtune.datasets.PreferenceDataset.rst", "generated/torchtune.datasets.SFTDataset.rst", "generated/torchtune.datasets.TextCompletionDataset.rst", "generated/torchtune.datasets.alpaca_cleaned_dataset.rst", "generated/torchtune.datasets.alpaca_dataset.rst", "generated/torchtune.datasets.chat_dataset.rst", "generated/torchtune.datasets.cnn_dailymail_articles_dataset.rst", "generated/torchtune.datasets.grammar_dataset.rst", "generated/torchtune.datasets.hh_rlhf_helpful_dataset.rst", "generated/torchtune.datasets.instruct_dataset.rst", "generated/torchtune.datasets.multimodal.llava_instruct_dataset.rst", "generated/torchtune.datasets.multimodal.the_cauldron_dataset.rst", "generated/torchtune.datasets.preference_dataset.rst", "generated/torchtune.datasets.samsum_dataset.rst", "generated/torchtune.datasets.slimorca_dataset.rst", "generated/torchtune.datasets.stack_exchange_paired_dataset.rst", "generated/torchtune.datasets.text_completion_dataset.rst", "generated/torchtune.datasets.wikitext_dataset.rst", "generated/torchtune.generation.generate.rst", "generated/torchtune.generation.generate_next_token.rst", "generated/torchtune.generation.get_causal_mask_from_padding_mask.rst", "generated/torchtune.generation.get_position_ids_from_padding_mask.rst", "generated/torchtune.generation.sample.rst", "generated/torchtune.models.clip.TilePositionalEmbedding.rst", "generated/torchtune.models.clip.TiledTokenPositionalEmbedding.rst", "generated/torchtune.models.clip.TokenPositionalEmbedding.rst", "generated/torchtune.models.clip.clip_vision_encoder.rst", "generated/torchtune.models.code_llama2.code_llama2_13b.rst", "generated/torchtune.models.code_llama2.code_llama2_70b.rst", "generated/torchtune.models.code_llama2.code_llama2_7b.rst", "generated/torchtune.models.code_llama2.lora_code_llama2_13b.rst", "generated/torchtune.models.code_llama2.lora_code_llama2_70b.rst", "generated/torchtune.models.code_llama2.lora_code_llama2_7b.rst", "generated/torchtune.models.code_llama2.qlora_code_llama2_13b.rst", "generated/torchtune.models.code_llama2.qlora_code_llama2_70b.rst", "generated/torchtune.models.code_llama2.qlora_code_llama2_7b.rst", "generated/torchtune.models.gemma.gemma.rst", "generated/torchtune.models.gemma.gemma_2b.rst", "generated/torchtune.models.gemma.gemma_7b.rst", "generated/torchtune.models.gemma.gemma_tokenizer.rst", "generated/torchtune.models.gemma.lora_gemma.rst", "generated/torchtune.models.gemma.lora_gemma_2b.rst", "generated/torchtune.models.gemma.lora_gemma_7b.rst", "generated/torchtune.models.gemma.qlora_gemma_2b.rst", "generated/torchtune.models.gemma.qlora_gemma_7b.rst", "generated/torchtune.models.llama2.Llama2ChatTemplate.rst", "generated/torchtune.models.llama2.llama2.rst", "generated/torchtune.models.llama2.llama2_13b.rst", "generated/torchtune.models.llama2.llama2_70b.rst", "generated/torchtune.models.llama2.llama2_7b.rst", "generated/torchtune.models.llama2.llama2_reward_7b.rst", 
"generated/torchtune.models.llama2.llama2_tokenizer.rst", "generated/torchtune.models.llama2.lora_llama2.rst", "generated/torchtune.models.llama2.lora_llama2_13b.rst", "generated/torchtune.models.llama2.lora_llama2_70b.rst", "generated/torchtune.models.llama2.lora_llama2_7b.rst", "generated/torchtune.models.llama2.lora_llama2_reward_7b.rst", "generated/torchtune.models.llama2.qlora_llama2_13b.rst", "generated/torchtune.models.llama2.qlora_llama2_70b.rst", "generated/torchtune.models.llama2.qlora_llama2_7b.rst", "generated/torchtune.models.llama2.qlora_llama2_reward_7b.rst", "generated/torchtune.models.llama3.llama3.rst", "generated/torchtune.models.llama3.llama3_70b.rst", "generated/torchtune.models.llama3.llama3_8b.rst", "generated/torchtune.models.llama3.llama3_tokenizer.rst", "generated/torchtune.models.llama3.lora_llama3.rst", "generated/torchtune.models.llama3.lora_llama3_70b.rst", "generated/torchtune.models.llama3.lora_llama3_8b.rst", "generated/torchtune.models.llama3.qlora_llama3_70b.rst", "generated/torchtune.models.llama3.qlora_llama3_8b.rst", "generated/torchtune.models.llama3_1.llama3_1.rst", "generated/torchtune.models.llama3_1.llama3_1_405b.rst", "generated/torchtune.models.llama3_1.llama3_1_70b.rst", "generated/torchtune.models.llama3_1.llama3_1_8b.rst", "generated/torchtune.models.llama3_1.lora_llama3_1.rst", "generated/torchtune.models.llama3_1.lora_llama3_1_405b.rst", "generated/torchtune.models.llama3_1.lora_llama3_1_70b.rst", "generated/torchtune.models.llama3_1.lora_llama3_1_8b.rst", "generated/torchtune.models.llama3_1.qlora_llama3_1_405b.rst", "generated/torchtune.models.llama3_1.qlora_llama3_1_70b.rst", "generated/torchtune.models.llama3_1.qlora_llama3_1_8b.rst", "generated/torchtune.models.llama3_2.llama3_2_1b.rst", "generated/torchtune.models.llama3_2.llama3_2_3b.rst", "generated/torchtune.models.llama3_2.lora_llama3_2_1b.rst", "generated/torchtune.models.llama3_2.lora_llama3_2_3b.rst", "generated/torchtune.models.llama3_2.qlora_llama3_2_1b.rst", "generated/torchtune.models.llama3_2.qlora_llama3_2_3b.rst", "generated/torchtune.models.llama3_2_vision.Llama3VisionEncoder.rst", "generated/torchtune.models.llama3_2_vision.Llama3VisionProjectionHead.rst", "generated/torchtune.models.llama3_2_vision.Llama3VisionTransform.rst", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_11b.rst", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_decoder.rst", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_encoder.rst", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_transform.rst", "generated/torchtune.models.llama3_2_vision.lora_llama3_2_vision_11b.rst", "generated/torchtune.models.llama3_2_vision.lora_llama3_2_vision_decoder.rst", "generated/torchtune.models.llama3_2_vision.lora_llama3_2_vision_encoder.rst", "generated/torchtune.models.llama3_2_vision.qlora_llama3_2_vision_11b.rst", "generated/torchtune.models.mistral.MistralChatTemplate.rst", "generated/torchtune.models.mistral.lora_mistral.rst", "generated/torchtune.models.mistral.lora_mistral_7b.rst", "generated/torchtune.models.mistral.lora_mistral_classifier.rst", "generated/torchtune.models.mistral.lora_mistral_reward_7b.rst", "generated/torchtune.models.mistral.mistral.rst", "generated/torchtune.models.mistral.mistral_7b.rst", "generated/torchtune.models.mistral.mistral_classifier.rst", "generated/torchtune.models.mistral.mistral_reward_7b.rst", "generated/torchtune.models.mistral.mistral_tokenizer.rst", "generated/torchtune.models.mistral.qlora_mistral_7b.rst", 
"generated/torchtune.models.mistral.qlora_mistral_reward_7b.rst", "generated/torchtune.models.phi3.lora_phi3.rst", "generated/torchtune.models.phi3.lora_phi3_mini.rst", "generated/torchtune.models.phi3.phi3.rst", "generated/torchtune.models.phi3.phi3_mini.rst", "generated/torchtune.models.phi3.phi3_mini_tokenizer.rst", "generated/torchtune.models.phi3.qlora_phi3_mini.rst", "generated/torchtune.models.qwen2.lora_qwen2.rst", "generated/torchtune.models.qwen2.lora_qwen2_0_5b.rst", "generated/torchtune.models.qwen2.lora_qwen2_1_5b.rst", "generated/torchtune.models.qwen2.lora_qwen2_7b.rst", "generated/torchtune.models.qwen2.qwen2.rst", "generated/torchtune.models.qwen2.qwen2_0_5b.rst", "generated/torchtune.models.qwen2.qwen2_1_5b.rst", "generated/torchtune.models.qwen2.qwen2_7b.rst", "generated/torchtune.models.qwen2.qwen2_tokenizer.rst", "generated/torchtune.models.qwen2_5.lora_qwen2_5_0_5b.rst", "generated/torchtune.models.qwen2_5.lora_qwen2_5_14b_base.rst", "generated/torchtune.models.qwen2_5.lora_qwen2_5_14b_instruct.rst", "generated/torchtune.models.qwen2_5.lora_qwen2_5_1_5b_base.rst", "generated/torchtune.models.qwen2_5.lora_qwen2_5_1_5b_instruct.rst", "generated/torchtune.models.qwen2_5.lora_qwen2_5_32b_base.rst", "generated/torchtune.models.qwen2_5.lora_qwen2_5_32b_instruct.rst", "generated/torchtune.models.qwen2_5.lora_qwen2_5_3b.rst", "generated/torchtune.models.qwen2_5.lora_qwen2_5_72b_base.rst", "generated/torchtune.models.qwen2_5.lora_qwen2_5_72b_instruct.rst", "generated/torchtune.models.qwen2_5.lora_qwen2_5_7b_base.rst", "generated/torchtune.models.qwen2_5.lora_qwen2_5_7b_instruct.rst", "generated/torchtune.models.qwen2_5.qwen2_5_0_5b.rst", "generated/torchtune.models.qwen2_5.qwen2_5_14b_base.rst", "generated/torchtune.models.qwen2_5.qwen2_5_14b_instruct.rst", "generated/torchtune.models.qwen2_5.qwen2_5_1_5b_base.rst", "generated/torchtune.models.qwen2_5.qwen2_5_1_5b_instruct.rst", "generated/torchtune.models.qwen2_5.qwen2_5_32b_base.rst", "generated/torchtune.models.qwen2_5.qwen2_5_32b_instruct.rst", "generated/torchtune.models.qwen2_5.qwen2_5_3b.rst", "generated/torchtune.models.qwen2_5.qwen2_5_72b_base.rst", "generated/torchtune.models.qwen2_5.qwen2_5_72b_instruct.rst", "generated/torchtune.models.qwen2_5.qwen2_5_7b_base.rst", "generated/torchtune.models.qwen2_5.qwen2_5_7b_instruct.rst", "generated/torchtune.models.qwen2_5.qwen2_5_tokenizer.rst", "generated/torchtune.modules.FeedForward.rst", "generated/torchtune.modules.Fp32LayerNorm.rst", "generated/torchtune.modules.KVCache.rst", "generated/torchtune.modules.MultiHeadAttention.rst", "generated/torchtune.modules.RMSNorm.rst", "generated/torchtune.modules.RotaryPositionalEmbeddings.rst", "generated/torchtune.modules.TanhGate.rst", "generated/torchtune.modules.TiedLinear.rst", "generated/torchtune.modules.TransformerCrossAttentionLayer.rst", "generated/torchtune.modules.TransformerDecoder.rst", "generated/torchtune.modules.TransformerSelfAttentionLayer.rst", "generated/torchtune.modules.VisionTransformer.rst", "generated/torchtune.modules.common_utils.delete_kv_caches.rst", "generated/torchtune.modules.common_utils.disable_kv_cache.rst", "generated/torchtune.modules.common_utils.local_kv_cache.rst", "generated/torchtune.modules.common_utils.reparametrize_as_dtype_state_dict_post_hook.rst", "generated/torchtune.modules.loss.CEWithChunkedOutputLoss.rst", "generated/torchtune.modules.loss.ForwardKLLoss.rst", "generated/torchtune.modules.loss.ForwardKLWithChunkedOutputLoss.rst", 
"generated/torchtune.modules.model_fusion.DeepFusionModel.rst", "generated/torchtune.modules.model_fusion.FusionEmbedding.rst", "generated/torchtune.modules.model_fusion.FusionLayer.rst", "generated/torchtune.modules.model_fusion.get_fusion_params.rst", "generated/torchtune.modules.model_fusion.register_fusion_module.rst", "generated/torchtune.modules.peft.AdapterModule.rst", "generated/torchtune.modules.peft.DoRALinear.rst", "generated/torchtune.modules.peft.LoRALinear.rst", "generated/torchtune.modules.peft.disable_adapter.rst", "generated/torchtune.modules.peft.get_adapter_params.rst", "generated/torchtune.modules.peft.set_trainable_params.rst", "generated/torchtune.modules.peft.validate_missing_and_unexpected_for_lora.rst", "generated/torchtune.modules.peft.validate_state_dict_for_lora.rst", "generated/torchtune.modules.tokenizers.BaseTokenizer.rst", "generated/torchtune.modules.tokenizers.ModelTokenizer.rst", "generated/torchtune.modules.tokenizers.SentencePieceBaseTokenizer.rst", "generated/torchtune.modules.tokenizers.TikTokenBaseTokenizer.rst", "generated/torchtune.modules.tokenizers.parse_hf_tokenizer_json.rst", "generated/torchtune.modules.tokenizers.tokenize_messages_no_special_tokens.rst", "generated/torchtune.modules.transforms.Transform.rst", "generated/torchtune.modules.transforms.VisionCrossAttentionMask.rst", "generated/torchtune.rlhf.estimate_advantages.rst", "generated/torchtune.rlhf.get_rewards_ppo.rst", "generated/torchtune.rlhf.loss.DPOLoss.rst", "generated/torchtune.rlhf.loss.PPOLoss.rst", "generated/torchtune.rlhf.loss.RSOLoss.rst", "generated/torchtune.rlhf.loss.SimPOLoss.rst", "generated/torchtune.rlhf.truncate_sequence_at_first_stop_token.rst", "generated/torchtune.training.FSDPPolicyType.rst", "generated/torchtune.training.FormattedCheckpointFiles.rst", "generated/torchtune.training.FullModelHFCheckpointer.rst", "generated/torchtune.training.FullModelMetaCheckpointer.rst", "generated/torchtune.training.FullModelTorchTuneCheckpointer.rst", "generated/torchtune.training.ModelType.rst", "generated/torchtune.training.OptimizerInBackwardWrapper.rst", "generated/torchtune.training.apply_selective_activation_checkpointing.rst", "generated/torchtune.training.create_optim_in_bwd_wrapper.rst", "generated/torchtune.training.get_cosine_schedule_with_warmup.rst", "generated/torchtune.training.get_dtype.rst", "generated/torchtune.training.get_full_finetune_fsdp_wrap_policy.rst", "generated/torchtune.training.get_lr.rst", "generated/torchtune.training.get_memory_stats.rst", "generated/torchtune.training.get_quantizer_mode.rst", "generated/torchtune.training.get_unmasked_sequence_lengths.rst", "generated/torchtune.training.get_world_size_and_rank.rst", "generated/torchtune.training.init_distributed.rst", "generated/torchtune.training.is_distributed.rst", "generated/torchtune.training.log_memory_stats.rst", "generated/torchtune.training.lora_fsdp_wrap_policy.rst", "generated/torchtune.training.metric_logging.CometLogger.rst", "generated/torchtune.training.metric_logging.DiskLogger.rst", "generated/torchtune.training.metric_logging.StdoutLogger.rst", "generated/torchtune.training.metric_logging.TensorBoardLogger.rst", "generated/torchtune.training.metric_logging.WandBLogger.rst", "generated/torchtune.training.register_optim_in_bwd_hooks.rst", "generated/torchtune.training.set_activation_checkpointing.rst", "generated/torchtune.training.set_default_dtype.rst", "generated/torchtune.training.set_seed.rst", "generated/torchtune.training.setup_torch_profiler.rst", 
"generated/torchtune.training.update_state_dict_for_classifier.rst", "generated/torchtune.training.validate_expected_param_dtype.rst", "generated/torchtune.utils.batch_to_device.rst", "generated/torchtune.utils.get_device.rst", "generated/torchtune.utils.get_logger.rst", "generated/torchtune.utils.torch_version_ge.rst", "generated_examples/index.rst", "generated_examples/sg_execution_times.rst", "index.rst", "install.rst", "overview.rst", "recipes/lora_finetune_single_device.rst", "recipes/qat_distributed.rst", "recipes/recipes_overview.rst", "sg_execution_times.rst", "tune_cli.rst", "tutorials/chat.rst", "tutorials/e2e_flow.rst", "tutorials/first_finetune_tutorial.rst", "tutorials/llama3.rst", "tutorials/llama_kd_tutorial.rst", "tutorials/lora_finetune.rst", "tutorials/memory_optimizations.rst", "tutorials/qat_finetune.rst", "tutorials/qlora_finetune.rst"], "titles": ["torchtune.config", "torchtune.data", "torchtune.datasets", "torchtune.generation", "torchtune.models", "torchtune.modules", "torchtune.rlhf", "torchtune.training", "torchtune.utils", "Chat Datasets", "Custom Components and Recipes", "Datasets Overview", "Instruct Datasets", "Message Transforms", "Messages", "Multimodal Transforms", "Multimodal Datasets", "Sample packing", "Preference Datasets", "Prompt Templates", "Text-completion Datasets", "Tokenizers", "Checkpointing in torchtune", "Logging to Comet", "All About Configs", "What Are Recipes?", "Logging to Weights & Biases", "instantiate", "log_config", "parse", "validate", "AlpacaToMessages", "ChatMLTemplate", "ChosenRejectedToMessages", "torchtune.data.GrammarErrorCorrectionTemplate", "InputOutputToMessages", "Message", "OpenAIToMessages", "PromptTemplate", "PromptTemplateInterface", "torchtune.data.QuestionAnswerTemplate", "torchtune.data.Role", "ShareGPTToMessages", "torchtune.data.SummarizeTemplate", "format_content_with_images", "left_pad_sequence", "load_image", "padded_collate", "padded_collate_dpo", "padded_collate_sft", "padded_collate_tiled_images_and_mask", "truncate", "validate_messages", "ConcatDataset", "PackedDataset", "PreferenceDataset", "SFTDataset", "TextCompletionDataset", "alpaca_cleaned_dataset", "alpaca_dataset", "chat_dataset", "cnn_dailymail_articles_dataset", "grammar_dataset", "hh_rlhf_helpful_dataset", "instruct_dataset", "llava_instruct_dataset", "the_cauldron_dataset", "preference_dataset", "samsum_dataset", "slimorca_dataset", "stack_exchange_paired_dataset", "text_completion_dataset", "wikitext_dataset", "generate", "generate_next_token", "get_causal_mask_from_padding_mask", "get_position_ids_from_padding_mask", "sample", "TilePositionalEmbedding", "TiledTokenPositionalEmbedding", "TokenPositionalEmbedding", "clip_vision_encoder", "code_llama2_13b", "code_llama2_70b", "code_llama2_7b", "lora_code_llama2_13b", "lora_code_llama2_70b", "lora_code_llama2_7b", "qlora_code_llama2_13b", "qlora_code_llama2_70b", "qlora_code_llama2_7b", "gemma", "gemma_2b", "gemma_7b", "gemma_tokenizer", "lora_gemma", "lora_gemma_2b", "lora_gemma_7b", "qlora_gemma_2b", "qlora_gemma_7b", "Llama2ChatTemplate", "llama2", "llama2_13b", "llama2_70b", "llama2_7b", "llama2_reward_7b", "llama2_tokenizer", "lora_llama2", "lora_llama2_13b", "lora_llama2_70b", "lora_llama2_7b", "lora_llama2_reward_7b", "qlora_llama2_13b", "qlora_llama2_70b", "qlora_llama2_7b", "qlora_llama2_reward_7b", "llama3", "llama3_70b", "llama3_8b", "llama3_tokenizer", "lora_llama3", "lora_llama3_70b", "lora_llama3_8b", "qlora_llama3_70b", "qlora_llama3_8b", "llama3_1", "llama3_1_405b", "llama3_1_70b", 
"llama3_1_8b", "lora_llama3_1", "lora_llama3_1_405b", "lora_llama3_1_70b", "lora_llama3_1_8b", "qlora_llama3_1_405b", "qlora_llama3_1_70b", "qlora_llama3_1_8b", "llama3_2_1b", "llama3_2_3b", "lora_llama3_2_1b", "lora_llama3_2_3b", "qlora_llama3_2_1b", "qlora_llama3_2_3b", "Llama3VisionEncoder", "Llama3VisionProjectionHead", "Llama3VisionTransform", "llama3_2_vision_11b", "llama3_2_vision_decoder", "llama3_2_vision_encoder", "llama3_2_vision_transform", "lora_llama3_2_vision_11b", "lora_llama3_2_vision_decoder", "lora_llama3_2_vision_encoder", "qlora_llama3_2_vision_11b", "MistralChatTemplate", "lora_mistral", "lora_mistral_7b", "lora_mistral_classifier", "lora_mistral_reward_7b", "mistral", "mistral_7b", "mistral_classifier", "mistral_reward_7b", "mistral_tokenizer", "qlora_mistral_7b", "qlora_mistral_reward_7b", "lora_phi3", "lora_phi3_mini", "phi3", "phi3_mini", "phi3_mini_tokenizer", "qlora_phi3_mini", "lora_qwen2", "lora_qwen2_0_5b", "lora_qwen2_1_5b", "lora_qwen2_7b", "qwen2", "qwen2_0_5b", "qwen2_1_5b", "qwen2_7b", "qwen2_tokenizer", "lora_qwen2_5_0_5b", "lora_qwen2_5_14b_base", "lora_qwen2_5_14b_instruct", "lora_qwen2_5_1_5b_base", "lora_qwen2_5_1_5b_instruct", "lora_qwen2_5_32b_base", "lora_qwen2_5_32b_instruct", "lora_qwen2_5_3b", "lora_qwen2_5_72b_base", "lora_qwen2_5_72b_instruct", "lora_qwen2_5_7b_base", "lora_qwen2_5_7b_instruct", "qwen2_5_0_5b", "qwen2_5_14b_base", "qwen2_5_14b_instruct", "qwen2_5_1_5b_base", "qwen2_5_1_5b_instruct", "qwen2_5_32b_base", "qwen2_5_32b_instruct", "qwen2_5_3b", "qwen2_5_72b_base", "qwen2_5_72b_instruct", "qwen2_5_7b_base", "qwen2_5_7b_instruct", "qwen2_5_tokenizer", "FeedForward", "Fp32LayerNorm", "KVCache", "MultiHeadAttention", "RMSNorm", "RotaryPositionalEmbeddings", "TanhGate", "TiedLinear", "TransformerCrossAttentionLayer", "TransformerDecoder", "TransformerSelfAttentionLayer", "VisionTransformer", "delete_kv_caches", "disable_kv_cache", "local_kv_cache", "reparametrize_as_dtype_state_dict_post_hook", "CEWithChunkedOutputLoss", "ForwardKLLoss", "ForwardKLWithChunkedOutputLoss", "DeepFusionModel", "FusionEmbedding", "FusionLayer", "get_fusion_params", "register_fusion_module", "AdapterModule", "DoRALinear", "LoRALinear", "disable_adapter", "get_adapter_params", "set_trainable_params", "validate_missing_and_unexpected_for_lora", "validate_state_dict_for_lora", "BaseTokenizer", "ModelTokenizer", "SentencePieceBaseTokenizer", "TikTokenBaseTokenizer", "parse_hf_tokenizer_json", "tokenize_messages_no_special_tokens", "Transform", "VisionCrossAttentionMask", "estimate_advantages", "get_rewards_ppo", "DPOLoss", "PPOLoss", "RSOLoss", "SimPOLoss", "truncate_sequence_at_first_stop_token", "torchtune.training.FSDPPolicyType", "FormattedCheckpointFiles", "FullModelHFCheckpointer", "FullModelMetaCheckpointer", "FullModelTorchTuneCheckpointer", "ModelType", "OptimizerInBackwardWrapper", "apply_selective_activation_checkpointing", "create_optim_in_bwd_wrapper", "get_cosine_schedule_with_warmup", "get_dtype", "get_full_finetune_fsdp_wrap_policy", "get_lr", "get_memory_stats", "get_quantizer_mode", "get_unmasked_sequence_lengths", "get_world_size_and_rank", "init_distributed", "is_distributed", "log_memory_stats", "lora_fsdp_wrap_policy", "CometLogger", "DiskLogger", "StdoutLogger", "TensorBoardLogger", "WandBLogger", "register_optim_in_bwd_hooks", "set_activation_checkpointing", "set_default_dtype", "set_seed", "setup_torch_profiler", "update_state_dict_for_classifier", "validate_expected_param_dtype", "batch_to_device", "get_device", "get_logger", 
"torch_version_ge", "<no title>", "Computation times", "Welcome to the torchtune Documentation", "Install Instructions", "torchtune Overview", "LoRA Single Device Finetuning", "Distributed Quantization-Aware Training (QAT)", "Recipes Overview", "Computation times", "torchtune CLI", "Fine-Tuning Llama3 with Chat Data", "End-to-End Workflow with torchtune", "Fine-Tune Your First LLM", "Meta Llama3 in torchtune", "Distilling Llama3.1 8B into Llama3.2 1B using Knowledge Distillation", "Fine-Tuning Llama2 with LoRA", "Memory Optimization Overview", "Fine-Tuning Llama3 with QAT", "Fine-Tuning Llama2 with QLoRA"], "terms": {"instruct": [1, 2, 4, 9, 10, 11, 13, 15, 16, 17, 18, 19, 21, 31, 32, 33, 35, 37, 42, 54, 56, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 71, 145, 148, 149, 153, 161, 167, 168, 169, 176, 177, 178, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 291, 294, 295, 298, 299, 301, 303, 304, 306, 307], "prompt": [1, 9, 10, 11, 12, 13, 18, 31, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 55, 56, 59, 60, 62, 63, 64, 67, 68, 69, 70, 73, 74, 94, 100, 106, 119, 144, 148, 153, 162, 169, 179, 204, 214, 224, 242, 300, 302], "chat": [1, 2, 11, 13, 16, 18, 32, 37, 42, 56, 60, 100, 169, 294], "includ": [1, 9, 11, 12, 16, 18, 19, 21, 22, 24, 25, 38, 39, 56, 77, 81, 91, 101, 116, 125, 146, 147, 148, 150, 151, 158, 169, 175, 214, 230, 231, 237, 254, 255, 293, 296, 298, 299, 300, 301, 302, 303, 304, 307], "some": [1, 17, 18, 20, 21, 22, 24, 32, 156, 225, 227, 233, 234, 291, 293, 294, 295, 298, 299, 300, 301, 303, 304, 305, 306, 307], "specif": [1, 5, 11, 12, 15, 19, 21, 24, 25, 27, 55, 56, 65, 66, 144, 238, 263, 295, 299, 300, 305, 306, 307], "format": [1, 2, 7, 11, 19, 21, 36, 45, 46, 55, 56, 59, 60, 63, 64, 67, 100, 144, 153, 238, 253, 254, 255, 256, 257, 298, 299, 300, 301, 302, 304, 305], "differ": [1, 9, 10, 17, 18, 19, 21, 24, 26, 48, 53, 60, 64, 78, 79, 80, 144, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 193, 194, 195, 196, 197, 198, 200, 201, 202, 203, 212, 216, 230, 239, 247, 257, 284, 293, 294, 295, 298, 299, 300, 302, 303, 304, 305, 306, 307], "dataset": [1, 10, 13, 14, 15, 17, 19, 24, 31, 33, 35, 36, 37, 42, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 247, 293, 301, 302, 303, 306], "model": [1, 2, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 25, 27, 31, 32, 33, 35, 36, 37, 42, 53, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 207, 208, 209, 210, 212, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 241, 242, 243, 245, 246, 247, 248, 249, 250, 254, 255, 256, 257, 259, 260, 263, 265, 272, 273, 278, 279, 283, 291, 293, 294, 295, 299, 307], "convert": [1, 9, 11, 14, 21, 22, 33, 35, 37, 42, 49, 55, 56, 60, 65, 66, 67, 75, 142, 254, 300, 306, 307], "from": [1, 2, 4, 10, 11, 
13, 14, 15, 17, 19, 22, 23, 24, 25, 26, 27, 31, 33, 36, 37, 42, 45, 46, 47, 50, 53, 54, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 76, 77, 78, 79, 80, 81, 82, 83, 84, 92, 93, 100, 102, 103, 104, 105, 119, 143, 144, 148, 159, 161, 169, 176, 177, 178, 179, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 208, 213, 214, 215, 216, 217, 218, 219, 221, 222, 223, 226, 227, 228, 229, 233, 236, 239, 241, 244, 247, 249, 250, 253, 254, 255, 256, 258, 260, 261, 273, 276, 277, 278, 283, 290, 292, 295, 297, 298, 300, 301, 302, 303, 304, 305, 306], "common": [1, 2, 5, 9, 14, 15, 24, 242, 298, 299, 302, 304, 305, 306], "schema": [1, 9, 11, 12, 16], "convers": [1, 13, 16, 18, 19, 21, 22, 33, 42, 52, 55, 56, 60, 65, 67, 69, 254, 256, 257, 293, 299, 300, 304, 305, 307], "json": [1, 9, 12, 13, 16, 18, 21, 22, 37, 42, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 119, 148, 169, 179, 204, 241, 254, 298, 299, 300, 306], "list": [1, 9, 11, 14, 15, 18, 19, 21, 22, 24, 33, 36, 38, 44, 45, 47, 48, 49, 50, 51, 52, 53, 55, 56, 60, 61, 65, 66, 67, 72, 73, 81, 85, 86, 87, 88, 89, 90, 94, 95, 96, 97, 98, 99, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 119, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 143, 144, 147, 148, 149, 150, 151, 152, 154, 155, 156, 157, 162, 163, 164, 165, 166, 169, 170, 171, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 214, 216, 221, 223, 224, 225, 226, 229, 230, 231, 235, 236, 237, 238, 239, 240, 242, 244, 253, 254, 255, 256, 273, 287, 296, 299, 300, 301, 302, 305, 306], "us": [1, 2, 4, 5, 9, 10, 11, 12, 13, 14, 16, 17, 18, 20, 21, 22, 23, 26, 27, 29, 32, 35, 36, 38, 44, 47, 50, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 79, 80, 81, 100, 101, 107, 116, 119, 120, 125, 129, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 165, 169, 171, 175, 179, 204, 205, 207, 208, 209, 210, 212, 214, 215, 216, 217, 218, 219, 220, 221, 224, 225, 228, 232, 235, 239, 240, 244, 245, 246, 247, 248, 250, 252, 254, 255, 257, 258, 262, 263, 265, 272, 273, 274, 275, 276, 277, 281, 283, 285, 286, 291, 292, 293, 294, 295, 296, 298, 301, 302, 304, 305, 306], "collect": [1, 24, 301], "sampl": [1, 9, 11, 12, 13, 14, 15, 16, 19, 20, 21, 23, 26, 33, 35, 36, 37, 42, 44, 50, 54, 55, 56, 57, 62, 63, 65, 66, 67, 68, 69, 71, 73, 74, 208, 210, 214, 215, 216, 224, 243, 244, 249, 299, 300, 305], "batch": [1, 11, 17, 25, 47, 48, 49, 50, 54, 59, 62, 65, 66, 68, 79, 142, 143, 207, 208, 210, 213, 214, 215, 216, 219, 224, 226, 245, 246, 247, 249, 250, 267, 282, 285, 293, 301, 302, 304, 305], "handl": [1, 13, 16, 17, 24, 29, 31, 53, 56, 144, 239, 240, 299, 300, 304, 305, 307], "ani": [1, 5, 10, 11, 13, 14, 15, 16, 17, 21, 22, 24, 25, 27, 29, 30, 33, 36, 37, 38, 42, 44, 47, 50, 51, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 76, 80, 206, 214, 220, 224, 226, 227, 233, 234, 235, 236, 237, 238, 239, 242, 254, 255, 256, 258, 269, 272, 273, 281, 284, 298, 299, 301, 304, 305, 306], "pad": [1, 45, 47, 48, 49, 50, 54, 73, 75, 76, 214, 216, 246, 248, 251, 267], "miscellan": 1, "modifi": [1, 10, 21, 24, 25, 26, 218, 220, 230, 258, 293, 300, 302, 303, 304, 305, 306, 307], "For": [2, 7, 9, 10, 12, 14, 16, 18, 19, 20, 21, 22, 24, 25, 33, 35, 36, 37, 38, 42, 50, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 78, 79, 80, 81, 91, 95, 101, 107, 116, 120, 125, 129, 143, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 
[Regenerated Sphinx search-index data (stemmed term-to-document mappings from the rebuilt searchindex) omitted]
"mymessagetransform": 13, "chosenrejectedtomessag": [13, 63, 67], "core": [14, 25, 55, 56, 293, 296, 301, 307], "govern": [14, 299], "serv": [14, 19, 24, 33, 35, 37, 42, 60, 62, 63, 64, 65, 66, 67, 68, 69, 242, 252, 304], "interfac": [14, 25, 38, 39, 53, 229, 243], "api": [14, 25, 26, 34, 40, 43, 55, 56, 57, 59, 65, 66, 88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170, 235, 273, 292, 296, 298, 299, 301, 302, 307], "oper": [14, 25, 216, 232, 243, 281, 306], "send": 14, "other": [14, 15, 18, 20, 22, 25, 27, 35, 38, 53, 256, 263, 282, 285, 294, 295, 299, 301, 302, 303, 304, 305, 306], "special": [14, 16, 19, 36, 42, 119, 144, 146, 148, 150, 169, 179, 204, 216, 225, 237, 238, 240, 241, 242, 244, 258], "individu": [14, 36, 54, 224, 265, 277, 279, 299], "ref": [14, 55, 56, 57, 59, 65, 66, 168, 169, 277], "constructor": [14, 21], "ident": [14, 18, 20, 45, 47, 54, 65, 67, 153, 214, 230, 300, 305, 306], "from_dict": [14, 36, 299], "becaus": [14, 21, 55, 56, 95, 207, 214, 216, 224, 256, 298, 299, 306], "correspond": [14, 18, 21, 36, 48, 74, 75, 76, 227, 229, 233, 245, 248, 262, 295, 301, 302, 305, 306], "begin": [14, 22, 54, 71, 216, 240, 242, 299, 302, 307], "pil": [14, 15, 16, 36, 37, 44, 46], "img_msg": 14, "place": [14, 16, 20, 283, 299, 305], "mode": [14, 15, 16, 219, 259, 266, 273, 300], "rgb": [14, 15, 16, 142], "4": [14, 15, 16, 22, 24, 45, 47, 48, 49, 50, 76, 81, 144, 147, 151, 207, 208, 216, 267, 288, 293, 295, 298, 300, 302, 303, 304, 305, 306, 307], "appropri": [14, 36, 53, 76, 100, 225, 254, 261, 307], "load_imag": [14, 16], "image_path": [14, 16], "jpg": [14, 16, 35, 42, 46], "tag": [14, 16, 19, 21, 38, 42, 44, 94, 100, 106, 119, 144, 148, 153, 162, 169, 179, 204, 273, 274, 275, 276, 277, 299], "placehold": [14, 16, 42, 253], "should": [14, 15, 16, 18, 20, 22, 24, 25, 33, 35, 36, 37, 38, 42, 47, 54, 59, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 75, 76, 85, 86, 87, 95, 96, 97, 100, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 142, 146, 149, 150, 151, 153, 154, 155, 156, 157, 158, 160, 165, 166, 167, 171, 172, 173, 174, 175, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 207, 208, 214, 216, 219, 224, 229, 235, 236, 245, 248, 252, 253, 271, 273, 274, 275, 276, 277, 292, 293, 300, 301, 302, 303, 304, 305, 306, 307], "insert": [14, 226, 306], "format_content_with_imag": [14, 16], "image_tag": [14, 16, 42, 44], "conveni": [14, 24, 25, 46, 298], "prompttemplateinterfac": [14, 19, 94, 106, 119, 148, 162, 169, 179, 204], "templated_msg": [14, 19], "contains_media": [14, 16, 36], "get_media": [14, 15, 16, 36], "4x4": 14, "0x7f8d27e72740": 14, "tokenize_messsag": 14, "hi": [14, 20, 73, 299], "tokenize_messag": [14, 15, 21, 36, 55, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 144, 238, 242, 299], "22557": 14, "1526": [14, 21], "28808": 14, "28705": [14, 21], "28748": [14, 21], "15359": 14, "28725": 14, "315": [14, 20], "837": 14, "396": 14, "16107": 14, "13892": 14, "28723": 14, "modal": [15, 16, 56, 144, 226], "current": [15, 16, 18, 22, 35, 42, 54, 67, 75, 91, 95, 107, 120, 129, 149, 150, 151, 154, 156, 158, 165, 168, 171, 207, 208, 210, 214, 215, 224, 248, 255, 256, 258, 263, 266, 268, 274, 276, 278, 281, 295, 296, 301, 302, 303, 305, 306], "intend": [15, 285, 299], "drop": [15, 144, 225, 303, 306], "replac": [15, 16, 42, 51, 59, 60, 62, 64, 67, 68, 69, 144, 220, 225, 283, 304], "llama3_2_vis": [15, 16, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152], "llama3visiontransform": 
[15, 16, 148], "__init__": [15, 24, 25, 65, 66, 303, 304, 307], "transform_imag": 15, "clipimagetransform": [15, 65, 66, 144, 216], "xattn_mask": 15, "visioncrossattentionmask": [15, 144, 243], "224": [15, 16, 144], "tile_s": [15, 79, 80, 81, 144, 147, 151, 216, 244], "patch_siz": [15, 79, 80, 81, 144, 147, 151, 216, 244], "14": [15, 48, 144, 216, 306, 307], "skip_special_token": [15, 16, 67, 144], "begin_of_text": [15, 16, 21, 299], "start_header_id": [15, 16, 299], "end_header_id": [15, 16, 299], "n": [15, 16, 18, 19, 21, 34, 38, 40, 43, 208, 216, 242, 290, 297, 298, 299, 306], "eot_id": [15, 16, 21, 299], "na": [15, 299], "encoder_input": [15, 16, 50, 213, 214, 224], "shape": [15, 16, 22, 47, 50, 73, 74, 75, 76, 78, 79, 80, 81, 142, 143, 144, 147, 151, 205, 206, 207, 208, 209, 210, 211, 213, 214, 215, 216, 221, 222, 223, 224, 225, 226, 230, 231, 244, 245, 246, 247, 248, 249, 250, 251, 267, 282, 283, 303], "num_til": [15, 16, 142, 143, 216], "num_channel": [15, 16, 216], "tile_height": [15, 16], "tile_width": [15, 16], "torch": [15, 16, 22, 24, 45, 47, 48, 49, 50, 73, 74, 75, 76, 77, 78, 79, 80, 81, 142, 143, 144, 205, 206, 207, 208, 209, 210, 211, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 230, 231, 245, 246, 247, 248, 249, 250, 251, 256, 258, 260, 261, 262, 264, 265, 267, 269, 270, 276, 278, 279, 280, 281, 282, 283, 284, 285, 286, 288, 292, 295, 300, 301, 302, 303, 304, 305, 307], "just": [15, 19, 22, 293, 295, 298, 299, 301, 302, 304, 305, 306], "the_cauldron_dataset": [15, 16], "ai2d": [15, 66], "respir": 15, "combust": 15, "give": [15, 21, 24, 253, 303, 304, 305], "choic": [15, 18], "oxygen": 15, "b": [15, 25, 45, 47, 142, 143, 207, 208, 210, 214, 215, 224, 231, 245, 246, 250, 267, 277, 304, 307], "carbon": 15, "dioxid": 15, "c": [15, 45, 47, 50, 65, 142, 299], "nitrogen": 15, "d": [15, 24, 36, 65, 142, 143, 207, 208, 214, 224, 298, 299, 303, 304, 306], "heat": 15, "letter": 15, "mymultimodaltransform": 15, "my_tokenizer_build": 15, "myimagetransform": 15, "add_eo": [15, 57, 71, 144, 239, 240, 299], "tupl": [15, 19, 21, 24, 27, 38, 48, 73, 74, 80, 94, 106, 119, 144, 148, 162, 169, 179, 204, 207, 216, 220, 238, 242, 245, 246, 247, 248, 249, 250, 251, 252, 268, 282, 283, 284], "infer": [15, 19, 22, 50, 56, 91, 100, 158, 207, 208, 210, 214, 215, 224, 286, 291, 295, 296, 299, 300, 301, 302, 306, 307], "vision": [15, 16, 56, 81, 142, 144, 145, 146, 147, 148, 149, 150, 151, 152, 225, 257], "aspect_ratio": [15, 50, 78, 79, 142, 216], "append": [15, 19, 38, 39, 94, 106, 119, 144, 148, 162, 169, 179, 204, 214, 224, 239, 273, 292], "addit": [15, 21, 22, 24, 25, 27, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 144, 146, 147, 150, 151, 153, 218, 225, 226, 235, 247, 252, 254, 255, 256, 262, 263, 269, 272, 273, 274, 276, 277, 279, 293, 299, 301, 304, 305], "kei": [15, 21, 22, 24, 26, 33, 35, 37, 42, 47, 48, 55, 56, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 207, 208, 213, 214, 215, 224, 226, 234, 235, 236, 250, 254, 256, 258, 273, 282, 298, 300, 301, 304, 305, 307], "e": [16, 18, 19, 36, 46, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 78, 79, 80, 81, 142, 143, 144, 147, 151, 208, 216, 220, 224, 229, 234, 244, 253, 254, 258, 265, 282, 286, 292, 295, 300, 302, 304, 305, 306, 307], "g": [16, 18, 46, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 78, 79, 80, 81, 142, 143, 144, 147, 151, 208, 216, 224, 229, 244, 253, 
254, 265, 282, 286, 295, 302, 304, 305, 306, 307], "base": [16, 18, 20, 22, 27, 36, 38, 85, 86, 87, 88, 89, 90, 91, 95, 96, 97, 98, 99, 101, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 144, 145, 148, 149, 150, 151, 152, 154, 155, 156, 157, 158, 160, 163, 164, 165, 166, 167, 170, 171, 172, 173, 174, 175, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 210, 230, 231, 232, 234, 235, 236, 246, 247, 249, 250, 254, 261, 263, 264, 272, 274, 283, 286, 291, 299, 300, 301, 302, 303, 304, 305, 307], "multimodal_chat_dataset": 16, "visual": [16, 226], "get": [16, 22, 23, 24, 25, 26, 50, 144, 258, 262, 265, 268, 273, 287, 292, 293, 294, 295, 299, 300, 301, 303, 304, 305, 306], "below": [16, 23, 26, 47, 252, 302, 303, 304, 307], "clock": 16, "10": [16, 45, 47, 48, 49, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 216, 225, 300, 302, 305, 306, 307], "llama3_2_vision_transform": 16, "questionanswertempl": [16, 19, 70], "image_s": [16, 145, 148, 149, 152, 216], "560": [16, 145, 148, 149, 152], "image_dir": [16, 35, 42, 65], "home": [16, 24, 35, 42, 46], "nquestion": 16, "nit": 16, "00am": 16, "sharegpt": [16, 42, 60, 299], "q1": [16, 33, 55, 60, 67], "a1": [16, 33, 55, 60], "sharegpt4v": 16, "lin": 16, "chen": 16, "renam": 16, "themselv": [16, 307], "pathlib": 16, "pil_imag": 16, "Then": [16, 20, 26, 232, 301, 303, 305], "relat": [16, 213, 214, 224, 304], "user_messag": [16, 34, 40, 43, 144, 299], "locat": [16, 21, 24, 35, 42, 298, 302, 304, 306, 307], "long": [16, 54, 240, 299, 304], "image_dog": 16, "image_cat": 16, "image_bird": 16, "dog": [16, 244], "bird": [16, 46], "pet": 16, "three": [16, 22, 25, 50, 144, 247, 249, 250, 296, 301], "referenc": 16, "huggingfac": [16, 57, 61, 63, 71, 72, 161, 168, 169, 176, 177, 178, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 247, 249, 250, 254, 255, 261, 298, 300], "co": [16, 57, 61, 63, 71, 72, 161, 168, 169, 176, 177, 178, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 254, 255, 300], "img": 16, "llava_instruct_dataset": 16, "concaten": [17, 21, 48, 53, 147, 151, 238, 242], "sequenc": [17, 45, 47, 48, 49, 50, 54, 57, 61, 65, 66, 71, 72, 75, 76, 91, 94, 95, 101, 106, 107, 116, 119, 120, 125, 129, 142, 143, 144, 146, 148, 150, 154, 156, 158, 160, 162, 165, 167, 169, 171, 175, 179, 204, 207, 208, 210, 213, 214, 215, 216, 219, 224, 226, 240, 242, 244, 246, 250, 251, 267, 299], "upto": [17, 210], "maximum": [17, 24, 47, 50, 51, 54, 61, 72, 75, 78, 79, 81, 91, 94, 95, 101, 106, 107, 116, 119, 120, 125, 129, 144, 146, 147, 148, 150, 151, 154, 156, 158, 160, 162, 165, 167, 169, 171, 175, 207, 208, 210, 213, 214, 215, 219, 224, 226, 253, 298], "length": [17, 45, 47, 49, 50, 51, 52, 53, 54, 61, 72, 75, 91, 94, 95, 101, 106, 107, 116, 119, 120, 125, 129, 142, 143, 144, 146, 148, 150, 154, 156, 158, 160, 162, 165, 167, 168, 169, 171, 175, 179, 204, 207, 208, 210, 213, 214, 215, 219, 221, 223, 224, 226, 240, 244, 245, 246, 255, 267, 273, 305], "slow": [17, 305, 307], "down": [17, 216, 256, 304, 305, 307], "introduc": [17, 85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 154, 155, 156, 157, 165, 166, 208, 209, 226, 230, 231, 250, 295, 299, 303, 304, 305, 306, 307], "signific": [17, 305, 306], "speedup": [17, 300, 302], "depend": [17, 25, 26, 254, 282, 298, 300, 303, 304, 305, 307], "iter": [17, 282, 283, 284, 307], "through": [17, 18, 22, 23, 24, 25, 26, 55, 
81, 147, 151, 205, 207, 216, 226, 232, 293, 294, 295, 296, 298, 299, 300, 301, 303, 305, 306, 307], "greedi": [17, 54], "upon": [17, 25, 53, 214, 218, 224, 302], "initi": [17, 22, 25, 29, 53, 54, 82, 83, 84, 92, 93, 102, 103, 104, 105, 117, 118, 126, 127, 128, 136, 137, 159, 161, 176, 177, 178, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 230, 247, 258, 269, 270, 283, 295, 301, 304, 307], "max": [17, 50, 54, 179, 204, 214, 216, 224, 240, 253, 261, 298, 304], "llama3": [17, 20, 21, 24, 65, 66, 73, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 144, 146, 148, 149, 150, 152, 165, 221, 223, 257, 263, 291, 293, 294, 295, 298, 300, 305], "load": [17, 22, 25, 35, 42, 46, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 224, 235, 254, 255, 256, 258, 276, 283, 300, 302, 304], "isinst": [17, 252], "1b_full_single_devic": 17, "prevent": [17, 22, 54, 247, 298, 305], "irrelev": 17, "cross": [17, 50, 54, 146, 150, 213, 221, 223, 224, 226, 244, 303], "attend": [17, 54, 208, 213, 214, 215, 224, 244], "pytorch": [17, 24, 25, 74, 214, 220, 221, 252, 270, 276, 279, 281, 282, 291, 292, 293, 295, 300, 302, 304, 305, 306, 307], "flex": 17, "attent": [17, 50, 54, 74, 75, 76, 81, 85, 86, 87, 91, 95, 96, 97, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 146, 147, 149, 150, 151, 154, 155, 156, 157, 158, 160, 165, 166, 167, 168, 171, 172, 173, 174, 175, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 207, 208, 210, 213, 214, 215, 217, 224, 226, 235, 236, 244, 302, 304, 305, 307], "flash": 17, "non": [17, 222, 223, 236, 246, 303], "causal": [17, 54, 75, 208, 214, 215, 224], "hardwar": [17, 262, 293, 300, 304, 305], "cuda": [17, 24, 262, 265, 282, 286, 292, 300, 305, 307], "ture": 17, "sdpa": 17, "memori": [17, 21, 25, 53, 54, 57, 61, 71, 72, 212, 214, 220, 221, 223, 224, 235, 263, 265, 271, 272, 282, 291, 293, 294, 295, 300, 301, 302, 303, 306], "effici": [17, 235, 263, 291, 293, 294, 300, 301, 304, 306], "fallback": 17, "while": [17, 24, 25, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 225, 230, 293, 295, 300, 305, 306, 307], "retain": [17, 247, 305, 307], "reward": [18, 105, 111, 115, 157, 161, 164, 245, 246, 247, 249, 250, 257], "downstream": 18, "captur": 18, "ground": [18, 221, 222, 223, 305], "truth": [18, 24, 221, 222, 223, 300, 302], "usual": [18, 21, 22, 210, 214, 251, 254, 267, 277, 298, 300, 304, 305], "outcom": 18, "binari": 18, "comparison": [18, 25, 304, 307], "annot": 18, "accord": [18, 19, 65, 66, 76, 153, 299], "criterion": 18, "style": [18, 31, 54, 58, 59, 60, 69, 226, 307], "interact": [18, 25, 55, 67, 291, 296, 301], "free": [18, 250, 296, 304], "preference_dataset": 18, "my_preference_dataset": [18, 67], "chosen_convers": [18, 67], "hole": [18, 67], "my": [18, 19, 23, 67, 73, 298, 299, 300, 302], "trouser": [18, 67], "fix": [18, 20, 67, 306], "rejected_convers": [18, 67], "off": [18, 25, 38, 67, 294, 295, 300, 306], "chosen": [18, 33, 55, 63, 67, 70, 247, 249, 250, 282], "reject": [18, 33, 55, 63, 67, 70, 247, 249, 250], "rejected_input_id": [18, 48, 67], "nwhat": 18, "ntake": 18, "rejected_label": [18, 48], "128006": 18, "78191": 18, "128007": 18, "271": 18, "18293": 18, "1124": 18, "1022": 18, "13": [18, 20, 21, 48, 216, 242, 251, 307], "128009": [18, 299], "accomplish": [18, 20, 53, 60, 64, 67, 71], "shown": [18, 300, 
305, 306], "di": 18, "look": [18, 19, 22, 24, 25, 260, 276, 292, 299, 300, 301, 302, 303, 304, 306], "anthrop": [18, 63], "harmless": [18, 63], "granni": 18, "her": [18, 20], "mobil": [18, 300], "phone": [18, 300], "issu": [18, 296, 306], "grandmoth": 18, "manag": [18, 22, 53, 218, 219, 232, 273, 280, 299], "behavior": [18, 22, 272, 299], "thing": [18, 305, 307], "grandma": 18, "feel": [18, 296, 304], "box": [18, 293, 295, 307], "hh_rlhf_helpful_dataset": 18, "hendrydong": 18, "preference_700k": 18, "stack_exchange_paired_dataset": 18, "purpos": [19, 65, 66, 301, 302], "whenev": [19, 144, 221, 304], "llama2": [19, 22, 24, 25, 27, 61, 72, 82, 83, 84, 85, 86, 87, 88, 89, 90, 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 144, 205, 214, 215, 257, 291, 294, 298, 301, 302, 305, 306], "were": [19, 20, 21, 53, 142, 216, 232, 248, 301, 306], "gear": [19, 144], "summar": [19, 43, 68, 299, 305], "summarizetempl": [19, 68, 299], "commun": [19, 144, 300, 305], "chatmltempl": [19, 144], "gec_templ": 19, "extend": [19, 21, 22, 25, 293, 305], "customprompttempl": 19, "achiev": [19, 38, 278, 295, 300, 302, 303, 304, 306, 307], "prepend_tag": [19, 38], "append_tag": [19, 38], "thu": [19, 31, 38, 55, 56, 214, 305, 306], "empti": [19, 47, 50, 52, 77, 298], "standalon": [19, 207], "my_custom_templ": 19, "Is": 19, "overhyp": 19, "advanc": [19, 79, 80, 81, 147, 151, 216], "configur": [19, 21, 25, 55, 56, 59, 60, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71, 72, 95, 107, 120, 129, 149, 154, 165, 171, 273, 293, 295, 296, 299, 301, 302, 303, 304, 305, 306, 307], "doesn": [19, 300], "neatli": 19, "fall": 19, "protocol": [19, 21, 229, 237, 238, 243], "arg": [19, 21, 24, 27, 32, 39, 80, 206, 214, 220, 226, 229, 237, 238, 243, 275, 282, 295, 306], "whether": [19, 31, 33, 35, 36, 37, 42, 47, 50, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 85, 86, 87, 91, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 144, 145, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 175, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 220, 222, 224, 230, 231, 235, 236, 239, 240, 252, 262, 264, 265, 273, 283, 299, 303], "sai": [19, 298, 299, 301], "eureka": 19, "eurekatempl": 19, "formatted_dialogu": 19, "llama2chattempl": [19, 106, 153, 299], "paradigm": [20, 25, 294, 305], "unstructur": [20, 57, 71, 72], "unlabel": 20, "text_complet": 20, "odyssei": 20, "clear": [20, 305], "river": 20, "oceanu": 20, "had": 20, "got": [20, 50], "sea": 20, "went": 20, "till": 20, "reach": 20, "aeaean": 20, "island": 20, "dawn": 20, "sunris": 20, "drew": 20, "ship": 20, "sand": 20, "shore": 20, "sleep": 20, "wait": [20, 282], "break": [20, 144, 240], "child": 20, "morn": 20, "rosi": 20, "finger": 20, "appear": [20, 305], "sent": [20, 277], "men": 20, "circ": 20, "hous": 20, "fetch": [20, 304], "bodi": 20, "elpenor": 20, "cut": 20, "firewood": 20, "wood": 20, "headland": 20, "jut": 20, "wept": 20, "over": [20, 21, 25, 36, 56, 222, 223, 247, 261, 293, 295, 298, 300, 303, 304, 305, 307], "him": 20, "lament": 20, "funer": 20, "rite": 20, "armour": 20, "been": [20, 73, 75, 207, 214, 224, 251, 258, 263, 299, 305, 306], "burn": 20, "ash": 20, "rais": [20, 22, 27, 30, 33, 35, 37, 42, 44, 46, 47, 50, 52, 53, 59, 60, 62, 64, 65, 66, 68, 69, 71, 75, 81, 171, 207, 208, 212, 213, 214, 216, 217, 218, 219, 235, 236, 242, 254, 255, 256, 258, 262, 264, 265, 269, 273, 277, 281, 283, 284, 285], "cairn": 20, "stone": 20, "top": [20, 74, 77, 147, 151, 260, 305, 307], "oar": 20, "he": 20, 
"row": [20, 55, 56, 208, 214, 215, 224], "text_completion_dataset": [20, 306], "128000": [20, 299, 306], "6153": 20, "584": 20, "1051": 20, "2867": 20, "279": 20, "15140": 20, "22302": 20, "355": 20, "11": [20, 22, 45, 47, 48, 216, 300, 306, 307], "323": 20, "1047": 20, "2751": 20, "704": 20, "1139": 20, "1825": 20, "9581": 20, "4024": 20, "389": 20, "12222": 20, "8813": 20, "362": 20, "12791": 20, "5420": 20, "13218": 20, "1405": 20, "1070": 20, "374": 20, "39493": 20, "64919": 20, "439": 20, "304": 20, "1023": 20, "7634": 20, "1226": 20, "1243": 20, "24465": 20, "1057": 20, "8448": 20, "311": 20, "70163": 20, "1077": 20, "31284": 20, "6212": 20, "30315": 20, "1938": 20, "1288": 20, "1464": 20, "128001": [20, 306], "similarli": [20, 119, 148, 169, 179, 204, 306], "wikimedia": 20, "wikipedia": [20, 46, 72], "cnn_dailymail_articles_dataset": 20, "index": [21, 48, 49, 50, 53, 54, 208, 210, 215, 222, 224, 246, 261, 286, 292, 299, 300], "embed": [21, 22, 78, 79, 80, 81, 91, 95, 101, 107, 116, 120, 125, 129, 142, 143, 144, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 207, 208, 209, 210, 213, 214, 216, 224, 225, 226, 228, 263, 299, 302, 305, 306], "vector": [21, 230, 249, 299, 305], "understood": 21, "plai": [21, 300, 305], "necessari": [21, 22, 55, 56, 273, 274, 275, 276, 277, 299, 304], "phi3": [21, 22, 165, 166, 168, 169, 170, 257, 298], "phi3_mini_token": 21, "p_token": 21, "phi": [21, 168, 169, 257], "32010": 21, "29871": 21, "1792": [21, 242], "9508": [21, 242], "32007": 21, "32001": 21, "4299": 21, "2933": [21, 242], "nuser": 21, "nmodel": 21, "sentencepiec": [21, 239, 302], "tiktoken": [21, 144, 240, 302], "host": [21, 292, 298, 301, 305], "distribut": [21, 77, 258, 269, 270, 279, 281, 286, 293, 296, 298, 301, 302, 303, 305], "alongsid": [21, 263, 305], "alreadi": [21, 24, 33, 37, 42, 62, 63, 65, 66, 67, 68, 69, 207, 208, 218, 219, 224, 257, 269, 292, 298, 300, 303, 304], "_token": [21, 25], "mistraltoken": [21, 162, 299], "adher": [21, 37, 42], "arbitrarili": 21, "small": [21, 209, 300, 305], "seq": [21, 214, 224], "len": [21, 22, 50, 53, 59, 62, 65, 66, 68, 214, 216, 224], "demonstr": [21, 305, 306], "7": [21, 22, 45, 47, 48, 49, 50, 207, 216, 244, 248], "6312": 21, "28709": 21, "assign": [21, 24, 55, 56], "uniqu": [21, 55, 56, 257], "abil": 21, "NOT": [21, 22, 91, 144, 158], "presenc": [21, 31], "certain": [21, 22, 24, 282, 299], "proper": [21, 292, 301], "end_of_text": 21, "special_token": [21, 144, 240, 299], "added_token": 21, "128257": 21, "128258": 21, "remain": [21, 37, 42, 212, 261, 303, 304, 305], "special_tokens_path": [21, 119, 148, 169, 179, 204], "basetoken": 21, "actual": [21, 23, 24, 26, 31, 33, 35, 55, 56, 59, 62, 63, 64, 66, 67, 68, 70, 144, 295, 299, 306], "string": [21, 22, 35, 36, 38, 44, 60, 61, 94, 106, 119, 144, 148, 162, 169, 179, 204, 229, 237, 239, 240, 242, 253, 259, 262, 266, 273, 286, 298, 305], "kwarg": [21, 24, 27, 32, 39, 179, 204, 206, 213, 215, 220, 226, 229, 237, 238, 243, 269, 273, 274, 275, 276, 277, 279, 282], "dict": [21, 22, 24, 25, 26, 27, 31, 33, 35, 36, 37, 38, 42, 44, 47, 48, 49, 50, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 94, 106, 119, 144, 148, 162, 169, 179, 204, 213, 215, 220, 224, 226, 227, 233, 234, 235, 236, 237, 238, 240, 241, 243, 254, 255, 256, 258, 260, 265, 269, 271, 273, 278, 283, 285], "given": [21, 25, 27, 44, 47, 52, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 76, 144, 218, 219, 231, 232, 237, 238, 246, 262, 266, 272, 278, 286, 288, 293, 304], "token_id": [21, 
144, 237, 240], "its": [21, 54, 100, 153, 156, 208, 210, 214, 215, 224, 226, 230, 258, 278, 281, 298, 299, 300, 302, 304, 305], "sentencepiecebasetoken": [21, 237], "bpe": 21, "sp_token": 21, "reason": [21, 25, 73, 300, 305, 306], "walk": [22, 25, 276, 293, 299, 300, 301, 306, 307], "design": [22, 25, 250], "cover": [22, 23, 24, 25, 26, 299, 300, 307], "scenario": [22, 53, 144], "compos": [22, 216], "plug": [22, 305], "evalu": [22, 25, 291, 293, 295, 296, 301, 303, 304, 307], "gener": [22, 25, 47, 54, 61, 71, 74, 75, 76, 77, 144, 218, 219, 232, 245, 264, 273, 280, 281, 282, 289, 291, 295, 299, 303, 304, 305, 306, 307], "easi": [22, 25, 293, 304, 305], "understand": [22, 24, 25, 226, 291, 293, 294, 299, 304, 305, 307], "concept": [22, 296, 300, 301, 305], "talk": 22, "close": [22, 25, 273, 274, 275, 276, 277, 304], "veri": [22, 53, 214, 224, 298, 300, 305], "dictat": 22, "state_dict": [22, 220, 225, 226, 235, 254, 255, 256, 257, 258, 283, 304, 307], "store": [22, 55, 56, 273, 274, 277, 304, 305, 307], "disk": [22, 57, 274], "identifi": [22, 273], "state": [22, 25, 143, 214, 216, 218, 220, 224, 227, 233, 234, 235, 236, 245, 247, 254, 255, 256, 258, 260, 283, 300, 302, 304, 307], "match": [22, 44, 236, 273, 283, 292, 298, 300, 302, 304], "up": [22, 23, 25, 26, 50, 54, 61, 72, 144, 214, 218, 219, 224, 240, 244, 260, 273, 282, 294, 295, 296, 298, 299, 301, 302, 304, 305, 307], "exactli": [22, 236, 253, 306], "definit": [22, 304], "either": [22, 47, 55, 56, 73, 208, 214, 215, 236, 254, 273, 279, 292, 298, 304, 305, 306, 307], "explicit": 22, "error": [22, 24, 34, 52, 254, 281, 298], "except": [22, 36, 153, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 193, 194, 195, 196, 197, 198, 200, 201, 202, 203, 242, 303], "wors": [22, 305], "silent": 22, "succe": 22, "popular": [22, 224, 293, 300], "offici": [22, 100, 299, 301, 302], "websit": 22, "inspect": [22, 300, 304, 307], "mmap": [22, 300], "weights_onli": [22, 256], "map_loc": [22, 300], "cpu": [22, 25, 219, 220, 262, 282, 286, 292, 298, 300, 307], "tensor": [22, 45, 47, 48, 49, 50, 73, 74, 75, 76, 77, 78, 79, 80, 81, 142, 143, 205, 206, 207, 208, 209, 210, 211, 213, 214, 215, 216, 220, 221, 222, 223, 224, 225, 226, 230, 231, 245, 246, 247, 248, 249, 250, 251, 254, 267, 273, 274, 275, 276, 277, 280, 283, 285, 303, 304, 305, 307], "item": 22, "f": [22, 26, 59, 62, 65, 66, 68, 253, 299, 300, 303, 304, 307], "tok_embed": [22, 214, 224, 225], "32000": [22, 27, 304], "4096": [22, 27, 61, 72, 208, 210, 304, 306], "292": 22, "tabl": [22, 225, 299, 300, 302, 303, 305, 307], "layer": [22, 25, 81, 85, 86, 87, 88, 89, 90, 91, 95, 96, 97, 98, 99, 101, 105, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 143, 146, 147, 149, 150, 151, 152, 154, 155, 156, 157, 158, 160, 161, 163, 164, 165, 166, 167, 170, 171, 172, 173, 174, 175, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 208, 211, 212, 213, 214, 215, 216, 217, 218, 219, 224, 226, 228, 230, 231, 235, 236, 252, 259, 263, 293, 294, 302, 304, 305, 306, 307], "dim": [22, 50, 142, 143, 205, 208, 209, 210, 214, 221, 223, 224, 303], "within": [22, 24, 27, 54, 73, 77, 78, 95, 107, 120, 129, 149, 150, 151, 154, 156, 165, 171, 216, 218, 219, 276, 281, 282, 298, 304, 307], "big": 22, "bin": [22, 298, 300], "piec": 22, "pytorch_model": [22, 300], "00001": [22, 298, 303], "00002": [22, 298, 303], "embed_token": 22, "241": 22, "Not": 22, "fewer": [22, 208], "sinc": [22, 24, 27, 55, 56, 223, 254, 256, 299, 300, 302, 305, 
306], "mismatch": 22, "caus": [22, 239], "re": [22, 24, 218, 226, 250, 256, 293, 294, 295, 299, 300, 301, 304, 305], "end": [22, 25, 36, 57, 71, 144, 240, 242, 291, 293, 299, 302, 304, 306], "number": [22, 25, 44, 50, 54, 61, 72, 73, 78, 79, 81, 91, 95, 101, 107, 116, 120, 125, 129, 142, 143, 144, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 207, 208, 214, 216, 222, 223, 244, 254, 255, 256, 258, 259, 261, 268, 281, 282, 298, 301, 303, 304, 305], "save": [22, 25, 26, 214, 220, 221, 223, 224, 254, 255, 256, 258, 263, 272, 277, 291, 295, 298, 299, 300, 302, 304, 305, 306], "less": [22, 50, 73, 300, 301, 302, 305, 307], "prone": 22, "invari": 22, "accept": [22, 24, 252, 301, 305, 307], "explicitli": [22, 229, 293, 304], "produc": [22, 258, 295, 306, 307], "One": [22, 50, 306], "advantag": [22, 245, 248, 295, 304], "abl": [22, 25, 300, 301, 306], "post": [22, 216, 278, 282, 295, 300, 302, 306, 307], "quantiz": [22, 85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 230, 231, 256, 266, 291, 292, 294, 296, 301, 307], "eval": [22, 291, 293, 306], "without": [22, 24, 26, 208, 212, 214, 218, 224, 235, 292, 293, 295, 299, 300, 304, 305, 306], "OR": 22, "surround": [22, 25, 293], "load_checkpoint": [22, 25, 254, 255, 256, 257], "save_checkpoint": [22, 25, 26, 254, 255, 256], "permut": 22, "behav": 22, "further": [22, 216, 250, 298, 303, 304, 305, 306, 307], "illustr": [22, 65, 66, 302], "whilst": [22, 294, 305], "read": [22, 254, 255, 256, 293, 305], "compat": [22, 254, 256, 305, 306], "framework": [22, 25, 293], "mention": [22, 300, 305, 307], "assum": [22, 35, 42, 45, 47, 65, 94, 106, 119, 148, 162, 169, 179, 204, 207, 208, 210, 215, 224, 225, 227, 233, 240, 258, 260, 261, 262, 264, 299, 300, 304], "checkpoint_dir": [22, 24, 254, 255, 256, 300, 302, 303, 306], "easiest": [22, 300, 301], "everyth": [22, 25, 293, 296, 301], "flow": [22, 54, 306, 307], "safetensor": [22, 253, 254, 298, 303], "output_dir": [22, 24, 254, 255, 256, 282, 300, 302, 304, 306, 307], "snippet": 22, "explain": [22, 305], "fullmodelhfcheckpoint": [22, 300, 303], "sort": [22, 254, 256], "order": [22, 23, 25, 254, 256, 276, 277, 301, 305], "matter": [22, 254, 256, 298, 304], "checkpoint_fil": [22, 24, 26, 254, 255, 256, 300, 302, 303, 304, 306, 307], "restart": [22, 298], "previou": [22, 54, 254, 255, 256, 303], "section": [22, 25, 265, 291, 300, 302, 305, 307], "recipe_checkpoint": [22, 254, 255, 256, 306], "model_typ": [22, 254, 255, 256, 300, 302, 306], "resume_from_checkpoint": [22, 254, 255, 256], "discrep": [22, 254], "github": [22, 27, 74, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 209, 210, 221, 222, 247, 248, 249, 250, 261, 292, 300, 302, 303], "repositori": [22, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 100, 294, 295, 300, 301], "fullmodelmetacheckpoint": [22, 302, 306], "test": [22, 24, 25, 73, 293, 295, 299, 305], "written": [22, 24, 25, 254, 255, 273, 274, 275, 276, 277, 293], "partit": [22, 254, 307], "key_1": [22, 256], "weight_1": 22, "key_2": 22, "weight_2": 22, "mid": 22, "chekpoint": 22, "middl": [22, 226, 300, 305], "subsequ": [22, 25, 207, 214, 216, 244], "recipe_st": [22, 254, 255, 256], "pt": [22, 26, 
254, 255, 256, 300, 302, 303, 306], "epoch": [22, 25, 26, 254, 255, 256, 258, 261, 298, 299, 300, 301, 302, 306], "etc": [22, 25, 143, 254, 265, 301], "flood": 22, "overwritten": 22, "updat": [22, 24, 25, 38, 207, 208, 214, 224, 230, 243, 247, 248, 254, 258, 282, 285, 292, 299, 300, 301, 302, 304, 305, 306, 307], "hf_model_0001_0": [22, 300, 303], "hf_model_0002_0": [22, 300], "adapt": [22, 85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 143, 154, 155, 156, 157, 165, 166, 224, 226, 227, 229, 230, 231, 232, 233, 234, 254, 255, 256, 272, 294, 299, 300, 304, 307], "merg": [22, 27, 28, 179, 204, 254, 300, 302, 307], "tutori": [22, 279, 293, 294, 295, 299, 300, 301, 302, 303, 304, 305, 306, 307], "save_adapter_weights_onli": 22, "choos": [22, 60, 304], "resum": [22, 25, 254, 255, 256, 261, 307], "frozen": [22, 143, 149, 152, 225, 247, 304, 305, 307], "learnt": [22, 299, 300], "refer": [22, 24, 25, 209, 210, 216, 221, 232, 246, 247, 248, 249, 250, 273, 293, 304, 305, 306], "adapter_checkpoint": [22, 254, 255, 256], "adapter_0": [22, 300], "knowledg": [22, 291], "forward": [22, 25, 78, 79, 80, 142, 143, 205, 206, 208, 209, 210, 211, 213, 214, 215, 216, 218, 219, 221, 222, 223, 224, 225, 226, 230, 231, 247, 248, 249, 250, 265, 282, 302, 303, 304, 305, 307], "modeltyp": 22, "llama2_13b": [22, 108], "right": [22, 47, 50, 76, 214, 254, 300, 302, 304], "pytorch_fil": 22, "00003": [22, 253, 303], "torchtune_sd": 22, "load_state_dict": [22, 224, 225, 226, 235, 258, 283, 304], "successfulli": [22, 298, 301], "vocab": [22, 27, 179, 204, 214, 224, 225, 302], "70": [22, 117], "randint": 22, "no_grad": 22, "6": [22, 45, 47, 48, 49, 50, 54, 91, 95, 209, 216, 267, 295, 306, 307], "3989": 22, "9": [22, 45, 47, 48, 50, 207, 216, 267, 300, 306, 307], "0531": 22, "2375": 22, "5": [22, 24, 45, 47, 48, 49, 50, 75, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 216, 247, 250, 251, 261, 300, 301, 302, 303, 305], "2822": 22, "4872": 22, "7469": 22, "8": [22, 45, 47, 48, 50, 59, 62, 65, 66, 68, 85, 86, 87, 88, 89, 90, 96, 97, 98, 99, 108, 109, 110, 111, 112, 113, 114, 115, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 155, 157, 163, 164, 166, 170, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 207, 216, 221, 223, 300, 303, 304, 305, 306, 307], "6737": 22, "0023": 22, "8235": 22, "6819": 22, "2424": 22, "0109": 22, "6915": 22, "3618": 22, "1628": 22, "8594": 22, "5857": 22, "1151": 22, "7808": 22, "2322": 22, "8850": 22, "9604": 22, "7624": 22, "6040": 22, "3159": 22, "5849": 22, "8039": 22, "9322": 22, "2010": [22, 216], "6824": 22, "8929": 22, "8465": 22, "3794": 22, "3500": 22, "6145": 22, "5931": 22, "find": [22, 23, 25, 26, 247, 298, 300, 301, 303, 304, 305], "hope": 22, "deeper": [22, 294, 295, 301, 305], "insight": [22, 300], "happi": [22, 300], "cometlogg": 23, "checkpoint": [23, 24, 25, 220, 224, 226, 240, 253, 254, 255, 256, 257, 258, 259, 277, 279, 283, 293, 295, 298, 302, 303, 304, 306, 307], "workspac": [23, 26, 273], "seen": [23, 26, 304, 307], "screenshot": [23, 26], "comet_ml": [23, 273], "featur": [23, 25, 26, 292, 293, 294, 295, 300, 301, 305], "pip": [23, 26, 273, 276, 277, 292, 300, 302, 305], "login": [23, 26, 273, 277, 298, 300], "metric_logg": [23, 24, 25, 26], "metric_log": [23, 24, 26, 273, 274, 275, 276, 277], "experiment_nam": [23, 273], "experi": [23, 24, 273, 277, 291, 293, 302, 303, 304], "grab": [23, 26, 
302], "tab": [23, 26], "asset": 23, "artifact": [23, 26, 282], "click": [23, 26], "effect": [24, 250, 303, 305, 306], "prerequisit": [24, 299, 300, 301, 302, 303, 304, 306, 307], "Be": [24, 299, 300, 301, 302, 303, 304, 305, 306, 307], "familiar": [24, 299, 300, 301, 302, 303, 304, 306, 307], "fundament": [24, 306], "reproduc": [24, 273], "overridden": [24, 282], "quick": 24, "seed": [24, 25, 26, 281, 301, 306], "shuffl": [24, 54, 306], "dtype": [24, 25, 77, 207, 208, 213, 214, 215, 217, 218, 219, 220, 224, 226, 262, 280, 284, 300, 303, 305, 306, 307], "fp32": [24, 214, 221, 223, 305, 306, 307], "enable_fsdp": 24, "keyword": [24, 27, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 220, 299], "subfield": 24, "dotpath": [24, 94, 106, 119, 148, 162, 169, 179, 204], "wish": [24, 207, 218, 283], "exact": [24, 27, 300], "normal": [24, 54, 144, 206, 208, 209, 213, 214, 215, 221, 222, 223, 239, 299, 304, 306, 307], "instanc": [24, 27, 53, 107, 120, 129, 149, 154, 156, 165, 171, 172, 173, 176, 177, 220, 227, 233, 234, 304], "preced": [24, 27, 298, 302, 304], "throw": 24, "notic": [24, 78, 79, 80, 216, 299, 304], "miss": [24, 235, 236, 282, 304], "llama2_token": [24, 299, 300], "llama2token": [24, 106], "512": [24, 81, 307], "overwrit": [24, 256, 283, 292, 298], "duplic": [24, 25, 293, 298], "sometim": 24, "resolv": [24, 28, 301], "alpaca": [24, 31, 53, 58, 59, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 303], "disklogg": 24, "log_dir": [24, 274, 276, 277], "verifi": [24, 262, 263, 286, 299, 301, 304], "properli": [24, 235, 270, 298], "wa": [24, 35, 42, 50, 75, 79, 80, 81, 147, 151, 216, 235, 299, 304, 306, 307], "7b_lora_single_devic": [24, 300, 301, 304, 307], "my_config": [24, 298], "guidelin": 24, "tempt": 24, "put": [24, 25, 296, 301, 304, 306], "much": [24, 225, 250, 300, 302, 304, 305, 306, 307], "switch": 24, "encourag": [24, 250, 304, 305], "clariti": 24, "significantli": [24, 247, 294, 295, 305], "easier": [24, 300, 301], "dont": 24, "privat": 24, "parent": [24, 298], "guarante": 24, "stabil": [24, 221, 223, 293, 295, 305, 306, 307], "underscor": 24, "_alpaca": 24, "k1": [24, 25], "v1": [24, 25, 72], "k2": [24, 25], "v2": [24, 25, 273], "my_model_checkpoint": 24, "file_1": 24, "file_2": 24, "my_tokenizer_path": 24, "nest": [24, 285], "dot": 24, "notat": [24, 50, 142, 143, 208, 210, 214, 224, 245, 246, 267], "flag": [24, 25, 36, 59, 60, 62, 64, 67, 68, 69, 252, 256, 263, 298, 305, 307], "bitsandbyt": [24, 305], "pagedadamw8bit": [24, 305], "delet": [24, 214, 217, 218, 219, 224], "foreach": 24, "8b_full": [24, 298], "adamw": [24, 304, 305], "2e": [24, 305], "fuse": [24, 146, 150, 224, 225, 226, 227, 278, 306], "nproc_per_nod": [24, 295, 302, 304, 306], "full_finetune_distribut": [24, 264, 298, 300, 301], "thought": [25, 293, 296, 301, 307], "target": [25, 75, 222, 223, 250, 293, 303], "pipelin": [25, 293, 295], "eg": [25, 214, 224, 254, 293], "meaning": [25, 293, 300], "fsdp": [25, 212, 252, 258, 263, 272, 301, 302, 305], "activ": [25, 81, 205, 259, 265, 271, 279, 282, 293, 295, 306, 307], "gradient": [25, 222, 223, 272, 278, 282, 293, 295, 300, 302, 304, 307], "accumul": [25, 278, 282, 293, 295], "mix": [25, 206, 298, 300, 305], "precis": [25, 206, 220, 262, 293, 295, 301, 307], "complex": 25, "becom": [25, 216, 292], "harder": 25, "anticip": 25, "methodolog": 25, "possibl": [25, 54, 253, 298, 305], "trade": [25, 305], "vs": [25, 301], "qualiti": [25, 300, 304, 
306], "believ": 25, "suit": [25, 301, 305], "solut": 25, "result": [25, 65, 81, 147, 151, 216, 223, 242, 244, 282, 295, 300, 302, 303, 304, 305, 306, 307], "meant": [25, 220, 258], "expertis": 25, "routin": 25, "yourself": [25, 298, 302, 304], "exist": [25, 219, 226, 258, 273, 292, 298, 300, 301, 302, 307], "ones": [25, 50, 207], "modular": [25, 293], "wandb": [25, 26, 277, 301], "log": [25, 28, 247, 248, 249, 250, 265, 271, 273, 274, 275, 276, 277, 287, 300, 301, 302, 303, 304, 305, 307], "fulli": [25, 53, 149], "nativ": [25, 291, 293, 304, 306, 307], "numer": [25, 66, 293, 295, 306], "pariti": [25, 293], "verif": [25, 209], "benchmark": [25, 281, 293, 300, 302, 304, 306], "limit": [25, 258, 303, 305, 306], "hidden": [25, 81, 143, 147, 151, 205, 214, 216], "behind": 25, "unnecessari": 25, "abstract": [25, 237, 238, 293, 301, 307], "No": [25, 256, 293], "go": [25, 81, 100, 147, 151, 153, 216, 242, 293, 300, 301, 303, 305, 307], "figur": [25, 304, 307], "spectrum": 25, "decid": 25, "avail": [25, 35, 42, 72, 224, 226, 262, 270, 286, 293, 298, 300, 302, 304, 305], "consist": [25, 33, 37, 42, 65, 66, 72, 296, 301], "overrid": [25, 28, 29, 33, 37, 42, 62, 63, 65, 66, 67, 68, 69, 283, 296, 298, 300, 301, 302, 303, 307], "valid": [25, 52, 76, 222, 235, 236, 246, 264, 283, 284, 292, 296, 300, 301], "closer": [25, 303, 304], "monolith": [25, 293], "trainer": [25, 247, 249, 250], "wrapper": [25, 206, 239, 240, 258, 260, 298, 304], "around": [25, 144, 206, 239, 240, 265, 298, 299, 300, 304, 305, 306, 307], "extern": 25, "primarili": [25, 53, 304], "eleutherai": [25, 72, 293, 303, 304, 306], "har": [25, 293, 303, 304, 306], "stage": [25, 216], "distil": [25, 291], "dataload": [25, 54, 59, 62, 65, 66, 68], "applic": [25, 254, 255, 277], "clean": [25, 26, 58, 303], "group": [25, 208, 268, 269, 273, 274, 275, 276, 277, 298, 302, 306], "init_process_group": [25, 269], "backend": [25, 298, 306], "gloo": 25, "nccl": 25, "fullfinetunerecipedistribut": 25, "cleanup": 25, "stuff": 25, "carri": [25, 56], "metric": [25, 301, 303, 305, 306], "logger": [25, 271, 273, 274, 275, 276, 277, 287, 301], "_devic": 25, "get_devic": 25, "_dtype": 25, "get_dtyp": 25, "ckpt_dict": 25, "wrap": [25, 226, 252, 259, 263, 272, 279, 299, 305], "_model": [25, 258], "_setup_model": 25, "_setup_token": 25, "_optim": 25, "_setup_optim": 25, "_loss_fn": 25, "_setup_loss": 25, "_sampler": 25, "_dataload": 25, "_setup_data": 25, "backward": [25, 258, 260, 278, 282, 307], "zero_grad": 25, "curr_epoch": 25, "rang": [25, 225, 247, 248, 250, 281, 298, 302, 306], "epochs_run": [25, 26], "total_epoch": [25, 26], "idx": [25, 54], "enumer": 25, "_autocast": 25, "logit": [25, 73, 74, 77, 221, 222, 223, 267, 303], "global_step": 25, "_log_every_n_step": 25, "_metric_logg": 25, "log_dict": [25, 273, 274, 275, 276, 277], "step": [25, 54, 55, 56, 65, 66, 214, 224, 245, 258, 260, 261, 273, 274, 275, 276, 277, 278, 282, 291, 295, 300, 304, 306, 307], "recipe_main": [25, 29], "fullfinetunerecip": 25, "wandblogg": [26, 304, 307], "tip": 26, "straggler": 26, "background": 26, "crash": 26, "otherwis": [26, 45, 47, 50, 79, 80, 81, 147, 151, 214, 216, 270, 273, 299, 306], "exit": [26, 218, 219, 232, 292, 298], "resourc": [26, 273, 274, 275, 276, 277, 305, 306], "kill": 26, "ps": 26, "aux": 26, "grep": 26, "awk": 26, "xarg": 26, "desir": [26, 55, 56, 280, 299, 305], "suggest": [26, 303], "approach": [26, 53, 303], "full_finetun": 26, "joinpath": 26, "_checkpoint": [26, 300], "_output_dir": [26, 254, 255, 256], "torchtune_model_": 26, "with_suffix": 26, 
"wandb_at": 26, "descript": [26, 298], "whatev": 26, "metadata": [26, 306], "seed_kei": 26, "epochs_kei": 26, "total_epochs_kei": 26, "max_steps_kei": 26, "max_steps_per_epoch": [26, 306], "add_fil": 26, "log_artifact": 26, "hydra": 27, "facebook": 27, "research": 27, "com": [27, 74, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 209, 210, 221, 222, 247, 248, 249, 250, 261, 273, 292, 300, 302, 303], "facebookresearch": [27, 209], "blob": [27, 74, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 169, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 209, 210, 222, 247, 248, 249, 250, 261], "_intern": 27, "_instantiate2": 27, "l148": 27, "num_head": [27, 81, 91, 95, 101, 107, 116, 120, 125, 129, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 207, 208, 210, 214], "num_kv_head": [27, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 207, 208], "vocab_s": [27, 73, 74, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 221, 222, 223, 225], "parsed_yaml": 27, "embed_dim": [27, 78, 79, 80, 81, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 208, 210, 213, 214, 215, 216, 225, 226, 283, 304], "valueerror": [27, 33, 35, 37, 42, 44, 46, 47, 50, 52, 53, 59, 60, 62, 64, 65, 66, 68, 69, 71, 171, 207, 208, 216, 217, 218, 219, 254, 255, 256, 262, 265, 281, 284], "recipe_nam": 28, "rank": [28, 85, 86, 87, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 230, 231, 268, 270, 281, 294, 301, 304, 307], "zero": [28, 50, 207, 209, 214, 224, 253, 300, 302, 306], "displai": 28, "callabl": [29, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 81, 214, 232, 252, 263, 266, 272, 279], "With": [29, 300, 303, 304, 306, 307], "my_recip": 29, "foo": 29, "bar": [29, 293, 301, 305], "configerror": 30, "cannot": [30, 46, 256, 302], "equival": [31, 35, 79, 249, 250], "condit": [31, 73, 270, 298], "dedic": 31, "due": [31, 239, 304, 305, 307], "keep": [31, 33, 35, 37, 42, 63, 64, 66, 67, 70, 212, 225, 300, 304, 305], "openai": [32, 37, 60, 248], "markup": 32, "im_start": 32, "context": [32, 168, 218, 219, 232, 280, 282, 305], "im_end": 32, "goe": [32, 232], "a2": [33, 55], "functool": [34, 40, 43, 252], "partial": [34, 40, 43, 252], "_prompt_templ": [34, 40, 43], "assistant_messag": [34, 40, 43], "respect": [35, 53, 100, 207, 234, 282, 299], "final": [35, 42, 55, 56, 85, 86, 87, 91, 95, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 146, 147, 149, 150, 151, 154, 155, 156, 157, 158, 165, 166, 171, 174, 175, 181, 182, 185, 186, 188, 189, 190, 191, 205, 214, 224, 235, 236, 300, 302, 303, 304, 305, 307], "leav": [35, 42, 305], "liter": [36, 38, 41, 85, 86, 87, 88, 89, 90, 94, 95, 96, 97, 98, 99, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 119, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 148, 149, 150, 151, 152, 154, 155, 156, 157, 162, 163, 164, 165, 166, 169, 170, 171, 172, 173, 174, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 204, 235, 236], "union": [36, 46, 47, 58, 59, 60, 62, 64, 68, 69, 71, 72, 94, 106, 119, 148, 162, 169, 179, 204, 214, 224, 236, 254, 259, 
264, 273, 274, 275, 276, 277, 279, 281], "interleav": [36, 244], "attach": 36, "writer": 36, "calcul": [36, 38, 76, 142, 144, 208, 213, 215, 216, 245, 246, 248, 302], "consecut": [36, 52, 207, 244], "last": [36, 51, 54, 71, 214, 246, 258, 261], "properti": [36, 304, 305], "media": [36, 56], "classmethod": 36, "image_url": 37, "unmask": [37, 42, 222], "consid": [38, 53, 55, 56, 79, 80, 81, 147, 151, 216, 305], "come": [38, 52, 229, 304, 305], "nanswer": 40, "alia": [41, 252], "alwai": [42, 273, 283, 299, 305], "nsummari": [43, 299], "summari": [43, 53, 68, 216, 265], "batch_first": 45, "padding_valu": 45, "float": [45, 73, 74, 77, 85, 86, 87, 88, 89, 90, 91, 95, 96, 97, 98, 99, 101, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 144, 149, 150, 151, 152, 154, 155, 156, 157, 158, 160, 163, 164, 165, 166, 167, 170, 171, 172, 173, 174, 175, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 208, 209, 230, 231, 245, 246, 247, 248, 249, 250, 258, 261, 264, 265, 271, 273, 274, 275, 276, 277, 304, 305, 306, 307], "rnn": [45, 47, 50], "pad_sequ": [45, 47, 50], "variabl": [45, 257, 270, 273, 305, 307], "left": [45, 47, 50, 144, 214, 304], "longest": [45, 49, 50], "trail": 45, "dimens": [45, 50, 91, 95, 101, 107, 116, 120, 125, 129, 142, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 205, 207, 208, 210, 214, 216, 225, 230, 231, 302, 304, 305, 307], "element": [45, 47, 50, 53, 222, 267, 300], "12": [45, 47, 48, 69, 216, 292, 306], "image_loc": 46, "www": [46, 273], "org": [46, 65, 82, 83, 84, 85, 86, 88, 89, 90, 95, 96, 97, 98, 99, 102, 103, 104, 105, 107, 108, 109, 110, 112, 113, 114, 115, 120, 121, 122, 123, 124, 131, 132, 133, 134, 135, 138, 139, 140, 141, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 208, 209, 210, 216, 244, 245, 247, 248, 249, 250, 252, 270, 276, 279, 281, 287, 292], "en": [46, 57, 61, 63, 71, 72, 306], "pad_direct": [47, 50], "keys_to_pad": 47, "padding_idx": [47, 48, 49, 50, 54], "left_pad_sequ": [47, 50], "integ": [47, 49, 225, 252, 253, 259, 281], "batch_siz": [47, 59, 62, 65, 66, 68, 207, 208, 213, 214, 215, 217, 218, 219, 221, 222, 223, 224, 225, 226, 247, 249, 251, 300, 305, 306], "ignore_idx": [48, 49, 50], "input_id": [48, 267], "chosen_input_id": [48, 67], "chosen_label": 48, "15": [48, 216, 263, 299, 300, 304, 307], "16": [48, 85, 86, 87, 88, 89, 90, 96, 97, 98, 99, 108, 109, 110, 111, 112, 113, 114, 115, 121, 122, 123, 124, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 155, 157, 163, 164, 166, 170, 172, 173, 174, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 207, 216, 304, 305, 307], "17": [48, 216, 304], "18": [48, 216, 302], "19": [48, 216, 307], "20": [48, 216, 251, 306], "token_pair": 49, "padded_col": 49, "pad_max_til": 50, "pad_max_imag": 50, "tile": [50, 78, 79, 80, 81, 142, 143, 144, 145, 147, 148, 149, 151, 216, 244], "aspect": [50, 78, 79, 293], "ratio": [50, 78, 79, 247, 248], "text_seq_len": [50, 244], "n_tile": [50, 78, 79, 216], "h": [50, 142, 207, 216, 221, 223, 292, 298], "w": [50, 82, 83, 84, 92, 93, 102, 103, 104, 105, 117, 118, 126, 127, 128, 136, 137, 142, 159, 161, 176, 177, 178, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 216, 273, 276, 277, 299, 300, 304, 307], "h_ratio": 50, "w_ratio": 50, "encoder_mask": [50, 213, 214, 224], "image_seq_len": [50, 244], "channel": [50, 81, 142, 144, 147, 151, 216, 230, 306], "height": [50, 142], "largest": 50, "bsz": [50, 73, 74, 75, 76, 78, 79, 
"lora_qwen2_5_14b_instruct": 182, "lora_qwen2_5_1_5b_bas": 183, "lora_qwen2_5_1_5b_instruct": 184, "lora_qwen2_5_32b_bas": 185, "lora_qwen2_5_32b_instruct": 186, "lora_qwen2_5_3b": 187, "lora_qwen2_5_72b_bas": 188, "lora_qwen2_5_72b_instruct": 189, "lora_qwen2_5_7b_bas": 190, "lora_qwen2_5_7b_instruct": 191, "qwen2_5_0_5b": 192, "qwen2_5_14b_bas": 193, "qwen2_5_14b_instruct": 194, "qwen2_5_1_5b_bas": 195, "qwen2_5_1_5b_instruct": 196, "qwen2_5_32b_bas": 197, "qwen2_5_32b_instruct": 198, "qwen2_5_3b": 199, "qwen2_5_72b_bas": 200, "qwen2_5_72b_instruct": 201, "qwen2_5_7b_bas": 202, "qwen2_5_7b_instruct": 203, "qwen2_5_token": 204, "feedforward": 205, "fp32layernorm": 206, "kvcach": 207, "multiheadattent": 208, "rmsnorm": 209, "rotarypositionalembed": 210, "tanhgat": 211, "tiedlinear": 212, "transformercrossattentionlay": 213, "transformerdecod": 214, "transformerselfattentionlay": 215, "visiontransform": 216, "delete_kv_cach": 217, "disable_kv_cach": 218, "local_kv_cach": 219, "reparametrize_as_dtype_state_dict_post_hook": 220, "cewithchunkedoutputloss": 221, "forwardklloss": 222, "forwardklwithchunkedoutputloss": 223, "deepfusionmodel": 224, "fusionembed": 225, "fusionlay": 226, "get_fusion_param": 227, "register_fusion_modul": 228, "adaptermodul": 229, "doralinear": 230, "loralinear": 231, "disable_adapt": 232, "get_adapter_param": 233, "set_trainable_param": 234, "validate_missing_and_unexpected_for_lora": 235, "validate_state_dict_for_lora": 236, "basetoken": 237, "modeltoken": 238, "sentencepiecebasetoken": 239, "tiktokenbasetoken": 240, "parse_hf_tokenizer_json": 241, "tokenize_messages_no_special_token": 242, "visioncrossattentionmask": 244, "estimate_advantag": 245, "get_rewards_ppo": 246, "dpoloss": 247, "ppoloss": 248, "rsoloss": 249, "simpoloss": 250, "truncate_sequence_at_first_stop_token": 251, "fsdppolicytyp": 252, "formattedcheckpointfil": 253, "fullmodelhfcheckpoint": 254, "fullmodelmetacheckpoint": 255, "fullmodeltorchtunecheckpoint": 256, "modeltyp": 257, "optimizerinbackwardwrapp": 258, "apply_selective_activation_checkpoint": 259, "create_optim_in_bwd_wrapp": 260, "get_cosine_schedule_with_warmup": 261, "get_dtyp": 262, "get_full_finetune_fsdp_wrap_polici": 263, "get_lr": 264, "get_memory_stat": 265, "get_quantizer_mod": 266, "get_unmasked_sequence_length": 267, "get_world_size_and_rank": 268, "init_distribut": 269, "is_distribut": 270, "log_memory_stat": 271, "lora_fsdp_wrap_polici": 272, "cometlogg": 273, "disklogg": 274, "stdoutlogg": 275, "tensorboardlogg": 276, "wandblogg": 277, "register_optim_in_bwd_hook": 278, "set_activation_checkpoint": 279, "set_default_dtyp": 280, "set_se": 281, "setup_torch_profil": 282, "update_state_dict_for_classifi": 283, "validate_expected_param_dtyp": 284, "batch_to_devic": 285, "get_devic": 286, "get_logg": 287, "torch_version_g": 288, "comput": [290, 297], "time": [290, 297], "welcom": 291, "document": 291, "get": [291, 298, 302], "start": [291, 298], "tutori": 291, "instal": 292, "pre": 292, "requisit": 292, "pypi": 292, "git": 292, "clone": 292, "nightli": 292, "kei": 293, "concept": 293, "design": 293, "principl": 293, "singl": 294, "devic": [294, 306], "finetun": [294, 296, 300, 304, 306, 307], "quantiz": [295, 300, 302, 305, 306], "awar": 295, "qat": [295, 306], "list": 298, "copi": 298, "fine": [299, 301, 302, 303, 304, 305, 306, 307], "tune": [299, 301, 302, 303, 304, 305, 306, 307], "chang": 299, "when": 299, "should": 299, "i": 299, "end": 300, "workflow": 300, "7b": 300, "evalu": [300, 302, 306], "eleutherai": [300, 302], 
"s": [300, 302], "eval": [300, 302], "har": [300, 302], "speed": 300, "librari": 300, "upload": 300, "hub": 300, "first": 301, "llm": 301, "select": 301, "modifi": 301, "next": 301, "step": [301, 305], "meta": 302, "8b": [302, 303], "our": 302, "faster": 302, "distil": 303, "1b": 303, "knowledg": 303, "how": [303, 304], "doe": [303, 304], "work": [303, 304], "kd": 303, "ablat": 303, "studi": 303, "teacher": 303, "student": 303, "hyperparamet": 303, "learn": 303, "rate": 303, "ratio": 303, "5b": 303, "0": 303, "appli": [304, 306], "trade": 304, "off": 304, "optim": 305, "activ": 305, "offload": 305, "gradient": 305, "accumul": 305, "lower": [305, 306], "fuse": 305, "backward": 305, "pass": 305, "state": 305, "cpu": 305, "effici": 305, "low": 305, "rank": 305, "adapt": 305, "qlora": [305, 307], "decompos": 305, "dora": 305, "option": 306, "save": 307, "deep": 307, "dive": 307}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 56}}) \ No newline at end of file