From ac8832d996b51cc09485b142ac3bf651b62ac0b3 Mon Sep 17 00:00:00 2001 From: pytorchbot Date: Wed, 30 Oct 2024 16:43:18 +0000 Subject: [PATCH] auto-generating sphinx docs --- main/_modules/torchtune/data/_collate.html | 3 ++- .../models/llama3_2_vision/_transform.html | 2 +- .../torchtune/modules/transforms/_transforms.html | 13 ++----------- ...modules.transforms.VisionCrossAttentionMask.html | 4 +--- main/searchindex.js | 2 +- 5 files changed, 7 insertions(+), 17 deletions(-) diff --git a/main/_modules/torchtune/data/_collate.html b/main/_modules/torchtune/data/_collate.html index 6f94d96b8c..02163b951c 100644 --- a/main/_modules/torchtune/data/_collate.html +++ b/main/_modules/torchtune/data/_collate.html @@ -855,7 +855,8 @@

Source code for torchtune.data._collate

     if pad_max_images is not None:
         _, _, img_seq = concat_masks.shape
         concat_masks = F.pad(
-            concat_masks, (0, pad_max_images * image_seq_len - img_seq)
+            concat_masks,
+            (0, pad_max_images * max_num_tiles * tokens_per_tile - img_seq),
         )
 
     batch_dict = {
diff --git a/main/_modules/torchtune/models/llama3_2_vision/_transform.html b/main/_modules/torchtune/models/llama3_2_vision/_transform.html
index b84c30cb18..0f17254037 100644
--- a/main/_modules/torchtune/models/llama3_2_vision/_transform.html
+++ b/main/_modules/torchtune/models/llama3_2_vision/_transform.html
@@ -522,11 +522,11 @@ 

Source code for torchtune.models.llama3_2_vision._transform

tile_size=tile_size, patch_size=patch_size, image_token_id=self.tokenizer.image_id, - max_num_tiles=max_num_tiles, ) self.stop_tokens = self.tokenizer.stop_tokens self.max_seq_len = max_seq_len + self.max_num_tiles = max_num_tiles self.image_seq_len = max_num_tiles * (self.xattn_mask.patches_per_tile + 1) self.prompt_template = prompt_template self.pad_id = self.tokenizer.pad_id diff --git a/main/_modules/torchtune/modules/transforms/_transforms.html b/main/_modules/torchtune/modules/transforms/_transforms.html index 37f763a547..8d69df8d0b 100644 --- a/main/_modules/torchtune/modules/transforms/_transforms.html +++ b/main/_modules/torchtune/modules/transforms/_transforms.html @@ -433,7 +433,7 @@

Source code for torchtune.modules.transforms._transforms

# This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from typing import Any, List, Mapping, Optional, Protocol +from typing import Any, List, Mapping, Protocol import torch @@ -486,8 +486,6 @@

Source code for torchtune.modules.transforms._transforms

E.g. for patch_size = 40, a tile of shape (400, 400) will have 10x10 grid of patches with shape (40, 40) each. image_token_id (int): Token ID of the image special token. - max_num_tiles (Optional[int]): Maximum number of tiles in an image, used to - pad mask during inference. Defaults to None """ def __init__( @@ -495,12 +493,10 @@

Source code for torchtune.modules.transforms._transforms

tile_size: int, patch_size: int, image_token_id: int, - max_num_tiles: Optional[int] = None, ): patch_grid_size = tile_size // patch_size self.patches_per_tile = patch_grid_size**2 self.image_token_id = image_token_id - self.max_num_tiles = max_num_tiles def _get_image_attention_intervals(self, tokens: List[int]) -> List[List[int]]: """ @@ -592,9 +588,6 @@

Source code for torchtune.modules.transforms._transforms

# which can vary based on number of tiles since they are not yet tile padded. # The masks are padded and concatenated together in the batch collator text_seq_len = len(tokens) - max_image_size = None - if inference and self.max_num_tiles is not None: - max_image_size = self.max_num_tiles * (self.patches_per_tile + 1) masks = [] for image_num, interval in enumerate(intervals): # Identify what part of text sequence should be attended @@ -607,9 +600,7 @@

Source code for torchtune.modules.transforms._transforms

# to a single image, so text tokens attend to all the image's tokens. # The mask is text_seq_len x mask_image_size if defined, otherwise # it uses current text/image sequence lengths. - mask = torch.zeros( - text_seq_len, max_image_size or image_seq_len, dtype=torch.bool - ) + mask = torch.zeros(text_seq_len, image_seq_len, dtype=torch.bool) mask[start:end, :image_seq_len] = True masks.append(mask) diff --git a/main/generated/torchtune.modules.transforms.VisionCrossAttentionMask.html b/main/generated/torchtune.modules.transforms.VisionCrossAttentionMask.html index 8301fd3831..a006bfa8f8 100644 --- a/main/generated/torchtune.modules.transforms.VisionCrossAttentionMask.html +++ b/main/generated/torchtune.modules.transforms.VisionCrossAttentionMask.html @@ -437,7 +437,7 @@

VisionCrossAttentionMask

-class torchtune.modules.transforms.VisionCrossAttentionMask(tile_size: int, patch_size: int, image_token_id: int, max_num_tiles: Optional[int] = None)[source]
+class torchtune.modules.transforms.VisionCrossAttentionMask(tile_size: int, patch_size: int, image_token_id: int)[source]

Computes the cross-attention mask for text + image inputs. Text tokens that participate in cross-attention with an image token will show True in the mask and follow the interleaved structure laid out in Fig. 7 of the Flamingo paper @@ -472,8 +472,6 @@

VisionCrossAttentionMaskint) – Token ID of the image special token.

-
  • max_num_tiles (Optional[int]) – Maximum number of tiles in an image, used to -pad mask during inference. Defaults to None

  • diff --git a/main/searchindex.js b/main/searchindex.js index 6454d498b3..8ecc4b31d3 100644 --- a/main/searchindex.js +++ b/main/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["api_ref_config", "api_ref_data", "api_ref_datasets", "api_ref_generation", "api_ref_models", "api_ref_modules", "api_ref_rlhf", "api_ref_training", "api_ref_utilities", "basics/chat_datasets", "basics/custom_components", "basics/datasets_overview", "basics/instruct_datasets", "basics/message_transforms", "basics/messages", "basics/model_transforms", "basics/multimodal_datasets", "basics/packing", "basics/preference_datasets", "basics/prompt_templates", "basics/text_completion_datasets", "basics/tokenizers", "deep_dives/checkpointer", "deep_dives/comet_logging", "deep_dives/configs", "deep_dives/recipe_deepdive", "deep_dives/wandb_logging", "generated/torchtune.config.instantiate", "generated/torchtune.config.log_config", "generated/torchtune.config.parse", "generated/torchtune.config.validate", "generated/torchtune.data.AlpacaToMessages", "generated/torchtune.data.ChatMLTemplate", "generated/torchtune.data.ChosenRejectedToMessages", "generated/torchtune.data.GrammarErrorCorrectionTemplate", "generated/torchtune.data.InputOutputToMessages", "generated/torchtune.data.Message", "generated/torchtune.data.OpenAIToMessages", "generated/torchtune.data.PromptTemplate", "generated/torchtune.data.PromptTemplateInterface", "generated/torchtune.data.QuestionAnswerTemplate", "generated/torchtune.data.Role", "generated/torchtune.data.ShareGPTToMessages", "generated/torchtune.data.SummarizeTemplate", "generated/torchtune.data.format_content_with_images", "generated/torchtune.data.left_pad_sequence", "generated/torchtune.data.load_image", "generated/torchtune.data.padded_collate", "generated/torchtune.data.padded_collate_dpo", "generated/torchtune.data.padded_collate_sft", "generated/torchtune.data.padded_collate_tiled_images_and_mask", "generated/torchtune.data.truncate", "generated/torchtune.data.validate_messages", "generated/torchtune.datasets.ConcatDataset", "generated/torchtune.datasets.PackedDataset", "generated/torchtune.datasets.PreferenceDataset", "generated/torchtune.datasets.SFTDataset", "generated/torchtune.datasets.TextCompletionDataset", "generated/torchtune.datasets.alpaca_cleaned_dataset", "generated/torchtune.datasets.alpaca_dataset", "generated/torchtune.datasets.chat_dataset", "generated/torchtune.datasets.cnn_dailymail_articles_dataset", "generated/torchtune.datasets.grammar_dataset", "generated/torchtune.datasets.hh_rlhf_helpful_dataset", "generated/torchtune.datasets.instruct_dataset", "generated/torchtune.datasets.multimodal.llava_instruct_dataset", "generated/torchtune.datasets.multimodal.the_cauldron_dataset", "generated/torchtune.datasets.preference_dataset", "generated/torchtune.datasets.samsum_dataset", "generated/torchtune.datasets.slimorca_dataset", "generated/torchtune.datasets.stack_exchange_paired_dataset", "generated/torchtune.datasets.text_completion_dataset", "generated/torchtune.datasets.wikitext_dataset", "generated/torchtune.generation.generate", "generated/torchtune.generation.generate_next_token", "generated/torchtune.generation.get_causal_mask_from_padding_mask", "generated/torchtune.generation.get_position_ids_from_padding_mask", "generated/torchtune.generation.sample", "generated/torchtune.models.clip.TilePositionalEmbedding", "generated/torchtune.models.clip.TiledTokenPositionalEmbedding", "generated/torchtune.models.clip.TokenPositionalEmbedding", "generated/torchtune.models.clip.clip_vision_encoder", "generated/torchtune.models.code_llama2.code_llama2_13b", "generated/torchtune.models.code_llama2.code_llama2_70b", "generated/torchtune.models.code_llama2.code_llama2_7b", "generated/torchtune.models.code_llama2.lora_code_llama2_13b", "generated/torchtune.models.code_llama2.lora_code_llama2_70b", "generated/torchtune.models.code_llama2.lora_code_llama2_7b", "generated/torchtune.models.code_llama2.qlora_code_llama2_13b", "generated/torchtune.models.code_llama2.qlora_code_llama2_70b", "generated/torchtune.models.code_llama2.qlora_code_llama2_7b", "generated/torchtune.models.gemma.gemma", "generated/torchtune.models.gemma.gemma_2b", "generated/torchtune.models.gemma.gemma_7b", "generated/torchtune.models.gemma.gemma_tokenizer", "generated/torchtune.models.gemma.lora_gemma", "generated/torchtune.models.gemma.lora_gemma_2b", "generated/torchtune.models.gemma.lora_gemma_7b", "generated/torchtune.models.gemma.qlora_gemma_2b", "generated/torchtune.models.gemma.qlora_gemma_7b", "generated/torchtune.models.llama2.Llama2ChatTemplate", "generated/torchtune.models.llama2.llama2", "generated/torchtune.models.llama2.llama2_13b", "generated/torchtune.models.llama2.llama2_70b", "generated/torchtune.models.llama2.llama2_7b", "generated/torchtune.models.llama2.llama2_reward_7b", "generated/torchtune.models.llama2.llama2_tokenizer", "generated/torchtune.models.llama2.lora_llama2", "generated/torchtune.models.llama2.lora_llama2_13b", "generated/torchtune.models.llama2.lora_llama2_70b", "generated/torchtune.models.llama2.lora_llama2_7b", "generated/torchtune.models.llama2.lora_llama2_reward_7b", "generated/torchtune.models.llama2.qlora_llama2_13b", "generated/torchtune.models.llama2.qlora_llama2_70b", "generated/torchtune.models.llama2.qlora_llama2_7b", "generated/torchtune.models.llama2.qlora_llama2_reward_7b", "generated/torchtune.models.llama3.llama3", "generated/torchtune.models.llama3.llama3_70b", "generated/torchtune.models.llama3.llama3_8b", "generated/torchtune.models.llama3.llama3_tokenizer", "generated/torchtune.models.llama3.lora_llama3", "generated/torchtune.models.llama3.lora_llama3_70b", "generated/torchtune.models.llama3.lora_llama3_8b", "generated/torchtune.models.llama3.qlora_llama3_70b", "generated/torchtune.models.llama3.qlora_llama3_8b", "generated/torchtune.models.llama3_1.llama3_1", "generated/torchtune.models.llama3_1.llama3_1_405b", "generated/torchtune.models.llama3_1.llama3_1_70b", "generated/torchtune.models.llama3_1.llama3_1_8b", "generated/torchtune.models.llama3_1.lora_llama3_1", "generated/torchtune.models.llama3_1.lora_llama3_1_405b", "generated/torchtune.models.llama3_1.lora_llama3_1_70b", "generated/torchtune.models.llama3_1.lora_llama3_1_8b", "generated/torchtune.models.llama3_1.qlora_llama3_1_405b", "generated/torchtune.models.llama3_1.qlora_llama3_1_70b", "generated/torchtune.models.llama3_1.qlora_llama3_1_8b", "generated/torchtune.models.llama3_2.llama3_2_1b", "generated/torchtune.models.llama3_2.llama3_2_3b", "generated/torchtune.models.llama3_2.lora_llama3_2_1b", "generated/torchtune.models.llama3_2.lora_llama3_2_3b", "generated/torchtune.models.llama3_2.qlora_llama3_2_1b", "generated/torchtune.models.llama3_2.qlora_llama3_2_3b", "generated/torchtune.models.llama3_2_vision.Llama3VisionEncoder", "generated/torchtune.models.llama3_2_vision.Llama3VisionProjectionHead", "generated/torchtune.models.llama3_2_vision.Llama3VisionTransform", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_11b", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_decoder", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_encoder", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_transform", "generated/torchtune.models.llama3_2_vision.lora_llama3_2_vision_11b", "generated/torchtune.models.llama3_2_vision.lora_llama3_2_vision_decoder", "generated/torchtune.models.llama3_2_vision.lora_llama3_2_vision_encoder", "generated/torchtune.models.llama3_2_vision.qlora_llama3_2_vision_11b", "generated/torchtune.models.mistral.MistralChatTemplate", "generated/torchtune.models.mistral.lora_mistral", "generated/torchtune.models.mistral.lora_mistral_7b", "generated/torchtune.models.mistral.lora_mistral_classifier", "generated/torchtune.models.mistral.lora_mistral_reward_7b", "generated/torchtune.models.mistral.mistral", "generated/torchtune.models.mistral.mistral_7b", "generated/torchtune.models.mistral.mistral_classifier", "generated/torchtune.models.mistral.mistral_reward_7b", "generated/torchtune.models.mistral.mistral_tokenizer", "generated/torchtune.models.mistral.qlora_mistral_7b", "generated/torchtune.models.mistral.qlora_mistral_reward_7b", "generated/torchtune.models.phi3.lora_phi3", "generated/torchtune.models.phi3.lora_phi3_mini", "generated/torchtune.models.phi3.phi3", "generated/torchtune.models.phi3.phi3_mini", "generated/torchtune.models.phi3.phi3_mini_tokenizer", "generated/torchtune.models.phi3.qlora_phi3_mini", "generated/torchtune.models.qwen2.lora_qwen2", "generated/torchtune.models.qwen2.lora_qwen2_0_5b", "generated/torchtune.models.qwen2.lora_qwen2_1_5b", "generated/torchtune.models.qwen2.lora_qwen2_7b", "generated/torchtune.models.qwen2.qwen2", "generated/torchtune.models.qwen2.qwen2_0_5b", "generated/torchtune.models.qwen2.qwen2_1_5b", "generated/torchtune.models.qwen2.qwen2_7b", "generated/torchtune.models.qwen2.qwen2_tokenizer", "generated/torchtune.modules.FeedForward", "generated/torchtune.modules.Fp32LayerNorm", "generated/torchtune.modules.KVCache", "generated/torchtune.modules.MultiHeadAttention", "generated/torchtune.modules.RMSNorm", "generated/torchtune.modules.RotaryPositionalEmbeddings", "generated/torchtune.modules.TanhGate", "generated/torchtune.modules.TiedLinear", "generated/torchtune.modules.TransformerCrossAttentionLayer", "generated/torchtune.modules.TransformerDecoder", "generated/torchtune.modules.TransformerSelfAttentionLayer", "generated/torchtune.modules.VisionTransformer", "generated/torchtune.modules.common_utils.delete_kv_caches", "generated/torchtune.modules.common_utils.disable_kv_cache", "generated/torchtune.modules.common_utils.local_kv_cache", "generated/torchtune.modules.common_utils.reparametrize_as_dtype_state_dict_post_hook", "generated/torchtune.modules.loss.CEWithChunkedOutputLoss", "generated/torchtune.modules.loss.ForwardKLLoss", "generated/torchtune.modules.loss.ForwardKLWithChunkedOutputLoss", "generated/torchtune.modules.model_fusion.DeepFusionModel", "generated/torchtune.modules.model_fusion.FusionEmbedding", "generated/torchtune.modules.model_fusion.FusionLayer", "generated/torchtune.modules.model_fusion.get_fusion_params", "generated/torchtune.modules.model_fusion.register_fusion_module", "generated/torchtune.modules.peft.AdapterModule", "generated/torchtune.modules.peft.DoRALinear", "generated/torchtune.modules.peft.LoRALinear", "generated/torchtune.modules.peft.disable_adapter", "generated/torchtune.modules.peft.get_adapter_params", "generated/torchtune.modules.peft.set_trainable_params", "generated/torchtune.modules.peft.validate_missing_and_unexpected_for_lora", "generated/torchtune.modules.peft.validate_state_dict_for_lora", "generated/torchtune.modules.tokenizers.BaseTokenizer", "generated/torchtune.modules.tokenizers.ModelTokenizer", "generated/torchtune.modules.tokenizers.SentencePieceBaseTokenizer", "generated/torchtune.modules.tokenizers.TikTokenBaseTokenizer", "generated/torchtune.modules.tokenizers.parse_hf_tokenizer_json", "generated/torchtune.modules.tokenizers.tokenize_messages_no_special_tokens", "generated/torchtune.modules.transforms.Transform", "generated/torchtune.modules.transforms.VisionCrossAttentionMask", "generated/torchtune.rlhf.estimate_advantages", "generated/torchtune.rlhf.get_rewards_ppo", "generated/torchtune.rlhf.loss.DPOLoss", "generated/torchtune.rlhf.loss.PPOLoss", "generated/torchtune.rlhf.loss.RSOLoss", "generated/torchtune.rlhf.loss.SimPOLoss", "generated/torchtune.rlhf.truncate_sequence_at_first_stop_token", "generated/torchtune.training.FSDPPolicyType", "generated/torchtune.training.FormattedCheckpointFiles", "generated/torchtune.training.FullModelHFCheckpointer", "generated/torchtune.training.FullModelMetaCheckpointer", "generated/torchtune.training.FullModelTorchTuneCheckpointer", "generated/torchtune.training.ModelType", "generated/torchtune.training.OptimizerInBackwardWrapper", "generated/torchtune.training.apply_selective_activation_checkpointing", "generated/torchtune.training.create_optim_in_bwd_wrapper", "generated/torchtune.training.get_cosine_schedule_with_warmup", "generated/torchtune.training.get_dtype", "generated/torchtune.training.get_full_finetune_fsdp_wrap_policy", "generated/torchtune.training.get_lr", "generated/torchtune.training.get_memory_stats", "generated/torchtune.training.get_quantizer_mode", "generated/torchtune.training.get_unmasked_sequence_lengths", "generated/torchtune.training.get_world_size_and_rank", "generated/torchtune.training.init_distributed", "generated/torchtune.training.is_distributed", "generated/torchtune.training.log_memory_stats", "generated/torchtune.training.lora_fsdp_wrap_policy", "generated/torchtune.training.metric_logging.CometLogger", "generated/torchtune.training.metric_logging.DiskLogger", "generated/torchtune.training.metric_logging.StdoutLogger", "generated/torchtune.training.metric_logging.TensorBoardLogger", "generated/torchtune.training.metric_logging.WandBLogger", "generated/torchtune.training.register_optim_in_bwd_hooks", "generated/torchtune.training.set_activation_checkpointing", "generated/torchtune.training.set_default_dtype", "generated/torchtune.training.set_seed", "generated/torchtune.training.setup_torch_profiler", "generated/torchtune.training.update_state_dict_for_classifier", "generated/torchtune.training.validate_expected_param_dtype", "generated/torchtune.utils.batch_to_device", "generated/torchtune.utils.get_device", "generated/torchtune.utils.get_logger", "generated/torchtune.utils.torch_version_ge", "generated_examples/index", "generated_examples/sg_execution_times", "index", "install", "overview", "recipes/lora_finetune_single_device", "recipes/qat_distributed", "recipes/recipes_overview", "sg_execution_times", "tune_cli", "tutorials/chat", "tutorials/e2e_flow", "tutorials/first_finetune_tutorial", "tutorials/llama3", "tutorials/llama_kd_tutorial", "tutorials/lora_finetune", "tutorials/memory_optimizations", "tutorials/qat_finetune", "tutorials/qlora_finetune"], "filenames": ["api_ref_config.rst", "api_ref_data.rst", "api_ref_datasets.rst", "api_ref_generation.rst", "api_ref_models.rst", "api_ref_modules.rst", "api_ref_rlhf.rst", "api_ref_training.rst", "api_ref_utilities.rst", "basics/chat_datasets.rst", "basics/custom_components.rst", "basics/datasets_overview.rst", "basics/instruct_datasets.rst", "basics/message_transforms.rst", "basics/messages.rst", "basics/model_transforms.rst", "basics/multimodal_datasets.rst", "basics/packing.rst", "basics/preference_datasets.rst", "basics/prompt_templates.rst", "basics/text_completion_datasets.rst", "basics/tokenizers.rst", "deep_dives/checkpointer.rst", "deep_dives/comet_logging.rst", "deep_dives/configs.rst", "deep_dives/recipe_deepdive.rst", "deep_dives/wandb_logging.rst", "generated/torchtune.config.instantiate.rst", "generated/torchtune.config.log_config.rst", "generated/torchtune.config.parse.rst", "generated/torchtune.config.validate.rst", "generated/torchtune.data.AlpacaToMessages.rst", "generated/torchtune.data.ChatMLTemplate.rst", "generated/torchtune.data.ChosenRejectedToMessages.rst", "generated/torchtune.data.GrammarErrorCorrectionTemplate.rst", "generated/torchtune.data.InputOutputToMessages.rst", "generated/torchtune.data.Message.rst", "generated/torchtune.data.OpenAIToMessages.rst", "generated/torchtune.data.PromptTemplate.rst", "generated/torchtune.data.PromptTemplateInterface.rst", "generated/torchtune.data.QuestionAnswerTemplate.rst", "generated/torchtune.data.Role.rst", "generated/torchtune.data.ShareGPTToMessages.rst", "generated/torchtune.data.SummarizeTemplate.rst", "generated/torchtune.data.format_content_with_images.rst", "generated/torchtune.data.left_pad_sequence.rst", "generated/torchtune.data.load_image.rst", "generated/torchtune.data.padded_collate.rst", "generated/torchtune.data.padded_collate_dpo.rst", "generated/torchtune.data.padded_collate_sft.rst", "generated/torchtune.data.padded_collate_tiled_images_and_mask.rst", "generated/torchtune.data.truncate.rst", "generated/torchtune.data.validate_messages.rst", "generated/torchtune.datasets.ConcatDataset.rst", "generated/torchtune.datasets.PackedDataset.rst", "generated/torchtune.datasets.PreferenceDataset.rst", "generated/torchtune.datasets.SFTDataset.rst", "generated/torchtune.datasets.TextCompletionDataset.rst", "generated/torchtune.datasets.alpaca_cleaned_dataset.rst", "generated/torchtune.datasets.alpaca_dataset.rst", "generated/torchtune.datasets.chat_dataset.rst", "generated/torchtune.datasets.cnn_dailymail_articles_dataset.rst", "generated/torchtune.datasets.grammar_dataset.rst", "generated/torchtune.datasets.hh_rlhf_helpful_dataset.rst", "generated/torchtune.datasets.instruct_dataset.rst", "generated/torchtune.datasets.multimodal.llava_instruct_dataset.rst", "generated/torchtune.datasets.multimodal.the_cauldron_dataset.rst", "generated/torchtune.datasets.preference_dataset.rst", "generated/torchtune.datasets.samsum_dataset.rst", "generated/torchtune.datasets.slimorca_dataset.rst", "generated/torchtune.datasets.stack_exchange_paired_dataset.rst", "generated/torchtune.datasets.text_completion_dataset.rst", "generated/torchtune.datasets.wikitext_dataset.rst", "generated/torchtune.generation.generate.rst", "generated/torchtune.generation.generate_next_token.rst", "generated/torchtune.generation.get_causal_mask_from_padding_mask.rst", "generated/torchtune.generation.get_position_ids_from_padding_mask.rst", "generated/torchtune.generation.sample.rst", "generated/torchtune.models.clip.TilePositionalEmbedding.rst", "generated/torchtune.models.clip.TiledTokenPositionalEmbedding.rst", "generated/torchtune.models.clip.TokenPositionalEmbedding.rst", "generated/torchtune.models.clip.clip_vision_encoder.rst", "generated/torchtune.models.code_llama2.code_llama2_13b.rst", "generated/torchtune.models.code_llama2.code_llama2_70b.rst", "generated/torchtune.models.code_llama2.code_llama2_7b.rst", "generated/torchtune.models.code_llama2.lora_code_llama2_13b.rst", "generated/torchtune.models.code_llama2.lora_code_llama2_70b.rst", "generated/torchtune.models.code_llama2.lora_code_llama2_7b.rst", "generated/torchtune.models.code_llama2.qlora_code_llama2_13b.rst", "generated/torchtune.models.code_llama2.qlora_code_llama2_70b.rst", "generated/torchtune.models.code_llama2.qlora_code_llama2_7b.rst", "generated/torchtune.models.gemma.gemma.rst", "generated/torchtune.models.gemma.gemma_2b.rst", "generated/torchtune.models.gemma.gemma_7b.rst", "generated/torchtune.models.gemma.gemma_tokenizer.rst", "generated/torchtune.models.gemma.lora_gemma.rst", "generated/torchtune.models.gemma.lora_gemma_2b.rst", "generated/torchtune.models.gemma.lora_gemma_7b.rst", "generated/torchtune.models.gemma.qlora_gemma_2b.rst", "generated/torchtune.models.gemma.qlora_gemma_7b.rst", "generated/torchtune.models.llama2.Llama2ChatTemplate.rst", "generated/torchtune.models.llama2.llama2.rst", "generated/torchtune.models.llama2.llama2_13b.rst", "generated/torchtune.models.llama2.llama2_70b.rst", "generated/torchtune.models.llama2.llama2_7b.rst", "generated/torchtune.models.llama2.llama2_reward_7b.rst", "generated/torchtune.models.llama2.llama2_tokenizer.rst", "generated/torchtune.models.llama2.lora_llama2.rst", "generated/torchtune.models.llama2.lora_llama2_13b.rst", "generated/torchtune.models.llama2.lora_llama2_70b.rst", "generated/torchtune.models.llama2.lora_llama2_7b.rst", "generated/torchtune.models.llama2.lora_llama2_reward_7b.rst", "generated/torchtune.models.llama2.qlora_llama2_13b.rst", "generated/torchtune.models.llama2.qlora_llama2_70b.rst", "generated/torchtune.models.llama2.qlora_llama2_7b.rst", "generated/torchtune.models.llama2.qlora_llama2_reward_7b.rst", "generated/torchtune.models.llama3.llama3.rst", "generated/torchtune.models.llama3.llama3_70b.rst", "generated/torchtune.models.llama3.llama3_8b.rst", "generated/torchtune.models.llama3.llama3_tokenizer.rst", "generated/torchtune.models.llama3.lora_llama3.rst", "generated/torchtune.models.llama3.lora_llama3_70b.rst", "generated/torchtune.models.llama3.lora_llama3_8b.rst", "generated/torchtune.models.llama3.qlora_llama3_70b.rst", "generated/torchtune.models.llama3.qlora_llama3_8b.rst", "generated/torchtune.models.llama3_1.llama3_1.rst", "generated/torchtune.models.llama3_1.llama3_1_405b.rst", "generated/torchtune.models.llama3_1.llama3_1_70b.rst", "generated/torchtune.models.llama3_1.llama3_1_8b.rst", "generated/torchtune.models.llama3_1.lora_llama3_1.rst", "generated/torchtune.models.llama3_1.lora_llama3_1_405b.rst", "generated/torchtune.models.llama3_1.lora_llama3_1_70b.rst", "generated/torchtune.models.llama3_1.lora_llama3_1_8b.rst", "generated/torchtune.models.llama3_1.qlora_llama3_1_405b.rst", "generated/torchtune.models.llama3_1.qlora_llama3_1_70b.rst", "generated/torchtune.models.llama3_1.qlora_llama3_1_8b.rst", "generated/torchtune.models.llama3_2.llama3_2_1b.rst", "generated/torchtune.models.llama3_2.llama3_2_3b.rst", "generated/torchtune.models.llama3_2.lora_llama3_2_1b.rst", "generated/torchtune.models.llama3_2.lora_llama3_2_3b.rst", "generated/torchtune.models.llama3_2.qlora_llama3_2_1b.rst", "generated/torchtune.models.llama3_2.qlora_llama3_2_3b.rst", "generated/torchtune.models.llama3_2_vision.Llama3VisionEncoder.rst", "generated/torchtune.models.llama3_2_vision.Llama3VisionProjectionHead.rst", "generated/torchtune.models.llama3_2_vision.Llama3VisionTransform.rst", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_11b.rst", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_decoder.rst", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_encoder.rst", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_transform.rst", "generated/torchtune.models.llama3_2_vision.lora_llama3_2_vision_11b.rst", "generated/torchtune.models.llama3_2_vision.lora_llama3_2_vision_decoder.rst", "generated/torchtune.models.llama3_2_vision.lora_llama3_2_vision_encoder.rst", "generated/torchtune.models.llama3_2_vision.qlora_llama3_2_vision_11b.rst", "generated/torchtune.models.mistral.MistralChatTemplate.rst", "generated/torchtune.models.mistral.lora_mistral.rst", "generated/torchtune.models.mistral.lora_mistral_7b.rst", "generated/torchtune.models.mistral.lora_mistral_classifier.rst", "generated/torchtune.models.mistral.lora_mistral_reward_7b.rst", "generated/torchtune.models.mistral.mistral.rst", "generated/torchtune.models.mistral.mistral_7b.rst", "generated/torchtune.models.mistral.mistral_classifier.rst", "generated/torchtune.models.mistral.mistral_reward_7b.rst", "generated/torchtune.models.mistral.mistral_tokenizer.rst", "generated/torchtune.models.mistral.qlora_mistral_7b.rst", "generated/torchtune.models.mistral.qlora_mistral_reward_7b.rst", "generated/torchtune.models.phi3.lora_phi3.rst", "generated/torchtune.models.phi3.lora_phi3_mini.rst", "generated/torchtune.models.phi3.phi3.rst", "generated/torchtune.models.phi3.phi3_mini.rst", "generated/torchtune.models.phi3.phi3_mini_tokenizer.rst", "generated/torchtune.models.phi3.qlora_phi3_mini.rst", "generated/torchtune.models.qwen2.lora_qwen2.rst", "generated/torchtune.models.qwen2.lora_qwen2_0_5b.rst", "generated/torchtune.models.qwen2.lora_qwen2_1_5b.rst", "generated/torchtune.models.qwen2.lora_qwen2_7b.rst", "generated/torchtune.models.qwen2.qwen2.rst", "generated/torchtune.models.qwen2.qwen2_0_5b.rst", "generated/torchtune.models.qwen2.qwen2_1_5b.rst", "generated/torchtune.models.qwen2.qwen2_7b.rst", "generated/torchtune.models.qwen2.qwen2_tokenizer.rst", "generated/torchtune.modules.FeedForward.rst", "generated/torchtune.modules.Fp32LayerNorm.rst", "generated/torchtune.modules.KVCache.rst", "generated/torchtune.modules.MultiHeadAttention.rst", "generated/torchtune.modules.RMSNorm.rst", "generated/torchtune.modules.RotaryPositionalEmbeddings.rst", "generated/torchtune.modules.TanhGate.rst", "generated/torchtune.modules.TiedLinear.rst", "generated/torchtune.modules.TransformerCrossAttentionLayer.rst", "generated/torchtune.modules.TransformerDecoder.rst", "generated/torchtune.modules.TransformerSelfAttentionLayer.rst", "generated/torchtune.modules.VisionTransformer.rst", "generated/torchtune.modules.common_utils.delete_kv_caches.rst", "generated/torchtune.modules.common_utils.disable_kv_cache.rst", "generated/torchtune.modules.common_utils.local_kv_cache.rst", "generated/torchtune.modules.common_utils.reparametrize_as_dtype_state_dict_post_hook.rst", "generated/torchtune.modules.loss.CEWithChunkedOutputLoss.rst", "generated/torchtune.modules.loss.ForwardKLLoss.rst", "generated/torchtune.modules.loss.ForwardKLWithChunkedOutputLoss.rst", "generated/torchtune.modules.model_fusion.DeepFusionModel.rst", "generated/torchtune.modules.model_fusion.FusionEmbedding.rst", "generated/torchtune.modules.model_fusion.FusionLayer.rst", "generated/torchtune.modules.model_fusion.get_fusion_params.rst", "generated/torchtune.modules.model_fusion.register_fusion_module.rst", "generated/torchtune.modules.peft.AdapterModule.rst", "generated/torchtune.modules.peft.DoRALinear.rst", "generated/torchtune.modules.peft.LoRALinear.rst", "generated/torchtune.modules.peft.disable_adapter.rst", "generated/torchtune.modules.peft.get_adapter_params.rst", "generated/torchtune.modules.peft.set_trainable_params.rst", "generated/torchtune.modules.peft.validate_missing_and_unexpected_for_lora.rst", "generated/torchtune.modules.peft.validate_state_dict_for_lora.rst", "generated/torchtune.modules.tokenizers.BaseTokenizer.rst", "generated/torchtune.modules.tokenizers.ModelTokenizer.rst", "generated/torchtune.modules.tokenizers.SentencePieceBaseTokenizer.rst", "generated/torchtune.modules.tokenizers.TikTokenBaseTokenizer.rst", "generated/torchtune.modules.tokenizers.parse_hf_tokenizer_json.rst", "generated/torchtune.modules.tokenizers.tokenize_messages_no_special_tokens.rst", "generated/torchtune.modules.transforms.Transform.rst", "generated/torchtune.modules.transforms.VisionCrossAttentionMask.rst", "generated/torchtune.rlhf.estimate_advantages.rst", "generated/torchtune.rlhf.get_rewards_ppo.rst", "generated/torchtune.rlhf.loss.DPOLoss.rst", "generated/torchtune.rlhf.loss.PPOLoss.rst", "generated/torchtune.rlhf.loss.RSOLoss.rst", "generated/torchtune.rlhf.loss.SimPOLoss.rst", "generated/torchtune.rlhf.truncate_sequence_at_first_stop_token.rst", "generated/torchtune.training.FSDPPolicyType.rst", "generated/torchtune.training.FormattedCheckpointFiles.rst", "generated/torchtune.training.FullModelHFCheckpointer.rst", "generated/torchtune.training.FullModelMetaCheckpointer.rst", "generated/torchtune.training.FullModelTorchTuneCheckpointer.rst", "generated/torchtune.training.ModelType.rst", "generated/torchtune.training.OptimizerInBackwardWrapper.rst", "generated/torchtune.training.apply_selective_activation_checkpointing.rst", "generated/torchtune.training.create_optim_in_bwd_wrapper.rst", "generated/torchtune.training.get_cosine_schedule_with_warmup.rst", "generated/torchtune.training.get_dtype.rst", "generated/torchtune.training.get_full_finetune_fsdp_wrap_policy.rst", "generated/torchtune.training.get_lr.rst", "generated/torchtune.training.get_memory_stats.rst", "generated/torchtune.training.get_quantizer_mode.rst", "generated/torchtune.training.get_unmasked_sequence_lengths.rst", "generated/torchtune.training.get_world_size_and_rank.rst", "generated/torchtune.training.init_distributed.rst", "generated/torchtune.training.is_distributed.rst", "generated/torchtune.training.log_memory_stats.rst", "generated/torchtune.training.lora_fsdp_wrap_policy.rst", "generated/torchtune.training.metric_logging.CometLogger.rst", "generated/torchtune.training.metric_logging.DiskLogger.rst", "generated/torchtune.training.metric_logging.StdoutLogger.rst", "generated/torchtune.training.metric_logging.TensorBoardLogger.rst", "generated/torchtune.training.metric_logging.WandBLogger.rst", "generated/torchtune.training.register_optim_in_bwd_hooks.rst", "generated/torchtune.training.set_activation_checkpointing.rst", "generated/torchtune.training.set_default_dtype.rst", "generated/torchtune.training.set_seed.rst", "generated/torchtune.training.setup_torch_profiler.rst", "generated/torchtune.training.update_state_dict_for_classifier.rst", "generated/torchtune.training.validate_expected_param_dtype.rst", "generated/torchtune.utils.batch_to_device.rst", "generated/torchtune.utils.get_device.rst", "generated/torchtune.utils.get_logger.rst", "generated/torchtune.utils.torch_version_ge.rst", "generated_examples/index.rst", "generated_examples/sg_execution_times.rst", "index.rst", "install.rst", "overview.rst", "recipes/lora_finetune_single_device.rst", "recipes/qat_distributed.rst", "recipes/recipes_overview.rst", "sg_execution_times.rst", "tune_cli.rst", "tutorials/chat.rst", "tutorials/e2e_flow.rst", "tutorials/first_finetune_tutorial.rst", "tutorials/llama3.rst", "tutorials/llama_kd_tutorial.rst", "tutorials/lora_finetune.rst", "tutorials/memory_optimizations.rst", "tutorials/qat_finetune.rst", "tutorials/qlora_finetune.rst"], "titles": ["torchtune.config", "torchtune.data", "torchtune.datasets", "torchtune.generation", "torchtune.models", "torchtune.modules", "torchtune.rlhf", "torchtune.training", "torchtune.utils", "Chat Datasets", "Custom Components and Recipes", "Datasets Overview", "Instruct Datasets", "Message Transforms", "Messages", "Multimodal Transforms", "Multimodal Datasets", "Sample packing", "Preference Datasets", "Prompt Templates", "Text-completion Datasets", "Tokenizers", "Checkpointing in torchtune", "Logging to Comet", "All About Configs", "What Are Recipes?", "Logging to Weights & Biases", "instantiate", "log_config", "parse", "validate", "AlpacaToMessages", "ChatMLTemplate", "ChosenRejectedToMessages", "torchtune.data.GrammarErrorCorrectionTemplate", "InputOutputToMessages", "Message", "OpenAIToMessages", "PromptTemplate", "PromptTemplateInterface", "torchtune.data.QuestionAnswerTemplate", "torchtune.data.Role", "ShareGPTToMessages", "torchtune.data.SummarizeTemplate", "format_content_with_images", "left_pad_sequence", "load_image", "padded_collate", "padded_collate_dpo", "padded_collate_sft", "padded_collate_tiled_images_and_mask", "truncate", "validate_messages", "ConcatDataset", "PackedDataset", "PreferenceDataset", "SFTDataset", "TextCompletionDataset", "alpaca_cleaned_dataset", "alpaca_dataset", "chat_dataset", "cnn_dailymail_articles_dataset", "grammar_dataset", "hh_rlhf_helpful_dataset", "instruct_dataset", "llava_instruct_dataset", "the_cauldron_dataset", "preference_dataset", "samsum_dataset", "slimorca_dataset", "stack_exchange_paired_dataset", "text_completion_dataset", "wikitext_dataset", "generate", "generate_next_token", "get_causal_mask_from_padding_mask", "get_position_ids_from_padding_mask", "sample", "TilePositionalEmbedding", "TiledTokenPositionalEmbedding", "TokenPositionalEmbedding", "clip_vision_encoder", "code_llama2_13b", "code_llama2_70b", "code_llama2_7b", "lora_code_llama2_13b", "lora_code_llama2_70b", "lora_code_llama2_7b", "qlora_code_llama2_13b", "qlora_code_llama2_70b", "qlora_code_llama2_7b", "gemma", "gemma_2b", "gemma_7b", "gemma_tokenizer", "lora_gemma", "lora_gemma_2b", "lora_gemma_7b", "qlora_gemma_2b", "qlora_gemma_7b", "Llama2ChatTemplate", "llama2", "llama2_13b", "llama2_70b", "llama2_7b", "llama2_reward_7b", "llama2_tokenizer", "lora_llama2", "lora_llama2_13b", "lora_llama2_70b", "lora_llama2_7b", "lora_llama2_reward_7b", "qlora_llama2_13b", "qlora_llama2_70b", "qlora_llama2_7b", "qlora_llama2_reward_7b", "llama3", "llama3_70b", "llama3_8b", "llama3_tokenizer", "lora_llama3", "lora_llama3_70b", "lora_llama3_8b", "qlora_llama3_70b", "qlora_llama3_8b", "llama3_1", "llama3_1_405b", "llama3_1_70b", "llama3_1_8b", "lora_llama3_1", "lora_llama3_1_405b", "lora_llama3_1_70b", "lora_llama3_1_8b", "qlora_llama3_1_405b", "qlora_llama3_1_70b", "qlora_llama3_1_8b", "llama3_2_1b", "llama3_2_3b", "lora_llama3_2_1b", "lora_llama3_2_3b", "qlora_llama3_2_1b", "qlora_llama3_2_3b", "Llama3VisionEncoder", "Llama3VisionProjectionHead", "Llama3VisionTransform", "llama3_2_vision_11b", "llama3_2_vision_decoder", "llama3_2_vision_encoder", "llama3_2_vision_transform", "lora_llama3_2_vision_11b", "lora_llama3_2_vision_decoder", "lora_llama3_2_vision_encoder", "qlora_llama3_2_vision_11b", "MistralChatTemplate", "lora_mistral", "lora_mistral_7b", "lora_mistral_classifier", "lora_mistral_reward_7b", "mistral", "mistral_7b", "mistral_classifier", "mistral_reward_7b", "mistral_tokenizer", "qlora_mistral_7b", "qlora_mistral_reward_7b", "lora_phi3", "lora_phi3_mini", "phi3", "phi3_mini", "phi3_mini_tokenizer", "qlora_phi3_mini", "lora_qwen2", "lora_qwen2_0_5b", "lora_qwen2_1_5b", "lora_qwen2_7b", "qwen2", "qwen2_0_5b", "qwen2_1_5b", "qwen2_7b", "qwen2_tokenizer", "FeedForward", "Fp32LayerNorm", "KVCache", "MultiHeadAttention", "RMSNorm", "RotaryPositionalEmbeddings", "TanhGate", "TiedLinear", "TransformerCrossAttentionLayer", "TransformerDecoder", "TransformerSelfAttentionLayer", "VisionTransformer", "delete_kv_caches", "disable_kv_cache", "local_kv_cache", "reparametrize_as_dtype_state_dict_post_hook", "CEWithChunkedOutputLoss", "ForwardKLLoss", "ForwardKLWithChunkedOutputLoss", "DeepFusionModel", "FusionEmbedding", "FusionLayer", "get_fusion_params", "register_fusion_module", "AdapterModule", "DoRALinear", "LoRALinear", "disable_adapter", "get_adapter_params", "set_trainable_params", "validate_missing_and_unexpected_for_lora", "validate_state_dict_for_lora", "BaseTokenizer", "ModelTokenizer", "SentencePieceBaseTokenizer", "TikTokenBaseTokenizer", "parse_hf_tokenizer_json", "tokenize_messages_no_special_tokens", "Transform", "VisionCrossAttentionMask", "estimate_advantages", "get_rewards_ppo", "DPOLoss", "PPOLoss", "RSOLoss", "SimPOLoss", "truncate_sequence_at_first_stop_token", "torchtune.training.FSDPPolicyType", "FormattedCheckpointFiles", "FullModelHFCheckpointer", "FullModelMetaCheckpointer", "FullModelTorchTuneCheckpointer", "ModelType", "OptimizerInBackwardWrapper", "apply_selective_activation_checkpointing", "create_optim_in_bwd_wrapper", "get_cosine_schedule_with_warmup", "get_dtype", "get_full_finetune_fsdp_wrap_policy", "get_lr", "get_memory_stats", "get_quantizer_mode", "get_unmasked_sequence_lengths", "get_world_size_and_rank", "init_distributed", "is_distributed", "log_memory_stats", "lora_fsdp_wrap_policy", "CometLogger", "DiskLogger", "StdoutLogger", "TensorBoardLogger", "WandBLogger", "register_optim_in_bwd_hooks", "set_activation_checkpointing", "set_default_dtype", "set_seed", "setup_torch_profiler", "update_state_dict_for_classifier", "validate_expected_param_dtype", "batch_to_device", "get_device", "get_logger", "torch_version_ge", "<no title>", "Computation times", "Welcome to the torchtune Documentation", "Install Instructions", "torchtune Overview", "LoRA Single Device Finetuning", "Distributed Quantization-Aware Training (QAT)", "Recipes Overview", "Computation times", "torchtune CLI", "Fine-Tuning Llama3 with Chat Data", "End-to-End Workflow with torchtune", "Fine-Tune Your First LLM", "Meta Llama3 in torchtune", "Distilling Llama3.1 8B into Llama3.2 1B using Knowledge Distillation", "Fine-Tuning Llama2 with LoRA", "Memory Optimization Overview", "Fine-Tuning Llama3 with QAT", "Fine-Tuning Llama2 with QLoRA"], "terms": {"instruct": [1, 2, 4, 9, 10, 11, 13, 15, 16, 17, 18, 19, 21, 31, 32, 33, 35, 37, 42, 54, 56, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 71, 145, 148, 149, 153, 161, 167, 168, 169, 176, 177, 178, 266, 269, 270, 273, 274, 276, 278, 279, 281, 282], "prompt": [1, 9, 10, 11, 12, 13, 18, 31, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 55, 56, 59, 60, 62, 63, 64, 67, 68, 69, 70, 73, 74, 94, 100, 106, 119, 144, 148, 153, 162, 169, 179, 189, 199, 217, 275, 277], "chat": [1, 2, 11, 13, 16, 18, 32, 37, 42, 56, 60, 100, 169, 269], "includ": [1, 9, 11, 12, 16, 18, 19, 21, 22, 24, 25, 38, 39, 56, 77, 81, 91, 101, 116, 125, 146, 147, 148, 150, 151, 158, 169, 175, 189, 205, 206, 212, 229, 230, 268, 271, 273, 274, 275, 276, 277, 278, 279, 282], "some": [1, 17, 18, 20, 21, 22, 24, 32, 156, 200, 202, 208, 209, 266, 268, 269, 270, 273, 274, 275, 276, 278, 279, 280, 281, 282], "specif": [1, 5, 11, 12, 15, 19, 21, 24, 25, 27, 55, 56, 65, 66, 144, 213, 238, 270, 274, 275, 280, 281, 282], "format": [1, 2, 7, 11, 19, 21, 36, 45, 46, 55, 56, 59, 60, 63, 64, 67, 100, 144, 153, 213, 228, 229, 230, 231, 232, 273, 274, 275, 276, 277, 279, 280], "differ": [1, 9, 10, 17, 18, 19, 21, 24, 26, 48, 53, 60, 64, 78, 79, 80, 144, 191, 205, 214, 222, 232, 259, 268, 269, 270, 273, 274, 275, 277, 278, 279, 280, 281, 282], "dataset": [1, 10, 13, 14, 15, 17, 19, 24, 31, 33, 35, 36, 37, 42, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 222, 268, 276, 277, 278, 281], "model": [1, 2, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 25, 27, 31, 32, 33, 35, 36, 37, 42, 53, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 182, 183, 184, 185, 187, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 216, 217, 218, 220, 221, 222, 223, 224, 225, 229, 230, 231, 232, 234, 235, 238, 240, 247, 248, 253, 254, 258, 266, 268, 269, 270, 274, 282], "convert": [1, 9, 11, 14, 21, 22, 33, 35, 37, 42, 49, 55, 56, 60, 65, 66, 67, 75, 142, 229, 275, 281, 282], "from": [1, 2, 4, 10, 11, 13, 14, 15, 17, 19, 22, 23, 24, 25, 26, 27, 31, 33, 36, 37, 42, 45, 46, 47, 50, 53, 54, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 76, 77, 78, 79, 80, 81, 82, 83, 84, 92, 93, 100, 102, 103, 104, 105, 119, 143, 144, 148, 159, 161, 169, 176, 177, 178, 179, 180, 183, 188, 189, 190, 191, 192, 193, 194, 196, 197, 198, 201, 202, 203, 204, 208, 211, 214, 216, 219, 222, 224, 225, 228, 229, 230, 231, 233, 235, 236, 248, 251, 252, 253, 258, 265, 267, 270, 272, 273, 275, 276, 277, 278, 279, 280, 281], "common": [1, 2, 5, 9, 14, 15, 24, 217, 273, 274, 277, 279, 280, 281], "schema": [1, 9, 11, 12, 16], "convers": [1, 13, 16, 18, 19, 21, 22, 33, 42, 52, 55, 56, 60, 65, 67, 69, 229, 231, 232, 268, 274, 275, 279, 280, 282], "json": [1, 9, 12, 13, 16, 18, 21, 22, 37, 42, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 119, 148, 169, 179, 216, 229, 273, 274, 275, 281], "list": [1, 9, 11, 14, 15, 18, 19, 21, 22, 24, 33, 36, 38, 44, 45, 47, 48, 49, 50, 51, 52, 53, 55, 56, 60, 61, 65, 66, 67, 72, 73, 81, 85, 86, 87, 88, 89, 90, 94, 95, 96, 97, 98, 99, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 119, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 143, 144, 147, 148, 149, 150, 151, 152, 154, 155, 156, 157, 162, 163, 164, 165, 166, 169, 170, 171, 172, 173, 174, 189, 191, 196, 198, 199, 200, 201, 204, 205, 206, 210, 211, 212, 213, 214, 215, 217, 219, 228, 229, 230, 231, 248, 262, 271, 274, 275, 276, 277, 280, 281], "us": [1, 2, 4, 5, 9, 10, 11, 12, 13, 14, 16, 17, 18, 20, 21, 22, 23, 26, 27, 29, 32, 35, 36, 38, 44, 47, 50, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 79, 80, 81, 100, 101, 107, 116, 119, 120, 125, 129, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 165, 169, 171, 175, 179, 180, 182, 183, 184, 185, 187, 189, 190, 191, 192, 193, 194, 195, 196, 199, 200, 203, 207, 210, 214, 215, 219, 220, 221, 222, 223, 225, 227, 229, 230, 232, 233, 237, 238, 240, 247, 248, 249, 250, 251, 252, 256, 258, 260, 261, 266, 267, 268, 269, 270, 271, 273, 276, 277, 279, 280, 281], "collect": [1, 24, 276], "sampl": [1, 9, 11, 12, 13, 14, 15, 16, 19, 20, 21, 23, 26, 33, 35, 36, 37, 42, 44, 50, 54, 55, 56, 57, 62, 63, 65, 66, 67, 68, 69, 71, 73, 74, 183, 185, 189, 190, 191, 199, 218, 219, 224, 274, 275, 280], "batch": [1, 11, 17, 25, 47, 48, 49, 50, 54, 59, 62, 65, 66, 68, 79, 142, 143, 182, 183, 185, 188, 189, 190, 191, 194, 199, 201, 220, 221, 222, 224, 225, 242, 257, 260, 268, 276, 277, 279, 280], "handl": [1, 13, 16, 17, 24, 29, 31, 53, 56, 144, 214, 215, 274, 275, 279, 280, 282], "ani": [1, 5, 10, 11, 13, 14, 15, 16, 17, 21, 22, 24, 25, 27, 29, 30, 33, 36, 37, 38, 42, 44, 47, 50, 51, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 76, 80, 181, 189, 195, 199, 201, 202, 208, 209, 210, 211, 212, 213, 214, 217, 229, 230, 231, 233, 244, 247, 248, 256, 259, 273, 274, 276, 279, 280, 281], "pad": [1, 45, 47, 48, 49, 50, 54, 73, 75, 76, 189, 191, 219, 221, 223, 226, 242], "miscellan": 1, "modifi": [1, 10, 21, 24, 25, 26, 193, 195, 205, 233, 268, 275, 277, 278, 279, 280, 281, 282], "For": [2, 7, 9, 10, 12, 14, 16, 18, 19, 20, 21, 22, 24, 25, 33, 35, 36, 37, 38, 42, 50, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 78, 79, 80, 81, 91, 95, 101, 107, 116, 120, 125, 129, 143, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 183, 189, 191, 196, 199, 200, 203, 207, 218, 229, 235, 241, 248, 252, 254, 256, 267, 269, 270, 271, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "detail": [2, 9, 10, 12, 13, 16, 21, 22, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 78, 79, 80, 81, 160, 182, 191, 196, 223, 227, 238, 247, 256, 269, 270, 273, 275, 276, 277, 278, 279, 280, 281, 282], "usag": [2, 21, 195, 196, 198, 228, 232, 233, 257, 267, 273, 275, 276, 277, 280, 281, 282], "guid": [2, 23, 24, 26, 33, 35, 37, 42, 60, 62, 63, 64, 65, 66, 67, 68, 69, 225, 248, 268, 274, 276, 278, 279], "pleas": [2, 7, 34, 40, 43, 78, 79, 80, 81, 88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170, 191, 196, 227, 238, 247, 254, 267, 270, 271, 275, 277, 282], "see": [2, 7, 9, 10, 11, 12, 13, 14, 16, 18, 19, 21, 22, 23, 26, 34, 40, 43, 47, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 74, 88, 89, 90, 98, 99, 100, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 153, 160, 163, 164, 170, 182, 188, 190, 191, 201, 204, 212, 213, 218, 227, 232, 238, 247, 248, 252, 254, 256, 262, 267, 268, 269, 270, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "overview": [2, 7, 24, 26, 199, 266, 269, 270, 276, 278, 279, 282], "support": [2, 4, 10, 11, 15, 16, 17, 18, 21, 22, 23, 25, 26, 27, 36, 37, 54, 55, 56, 59, 60, 61, 62, 65, 66, 67, 68, 69, 72, 77, 95, 107, 120, 129, 142, 149, 150, 151, 153, 154, 156, 165, 168, 169, 171, 181, 183, 191, 200, 201, 206, 224, 230, 231, 233, 237, 240, 241, 268, 269, 270, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "sever": [2, 280], "wide": [2, 9, 183, 278], "onli": [2, 4, 10, 16, 18, 22, 23, 26, 35, 36, 42, 54, 55, 56, 61, 67, 73, 77, 81, 95, 107, 120, 129, 144, 149, 150, 151, 153, 154, 156, 165, 171, 183, 187, 189, 191, 196, 198, 202, 206, 208, 210, 214, 229, 230, 231, 233, 237, 238, 240, 241, 247, 273, 275, 276, 278, 279, 280, 281, 282], "help": [2, 11, 18, 19, 22, 63, 100, 189, 191, 199, 229, 248, 266, 267, 268, 273, 274, 275, 276, 278, 280, 281, 282], "quickli": [2, 11, 24, 38, 57, 269, 274, 280], "bootstrap": [2, 11], "your": [2, 7, 9, 11, 12, 13, 14, 16, 17, 18, 21, 23, 26, 27, 38, 57, 60, 64, 67, 79, 80, 81, 147, 151, 191, 200, 248, 251, 252, 258, 266, 267, 268, 269, 270, 273, 274, 277, 278, 279, 280, 281, 282], "fine": [2, 9, 10, 11, 12, 16, 18, 19, 20, 22, 23, 25, 26, 36, 54, 55, 56, 71, 205, 258, 266, 268, 269, 270, 271, 275], "tune": [2, 4, 9, 10, 11, 12, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 29, 36, 54, 55, 56, 71, 205, 258, 266, 267, 268, 269, 270, 271, 273, 275], "also": [2, 9, 10, 12, 14, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 53, 60, 64, 67, 71, 73, 74, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 169, 171, 175, 183, 189, 192, 205, 206, 225, 238, 240, 247, 248, 252, 258, 261, 267, 270, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "like": [2, 6, 12, 22, 23, 24, 25, 26, 169, 191, 196, 198, 200, 231, 267, 273, 274, 275, 276, 278, 279, 280, 281], "These": [2, 5, 10, 13, 15, 18, 19, 21, 22, 24, 25, 27, 54, 55, 67, 191, 219, 269, 271, 274, 275, 276, 277, 279, 280, 281, 282], "ar": [2, 5, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 26, 27, 31, 35, 38, 39, 42, 45, 47, 48, 52, 54, 55, 56, 59, 60, 64, 65, 66, 67, 73, 75, 76, 79, 85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 100, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 144, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 182, 188, 189, 190, 191, 193, 199, 200, 201, 205, 206, 207, 210, 211, 219, 221, 227, 229, 230, 232, 233, 235, 237, 239, 240, 245, 247, 257, 258, 267, 268, 269, 271, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "especi": [2, 268, 273, 275, 280], "specifi": [2, 10, 12, 16, 18, 20, 22, 24, 25, 27, 31, 33, 35, 37, 42, 44, 60, 62, 63, 64, 65, 66, 67, 68, 69, 73, 75, 77, 94, 101, 106, 107, 116, 119, 120, 125, 129, 146, 148, 150, 162, 169, 171, 175, 179, 183, 189, 190, 197, 198, 199, 227, 238, 241, 247, 252, 254, 257, 270, 271, 273, 274, 275, 276, 277, 280, 281, 282], "yaml": [2, 10, 17, 18, 20, 24, 25, 27, 28, 29, 53, 60, 64, 67, 71, 252, 268, 271, 273, 274, 275, 276, 277, 279, 281, 282], "config": [2, 9, 12, 13, 16, 17, 18, 19, 20, 21, 22, 23, 26, 27, 28, 29, 30, 53, 60, 64, 67, 71, 183, 210, 229, 233, 248, 252, 257, 268, 269, 270, 271, 274, 275, 277, 278, 279, 280, 281, 282], "represent": [2, 228, 278, 279, 281, 282], "abov": [2, 4, 9, 16, 17, 18, 20, 22, 55, 195, 245, 267, 270, 275, 277, 279, 280, 281, 282], "text": [4, 5, 9, 11, 12, 15, 18, 19, 21, 35, 36, 37, 38, 39, 42, 44, 50, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 144, 200, 201, 212, 214, 215, 217, 219, 274, 275, 281], "version": [4, 58, 73, 95, 107, 120, 129, 149, 154, 156, 165, 171, 183, 263, 267, 277, 280, 281, 282], "famili": [4, 22, 25, 59, 61, 65, 66, 69, 70, 72, 232, 268, 273, 277, 278], "import": [4, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 27, 60, 64, 65, 66, 67, 71, 77, 191, 192, 193, 194, 222, 248, 251, 252, 274, 275, 276, 277, 278, 279, 280, 281, 282], "you": [4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 36, 38, 55, 56, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 100, 182, 189, 191, 194, 196, 198, 201, 203, 232, 248, 251, 252, 258, 266, 267, 268, 269, 270, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "need": [4, 9, 10, 12, 14, 16, 18, 19, 20, 22, 23, 24, 25, 26, 38, 54, 56, 183, 189, 191, 199, 200, 225, 247, 248, 251, 252, 253, 267, 269, 270, 271, 273, 274, 275, 276, 277, 279, 280, 282], "request": [4, 237, 275], "access": [4, 10, 22, 24, 25, 53, 229, 235, 269, 270, 273, 275, 276], "hug": [4, 11, 22, 32, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 119, 148, 169, 179, 216, 236, 268, 273, 276, 277], "face": [4, 11, 22, 32, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 119, 148, 169, 179, 216, 236, 268, 273, 276, 277], "befor": [4, 19, 22, 38, 52, 54, 65, 78, 79, 81, 91, 95, 147, 151, 183, 188, 189, 190, 191, 196, 198, 199, 201, 206, 215, 229, 248, 270, 273, 275, 280, 281], "download": [4, 10, 11, 16, 22, 65, 264, 267, 269, 270, 274, 277, 278, 279, 281, 282], "To": [4, 9, 12, 13, 14, 16, 17, 18, 19, 20, 22, 24, 25, 26, 54, 65, 189, 191, 201, 229, 258, 267, 268, 270, 271, 273, 275, 276, 277, 278, 279, 280, 281, 282], "1b": [4, 10, 17, 136, 138, 140, 266], "meta": [4, 10, 15, 16, 20, 21, 22, 100, 185, 229, 230, 269, 270, 273, 274, 275, 276, 278], "output": [4, 10, 12, 13, 14, 20, 21, 22, 31, 35, 45, 53, 55, 56, 59, 62, 64, 68, 69, 73, 81, 85, 86, 87, 91, 95, 101, 105, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 142, 143, 146, 147, 149, 150, 151, 154, 155, 156, 157, 158, 161, 165, 166, 171, 174, 175, 180, 181, 183, 185, 186, 188, 189, 190, 191, 196, 198, 199, 200, 201, 205, 206, 209, 210, 211, 219, 231, 238, 250, 257, 258, 267, 269, 270, 273, 275, 276, 277, 278, 279, 280, 282], "dir": [4, 10, 21, 22, 252, 267, 269, 270, 273, 275, 276, 277, 278, 281], "tmp": [4, 9, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 24, 233, 269, 270, 274, 276, 278], "ignor": [4, 9, 10, 12, 22, 42, 71, 187, 188, 190, 197, 198, 234, 258, 269, 270, 273, 278], "pattern": [4, 10, 19, 215, 269, 270, 273, 278], "origin": [4, 10, 15, 16, 17, 20, 21, 22, 58, 59, 63, 195, 200, 201, 205, 206, 269, 270, 274, 275, 277, 278, 279, 280, 281, 282], "consolid": [4, 10, 22, 269, 270, 278], "00": [4, 10, 16, 22, 60, 64, 265, 269, 270, 272, 276, 278], "pth": [4, 10, 22, 228, 269, 270, 275, 278], "hf": [4, 9, 18, 20, 21, 22, 222, 224, 229, 273, 274, 275, 276, 277], "token": [4, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 22, 24, 25, 36, 42, 47, 49, 50, 51, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 91, 94, 95, 101, 106, 107, 116, 119, 120, 125, 129, 144, 146, 147, 148, 150, 151, 154, 156, 158, 160, 162, 165, 167, 169, 171, 175, 179, 183, 185, 188, 189, 190, 191, 196, 198, 199, 200, 201, 212, 213, 214, 215, 216, 217, 219, 221, 223, 226, 238, 242, 269, 273, 275, 276, 277, 278, 279, 280, 281, 282], "hf_token": [4, 21, 270, 278], "3b": [4, 137, 139, 141], "The": [4, 9, 11, 12, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 29, 30, 32, 36, 46, 47, 52, 53, 54, 55, 56, 60, 63, 64, 65, 66, 67, 70, 78, 79, 80, 81, 85, 86, 87, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 142, 144, 147, 149, 150, 151, 154, 156, 165, 166, 171, 172, 173, 174, 181, 184, 185, 186, 187, 191, 195, 196, 197, 198, 199, 200, 201, 205, 207, 212, 213, 214, 215, 216, 217, 219, 220, 222, 223, 224, 225, 227, 229, 231, 233, 236, 237, 239, 241, 248, 252, 255, 257, 261, 262, 263, 267, 268, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "reus": [4, 268], "llama3_token": [4, 15, 17, 20, 21, 65, 66, 73, 274, 277], "class": [4, 10, 13, 14, 15, 21, 24, 26, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 53, 54, 55, 56, 57, 65, 66, 78, 79, 80, 81, 94, 100, 105, 106, 119, 142, 143, 144, 148, 153, 156, 160, 161, 162, 169, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 196, 197, 198, 199, 200, 201, 202, 204, 205, 206, 208, 209, 212, 213, 214, 215, 218, 219, 222, 223, 224, 225, 228, 229, 230, 231, 232, 233, 248, 249, 250, 251, 252, 271, 274, 276, 278, 279, 280, 282], "languag": [4, 10, 16, 32, 73, 150, 200, 201, 205, 206, 222, 258, 279, 280], "11b": [4, 145, 152], "8b": [4, 15, 16, 20, 21, 118, 122, 124, 128, 130, 132, 135, 166, 266, 269, 270, 273, 274, 281], "70b": [4, 83, 86, 89, 103, 109, 113, 117, 121, 123, 127, 131, 134, 277], "405b": [4, 126, 130, 133], "weight": [4, 21, 22, 25, 50, 85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 145, 148, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 187, 195, 204, 205, 206, 210, 214, 222, 229, 230, 231, 232, 241, 252, 258, 266, 269, 270, 273, 274, 275, 276, 277, 278, 279, 281, 282], "can": [4, 5, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 30, 33, 35, 36, 37, 38, 39, 42, 50, 53, 55, 56, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 78, 79, 81, 144, 147, 151, 184, 185, 187, 188, 189, 191, 196, 198, 199, 201, 203, 207, 214, 215, 227, 229, 232, 234, 238, 247, 248, 251, 252, 254, 257, 266, 267, 268, 269, 270, 271, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "instead": [4, 9, 12, 14, 16, 22, 25, 31, 45, 54, 55, 71, 81, 129, 150, 151, 182, 187, 191, 206, 225, 273, 277, 279, 280, 281], "builder": [4, 9, 10, 11, 12, 13, 15, 16, 17, 22, 58, 60, 61, 64, 67, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 93, 96, 97, 98, 99, 102, 103, 104, 105, 108, 109, 110, 111, 112, 113, 114, 115, 117, 118, 121, 122, 123, 124, 126, 127, 128, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 152, 155, 157, 159, 161, 163, 164, 166, 168, 170, 172, 173, 174, 176, 177, 178, 274, 280, 282], "all": [4, 5, 10, 11, 14, 15, 19, 21, 25, 30, 35, 36, 38, 42, 45, 47, 50, 53, 54, 55, 56, 81, 119, 142, 148, 169, 179, 183, 187, 189, 191, 192, 193, 194, 195, 199, 200, 201, 203, 207, 218, 229, 233, 235, 239, 245, 253, 259, 260, 264, 266, 268, 269, 270, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281], "7b": [4, 9, 12, 14, 18, 19, 20, 21, 22, 61, 72, 84, 87, 90, 93, 97, 104, 105, 110, 111, 114, 115, 155, 157, 159, 161, 164, 174, 178, 229, 230, 274, 276, 277, 279, 282], "13b": [4, 22, 82, 85, 88, 102, 108, 112], "codellama": 4, "size": [4, 14, 15, 16, 22, 25, 27, 45, 50, 59, 62, 65, 66, 68, 79, 80, 81, 142, 143, 144, 145, 147, 148, 149, 151, 182, 183, 184, 185, 188, 189, 190, 191, 194, 196, 198, 199, 200, 201, 219, 220, 221, 242, 243, 245, 268, 270, 273, 275, 276, 277, 279, 280, 281], "0": [4, 9, 10, 12, 14, 15, 16, 18, 20, 22, 25, 45, 47, 48, 49, 50, 54, 60, 64, 67, 73, 74, 76, 77, 81, 85, 86, 87, 88, 89, 90, 91, 95, 96, 97, 98, 99, 101, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 143, 144, 146, 147, 149, 150, 151, 152, 154, 155, 156, 157, 158, 160, 163, 164, 165, 166, 167, 170, 171, 172, 173, 174, 175, 176, 177, 182, 183, 189, 191, 192, 193, 194, 200, 205, 206, 217, 222, 223, 224, 225, 226, 236, 242, 248, 251, 252, 256, 261, 263, 265, 270, 272, 274, 275, 276, 277, 279, 280, 281, 282], "5b": [4, 172, 173, 176, 177, 280], "qwen2": [4, 10, 171, 172, 173, 174, 176, 177, 178, 179, 232, 280], "exampl": [4, 10, 19, 21, 22, 23, 24, 25, 26, 27, 29, 33, 35, 37, 38, 42, 44, 45, 46, 47, 48, 49, 50, 53, 54, 56, 59, 60, 61, 62, 64, 65, 66, 67, 68, 69, 71, 72, 73, 75, 76, 77, 81, 143, 144, 147, 151, 182, 183, 191, 192, 193, 194, 196, 198, 199, 200, 201, 203, 204, 207, 212, 213, 214, 215, 217, 218, 222, 224, 225, 226, 227, 228, 229, 230, 232, 233, 241, 242, 248, 251, 252, 255, 258, 261, 262, 263, 264, 265, 267, 269, 270, 272, 273, 274, 275, 277, 278, 279, 280, 281, 282], "none": [4, 9, 16, 25, 26, 28, 30, 31, 33, 35, 37, 42, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 81, 94, 101, 106, 107, 116, 119, 120, 125, 129, 142, 143, 144, 146, 148, 150, 162, 169, 179, 180, 182, 183, 185, 188, 189, 190, 191, 192, 193, 194, 199, 201, 207, 209, 210, 211, 214, 217, 219, 220, 221, 223, 229, 230, 231, 232, 233, 234, 237, 241, 246, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 259, 260, 261, 262, 273, 275, 281], "mini": [4, 21, 166, 167, 168, 169, 170], "4k": [4, 21, 167, 168, 169], "microsoft": [4, 168, 169], "ai": [4, 10, 12, 14, 19, 55, 56, 159, 252, 274, 277], "thi": [4, 9, 10, 12, 13, 14, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 31, 33, 34, 35, 36, 37, 42, 43, 44, 45, 47, 48, 50, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 75, 79, 80, 81, 91, 95, 101, 107, 116, 120, 125, 129, 142, 144, 146, 147, 150, 151, 153, 154, 156, 158, 160, 165, 167, 168, 169, 171, 175, 180, 182, 183, 185, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 199, 200, 201, 203, 204, 207, 210, 211, 212, 213, 214, 215, 217, 218, 219, 221, 222, 223, 225, 227, 228, 229, 230, 231, 233, 236, 237, 240, 242, 245, 247, 248, 249, 251, 252, 253, 254, 256, 258, 260, 261, 266, 267, 268, 269, 270, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "v0": [4, 9, 14, 18, 19, 21, 153], "mistralai": [4, 21, 273], "2b": [4, 92, 96], "gemma2": 4, "googl": [4, 92, 93], "gguf": 4, "compon": [4, 6, 14, 21, 22, 25, 30, 48, 55, 56, 65, 66, 205, 268, 271, 276, 278, 279, 282], "multimod": [4, 11, 14, 36, 42, 56, 65, 66, 199, 267], "encod": [4, 5, 15, 21, 50, 56, 73, 74, 81, 142, 143, 145, 146, 147, 149, 150, 151, 183, 188, 189, 190, 194, 199, 200, 201, 203, 212, 214, 215, 217, 219, 222, 225, 274], "perform": [5, 12, 13, 17, 19, 20, 21, 22, 54, 73, 191, 196, 207, 218, 225, 268, 269, 270, 274, 275, 277, 278, 280, 281, 282], "direct": [5, 18, 25, 48, 85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 154, 155, 156, 157, 165, 166, 205, 222, 267, 271, 280], "id": [5, 14, 17, 21, 22, 47, 48, 49, 50, 54, 61, 65, 66, 72, 73, 74, 76, 77, 144, 183, 185, 189, 190, 199, 212, 213, 214, 215, 216, 217, 219, 229, 231, 248, 274, 275], "decod": [5, 9, 12, 14, 15, 16, 18, 20, 21, 60, 64, 67, 73, 91, 95, 101, 107, 116, 120, 125, 129, 143, 144, 145, 146, 147, 149, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 183, 188, 189, 190, 194, 199, 201, 203, 212, 214, 215, 274], "typic": [5, 9, 12, 20, 24, 33, 37, 42, 50, 54, 55, 56, 57, 71, 169, 203, 222, 225, 280, 281, 282], "byte": [5, 21, 215, 280, 282], "pair": [5, 10, 18, 21, 24, 48, 49, 63, 67, 70, 215], "underli": [5, 13, 18, 21, 214, 280, 282], "helper": 5, "method": [5, 13, 14, 15, 19, 21, 22, 24, 25, 26, 29, 46, 55, 57, 59, 60, 61, 62, 63, 64, 67, 68, 69, 70, 71, 72, 144, 189, 195, 196, 199, 202, 203, 204, 208, 210, 212, 213, 233, 241, 267, 268, 279, 282], "two": [5, 15, 18, 19, 22, 24, 35, 50, 52, 65, 66, 73, 74, 79, 191, 200, 203, 205, 219, 226, 228, 268, 270, 275, 276, 277, 279, 280, 281, 282], "pre": [5, 9, 11, 12, 17, 18, 19, 20, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 100, 145, 148, 149, 191, 199, 201, 203, 205, 270, 274, 280], "train": [5, 9, 10, 11, 12, 13, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 31, 33, 35, 50, 53, 54, 55, 56, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 100, 144, 145, 148, 149, 181, 183, 185, 189, 190, 195, 196, 198, 199, 200, 201, 203, 205, 222, 225, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 266, 268, 269, 271, 273, 274, 275, 277, 278, 279, 280, 281, 282], "function": [5, 10, 22, 24, 25, 27, 29, 45, 46, 47, 48, 60, 64, 67, 73, 79, 80, 81, 147, 151, 180, 183, 191, 192, 195, 207, 210, 211, 222, 223, 227, 229, 243, 256, 258, 260, 261, 268, 278, 282], "preprocess": [5, 54, 191], "imag": [5, 11, 15, 35, 36, 37, 42, 44, 46, 50, 56, 65, 66, 78, 79, 80, 81, 142, 143, 144, 145, 147, 148, 149, 151, 191, 200, 219, 279], "loss": [6, 9, 12, 14, 24, 25, 36, 38, 55, 56, 59, 60, 62, 64, 67, 68, 69, 196, 197, 198, 222, 223, 224, 225, 276, 278, 279, 282], "algorithm": [6, 21, 220, 225, 256], "ppo": [6, 220, 221, 222, 223, 271], "dpo": [6, 18, 48, 55, 207, 222, 224, 225, 271], "offer": 7, "allow": [7, 10, 53, 201, 205, 210, 251, 270, 273, 280, 281, 282], "seamless": 7, "transit": 7, "between": [7, 9, 18, 19, 21, 22, 55, 60, 67, 146, 150, 188, 189, 193, 199, 221, 223, 225, 229, 232, 248, 275, 277, 278, 279, 280, 281, 282], "interoper": [7, 22, 25, 268, 275, 282], "rest": [7, 274, 280, 282], "ecosystem": [7, 22, 25, 268, 275, 277, 282], "comprehens": [7, 280], "deep": [7, 22, 23, 24, 25, 26, 201, 203, 268, 271, 276, 277, 280], "dive": [7, 22, 23, 24, 25, 26, 268, 270, 271, 276, 277, 280], "util": [7, 14, 16, 22, 24, 25, 27, 45, 47, 50, 142, 234, 251, 253, 254, 260, 261, 262, 263, 268, 275, 276, 280, 282], "work": [7, 22, 25, 35, 42, 187, 200, 201, 268, 270, 273, 275, 277, 280, 282], "set": [7, 9, 12, 17, 18, 20, 22, 23, 24, 25, 26, 33, 36, 37, 42, 50, 54, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 71, 72, 101, 107, 116, 120, 125, 129, 144, 146, 149, 150, 154, 156, 158, 160, 165, 167, 171, 175, 183, 185, 188, 189, 192, 193, 194, 199, 207, 209, 227, 233, 238, 245, 247, 248, 254, 255, 256, 257, 260, 261, 268, 271, 273, 274, 275, 276, 277, 278, 279, 280, 281], "enabl": [7, 10, 11, 17, 21, 23, 24, 25, 26, 53, 85, 86, 87, 88, 89, 90, 96, 97, 98, 99, 108, 109, 110, 111, 112, 113, 114, 115, 121, 122, 123, 124, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 152, 155, 157, 163, 164, 166, 170, 172, 173, 174, 176, 177, 183, 188, 189, 190, 192, 193, 194, 199, 201, 205, 206, 256, 257, 270, 277, 279, 280, 282], "consumpt": [7, 53, 75, 269, 280], "dure": [7, 10, 11, 22, 54, 59, 60, 62, 64, 67, 68, 69, 182, 183, 185, 189, 190, 191, 195, 199, 200, 219, 225, 240, 269, 270, 274, 275, 277, 279, 280, 281, 282], "control": [7, 13, 18, 21, 25, 36, 59, 60, 62, 64, 67, 68, 69, 193, 194, 201, 207, 248, 256, 270, 275, 280], "lr": [7, 24, 233, 236, 239, 278, 280], "process": [7, 11, 14, 15, 17, 25, 26, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 81, 147, 151, 191, 195, 243, 244, 256, 276, 281, 282], "variou": 7, "provid": [7, 10, 11, 12, 14, 22, 24, 25, 27, 32, 33, 35, 37, 42, 46, 47, 51, 53, 54, 73, 75, 81, 183, 187, 189, 191, 199, 207, 217, 222, 231, 238, 248, 252, 257, 261, 268, 269, 270, 273, 274, 275, 276, 277, 280], "debug": [7, 22, 24, 25, 248, 273], "finetun": [7, 10, 22, 24, 25, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174, 199, 266, 268, 270, 276, 277, 280], "job": [7, 10, 26, 256, 276], "involv": [9, 12, 17, 20, 56, 281], "multi": [9, 18, 25, 183, 277], "turn": [9, 18, 25, 33, 36, 37, 42, 52, 55, 67, 274, 280], "multipl": [9, 16, 17, 18, 22, 24, 25, 33, 36, 37, 42, 48, 53, 56, 67, 142, 143, 183, 189, 190, 191, 199, 206, 248, 249, 250, 251, 252, 257, 276, 277, 278, 280], "back": [9, 21, 22, 52, 207, 229, 279, 280, 282], "forth": [9, 52], "user": [9, 12, 13, 14, 15, 16, 18, 19, 21, 25, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 52, 55, 56, 60, 64, 67, 94, 101, 106, 107, 116, 119, 120, 125, 129, 146, 148, 150, 154, 156, 158, 160, 162, 165, 167, 169, 171, 175, 179, 183, 217, 271, 274, 276, 281], "assist": [9, 12, 13, 14, 15, 16, 18, 19, 21, 31, 32, 33, 35, 36, 37, 38, 39, 41, 42, 44, 52, 55, 56, 60, 67, 73, 94, 100, 106, 119, 148, 162, 169, 179, 217, 274], "role": [9, 13, 14, 15, 16, 18, 19, 21, 33, 36, 37, 38, 39, 42, 44, 55, 56, 60, 67, 94, 106, 119, 144, 148, 162, 169, 179, 217, 274], "content": [9, 13, 15, 16, 18, 19, 21, 22, 33, 36, 37, 38, 39, 42, 44, 55, 56, 60, 67, 217, 274], "what": [9, 14, 15, 16, 18, 22, 23, 24, 26, 36, 37, 55, 56, 60, 64, 67, 100, 153, 191, 266, 271, 274, 275, 276, 277, 280], "answer": [9, 15, 16, 19, 40, 64, 275, 277], "ultim": [9, 281], "question": [9, 15, 16, 19, 40, 64, 275, 277], "life": 9, "42": [9, 73, 191], "That": [9, 274], "s": [9, 10, 12, 13, 14, 16, 17, 18, 19, 20, 22, 24, 25, 26, 27, 29, 32, 37, 42, 52, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 85, 86, 87, 100, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 142, 143, 144, 149, 150, 151, 153, 154, 155, 156, 157, 165, 166, 169, 171, 174, 175, 182, 183, 185, 189, 190, 191, 195, 199, 202, 203, 204, 205, 208, 210, 211, 215, 222, 224, 225, 226, 227, 229, 230, 233, 238, 240, 242, 247, 248, 251, 254, 255, 258, 260, 261, 268, 273, 274, 276, 278, 279, 280, 281, 282], "ridicul": 9, "oh": 9, "i": [9, 12, 14, 18, 19, 20, 25, 36, 67, 73, 100, 142, 143, 153, 183, 188, 189, 190, 191, 195, 199, 209, 228, 233, 275, 277, 280, 281, 282], "know": [9, 274, 275, 278, 279], "more": [9, 10, 11, 12, 13, 14, 16, 18, 19, 21, 22, 24, 25, 38, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 182, 191, 196, 203, 210, 227, 228, 231, 248, 252, 254, 256, 260, 268, 269, 270, 271, 273, 275, 276, 277, 278, 279, 280, 281, 282], "structur": [9, 12, 13, 14, 19, 25, 37, 39, 42, 60, 119, 144, 148, 169, 179, 219, 274, 275, 281], "than": [9, 10, 12, 16, 18, 24, 50, 52, 73, 75, 182, 183, 191, 222, 227, 231, 232, 259, 260, 263, 274, 275, 276, 277, 278, 279, 280, 282], "freeform": [9, 12, 57, 71], "associ": [9, 10, 11, 12, 22, 24, 25, 73, 74, 81, 91, 101, 116, 125, 146, 150, 158, 175, 248, 275, 279], "where": [9, 10, 12, 14, 16, 18, 19, 20, 36, 38, 45, 48, 59, 73, 75, 76, 79, 105, 142, 143, 161, 180, 183, 189, 191, 193, 196, 198, 199, 206, 214, 219, 220, 222, 223, 226, 238, 242, 247, 278, 280], "thei": [9, 11, 12, 19, 21, 24, 25, 53, 65, 66, 81, 142, 147, 151, 189, 191, 201, 211, 238, 273, 274, 279, 280, 281], "learn": [9, 12, 25, 53, 200, 201, 203, 233, 236, 239, 268, 269, 270, 271, 274, 276, 277, 279, 280, 281, 282], "simpli": [9, 12, 13, 14, 16, 20, 22, 24, 54, 56, 222, 273, 274, 275, 277, 278, 280, 282], "predict": [9, 12, 73, 74, 77, 220, 221, 223, 269], "next": [9, 12, 22, 54, 71, 73, 74, 81, 191, 219, 269, 277, 282], "respond": 9, "accur": 9, "primari": [9, 12, 16, 18, 20, 22, 24, 25, 55, 56, 271, 276], "entri": [9, 12, 16, 18, 20, 24, 25, 47, 50, 271, 276, 280], "point": [9, 10, 12, 16, 18, 20, 21, 24, 25, 46, 60, 217, 271, 275, 276, 277, 279, 281, 282], "torchtun": [9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 267, 269, 270, 271, 274, 276, 280], "chat_dataset": [9, 12, 13, 18, 274], "let": [9, 10, 11, 12, 16, 18, 22, 24, 26, 273, 274, 275, 276, 277, 278, 279, 280, 282], "follow": [9, 10, 11, 12, 15, 16, 19, 22, 25, 36, 37, 38, 42, 50, 54, 55, 56, 64, 67, 144, 183, 188, 219, 223, 231, 232, 233, 236, 245, 252, 257, 266, 267, 270, 271, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "data": [9, 10, 12, 13, 14, 15, 16, 19, 21, 23, 31, 32, 33, 35, 36, 37, 38, 39, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 100, 148, 179, 191, 218, 222, 224, 240, 248, 249, 250, 251, 252, 260, 269, 270, 275, 280, 281, 282], "directli": [9, 10, 12, 13, 14, 16, 22, 24, 25, 27, 31, 55, 56, 60, 64, 65, 67, 71, 222, 227, 229, 273, 275, 276, 277, 279, 280, 281, 282], "llm": [9, 10, 11, 12, 21, 25, 199, 201, 266, 267, 268, 269, 271, 275, 277, 278, 279], "my_data": [9, 12, 13, 16, 274], "human": [9, 16, 18, 36, 42, 60, 100, 222, 223, 224, 274], "valu": [9, 16, 22, 24, 33, 35, 37, 42, 45, 47, 48, 50, 59, 60, 62, 63, 64, 67, 68, 69, 70, 73, 74, 76, 77, 82, 83, 84, 91, 92, 93, 95, 101, 102, 103, 104, 105, 107, 116, 117, 118, 120, 125, 126, 127, 128, 129, 136, 137, 144, 146, 150, 154, 156, 158, 159, 160, 161, 165, 167, 171, 175, 176, 177, 178, 182, 183, 184, 188, 189, 190, 197, 198, 199, 201, 210, 220, 221, 223, 226, 229, 232, 233, 236, 242, 248, 249, 250, 251, 252, 256, 270, 273, 274, 276, 277, 279, 280, 281], "gpt": [9, 16, 42, 60, 74, 274, 275], "mistral": [9, 14, 18, 19, 21, 144, 153, 154, 155, 156, 157, 159, 160, 161, 162, 163, 164, 232, 273, 274, 275, 276], "mistral_token": [9, 14, 18, 19, 21], "m_token": [9, 14, 18, 19, 20, 21], "path": [9, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 35, 42, 46, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 94, 106, 119, 144, 148, 162, 169, 179, 214, 215, 216, 229, 230, 231, 257, 273, 274, 275, 277, 279], "1": [9, 14, 16, 18, 19, 20, 21, 22, 25, 35, 42, 45, 47, 48, 49, 50, 54, 69, 73, 74, 76, 77, 78, 79, 101, 107, 116, 120, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 143, 144, 146, 150, 153, 154, 156, 158, 160, 165, 167, 171, 172, 173, 175, 176, 177, 182, 183, 189, 191, 192, 193, 194, 196, 197, 198, 214, 215, 217, 222, 223, 224, 225, 230, 232, 236, 242, 245, 248, 251, 252, 255, 256, 268, 269, 273, 274, 275, 276, 279, 280, 281, 282], "prompt_templ": [9, 12, 14, 16, 18, 19, 94, 106, 119, 144, 148, 162, 169, 179], "mistralchattempl": [9, 14, 18, 19, 162, 274], "max_seq_len": [9, 10, 12, 14, 16, 17, 18, 20, 21, 24, 27, 47, 50, 51, 54, 59, 60, 61, 62, 64, 65, 66, 68, 69, 71, 72, 91, 94, 95, 101, 106, 107, 116, 119, 120, 125, 129, 144, 146, 148, 150, 154, 156, 158, 160, 162, 165, 167, 169, 171, 175, 179, 182, 183, 185, 189, 194, 281], "8192": [9, 12, 14, 16, 17, 18, 20, 21, 148, 279, 281], "ds": [9, 10, 12, 15, 16, 18, 20, 54, 69, 274], "sourc": [9, 10, 12, 13, 16, 18, 20, 22, 24, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 91, 92, 93, 94, 95, 96, 97, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 116, 117, 118, 119, 120, 121, 122, 125, 126, 127, 128, 129, 130, 131, 132, 136, 137, 138, 139, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 165, 166, 167, 168, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 274, 275, 281], "data_fil": [9, 12, 13, 16, 18, 20, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 274], "split": [9, 10, 12, 13, 14, 16, 18, 20, 22, 44, 53, 54, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 215, 274, 275, 281], "conversation_column": [9, 60, 274], "conversation_styl": [9, 60, 274], "By": [9, 12, 22, 205, 270, 273, 278, 279, 280, 281, 282], "default": [9, 10, 12, 16, 22, 24, 31, 32, 33, 35, 36, 37, 42, 45, 48, 49, 50, 51, 54, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 81, 82, 83, 84, 85, 86, 87, 91, 92, 93, 94, 95, 96, 97, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 116, 117, 118, 119, 120, 121, 122, 125, 126, 127, 128, 129, 130, 131, 132, 136, 137, 138, 139, 144, 145, 148, 149, 150, 151, 154, 155, 156, 157, 158, 159, 160, 161, 162, 165, 166, 167, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 183, 184, 185, 188, 189, 190, 195, 197, 198, 199, 205, 206, 210, 214, 215, 217, 219, 220, 221, 222, 225, 229, 230, 231, 233, 236, 237, 243, 247, 248, 249, 252, 255, 256, 257, 267, 270, 273, 274, 275, 277, 278, 279, 280, 281, 282], "true": [9, 10, 12, 13, 14, 15, 16, 17, 22, 24, 31, 36, 45, 53, 54, 57, 58, 59, 60, 62, 64, 65, 66, 67, 68, 69, 71, 72, 75, 76, 81, 88, 89, 90, 91, 95, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 144, 145, 152, 163, 164, 170, 183, 188, 189, 190, 192, 193, 194, 195, 196, 197, 199, 201, 207, 214, 215, 217, 219, 220, 223, 226, 227, 229, 230, 231, 238, 239, 240, 242, 244, 245, 248, 251, 257, 263, 269, 273, 274, 275, 277, 279, 280, 281, 282], "train_on_input": [9, 12, 13, 18, 24, 31, 33, 35, 37, 42, 53, 58, 59, 60, 62, 63, 64, 67, 68, 69, 70], "new_system_prompt": [9, 12, 13, 33, 35, 37, 42, 60, 62, 63, 64, 65, 66, 67, 68, 69], "tokenized_dict": [9, 12, 15, 16, 18, 20], "label": [9, 12, 20, 25, 47, 48, 49, 50, 54, 61, 69, 72, 196, 197, 198, 222, 225, 278], "print": [9, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 26, 44, 50, 53, 59, 62, 65, 66, 68, 69, 73, 144, 191, 192, 193, 194, 214, 215, 217, 263, 274, 276, 279, 281, 282], "inst": [9, 14, 19, 21, 100, 144, 153, 274], "733": [9, 14, 21], "16289": [9, 14, 21], "28793": [9, 14, 21], "1824": 9, "349": 9, "272": 9, "4372": 9, "In": [9, 10, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 24, 25, 55, 79, 80, 81, 147, 151, 185, 189, 191, 206, 227, 247, 251, 252, 270, 274, 275, 277, 278, 279, 280, 281, 282], "_component_": [9, 10, 12, 13, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 53, 60, 64, 67, 71, 257, 270, 274, 275, 277, 278, 279, 280, 281], "null": [9, 22, 24, 281], "have": [9, 10, 13, 14, 18, 21, 22, 24, 27, 35, 36, 55, 60, 67, 75, 79, 80, 81, 142, 147, 151, 181, 182, 183, 184, 187, 189, 191, 192, 193, 194, 196, 198, 199, 204, 211, 219, 225, 228, 231, 233, 238, 239, 251, 259, 267, 274, 275, 276, 277, 278, 279, 280, 281, 282], "singl": [9, 10, 16, 17, 18, 19, 22, 24, 27, 33, 35, 37, 42, 47, 53, 54, 55, 56, 57, 60, 67, 71, 79, 80, 81, 94, 105, 106, 119, 142, 143, 144, 147, 148, 151, 161, 162, 169, 183, 189, 191, 199, 229, 230, 231, 232, 233, 235, 271, 273, 274, 275, 276, 277, 278, 279, 280, 282], "name": [9, 12, 13, 14, 16, 18, 20, 22, 23, 24, 26, 28, 31, 33, 35, 37, 42, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 204, 209, 211, 215, 229, 230, 231, 232, 233, 235, 248, 249, 250, 251, 252, 258, 259, 261, 273, 274, 275, 277, 280, 281], "messag": [9, 11, 12, 15, 16, 18, 19, 21, 31, 32, 33, 35, 37, 38, 39, 42, 44, 52, 55, 56, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 94, 106, 119, 144, 148, 162, 169, 213, 217, 267, 273, 274], "contain": [9, 11, 13, 14, 15, 16, 18, 20, 22, 33, 35, 36, 42, 47, 48, 49, 50, 54, 55, 56, 57, 60, 65, 71, 119, 144, 148, 169, 179, 182, 183, 185, 189, 190, 199, 202, 204, 208, 209, 210, 215, 217, 220, 226, 229, 230, 231, 233, 235, 240, 246, 251, 257, 258, 260, 274, 275, 277, 279], "topic": [9, 266], "per": [9, 16, 47, 88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 143, 144, 152, 163, 164, 170, 182, 191, 195, 219, 221, 222, 273, 280, 281, 282], "could": [9, 18, 19, 239, 278, 279], "system": [9, 12, 13, 18, 19, 32, 33, 35, 36, 37, 38, 39, 41, 42, 44, 52, 55, 56, 60, 62, 63, 64, 65, 66, 67, 68, 69, 94, 100, 106, 119, 148, 153, 162, 169, 179, 217, 274], "tool": [9, 18, 19, 22, 36, 38, 56, 153, 248, 275, 276], "call": [9, 14, 18, 21, 22, 27, 36, 38, 56, 65, 66, 153, 183, 189, 191, 192, 193, 195, 199, 210, 248, 249, 250, 251, 252, 253, 257, 258, 274, 279, 282], "return": [9, 10, 13, 15, 18, 19, 21, 27, 29, 36, 38, 44, 45, 46, 47, 48, 49, 50, 51, 54, 55, 56, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 91, 92, 93, 94, 95, 96, 97, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 116, 117, 118, 119, 120, 121, 122, 125, 126, 127, 128, 129, 130, 131, 132, 136, 137, 138, 139, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 154, 155, 156, 157, 158, 159, 160, 161, 162, 165, 166, 167, 168, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 193, 194, 196, 197, 198, 199, 200, 201, 202, 204, 205, 206, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 231, 233, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 255, 256, 257, 261, 262, 263, 278, 279, 282], "dai": [9, 20], "todai": 9, "It": [9, 10, 14, 16, 32, 36, 38, 55, 56, 60, 62, 64, 65, 66, 68, 70, 144, 147, 151, 153, 187, 189, 191, 199, 222, 225, 248, 273, 274, 278, 282], "tuesdai": 9, "about": [9, 10, 13, 14, 18, 22, 25, 65, 66, 191, 222, 225, 248, 252, 268, 269, 270, 271, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "tomorrow": 9, "wednesdai": 9, "As": [9, 12, 16, 22, 24, 25, 26, 206, 268, 275, 280, 282], "an": [9, 10, 12, 14, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 46, 50, 52, 53, 57, 60, 62, 64, 65, 66, 67, 68, 71, 72, 78, 79, 80, 107, 120, 129, 144, 147, 149, 151, 154, 156, 160, 165, 171, 172, 173, 176, 177, 183, 187, 189, 191, 199, 200, 201, 203, 204, 207, 208, 209, 213, 218, 219, 222, 227, 228, 229, 230, 231, 233, 234, 238, 239, 248, 252, 257, 261, 268, 269, 270, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "slimorca": [9, 69], "pass": [9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 24, 27, 36, 38, 53, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 91, 95, 101, 107, 116, 120, 125, 129, 149, 154, 156, 158, 160, 165, 167, 171, 175, 183, 189, 193, 194, 195, 199, 207, 211, 215, 223, 227, 231, 237, 238, 240, 244, 247, 248, 251, 252, 254, 257, 273, 274, 279, 281, 282], "repo": [9, 10, 12, 16, 18, 20, 22, 65, 229, 230, 232, 273, 275], "select": [9, 234], "one": [9, 10, 11, 12, 13, 16, 18, 22, 25, 33, 35, 37, 42, 47, 50, 52, 60, 66, 67, 191, 196, 198, 217, 231, 248, 275, 276, 277, 280, 282], "most": [9, 12, 13, 16, 18, 20, 22, 24, 36, 38, 274, 276, 279, 280, 282], "gemma": [9, 12, 18, 20, 92, 93, 94, 95, 96, 97, 98, 99, 187, 232, 280], "gemma_token": [9, 12, 18, 20], "g_token": [9, 12, 18, 20], "open": [9, 20, 46, 69, 92, 93, 275], "orca": [9, 69], "dedup": [9, 69], "recip": [9, 11, 12, 16, 18, 20, 22, 23, 24, 26, 27, 28, 29, 144, 189, 199, 229, 230, 231, 268, 269, 270, 274, 275, 277, 280, 282], "via": [9, 12, 14, 16, 17, 18, 20, 23, 24, 26, 55, 60, 64, 67, 71, 183, 189, 190, 205, 206, 229, 279, 282], "http": [9, 12, 16, 27, 46, 57, 61, 63, 65, 71, 72, 74, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 93, 95, 96, 97, 98, 99, 102, 103, 104, 105, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 152, 154, 155, 156, 157, 159, 161, 163, 164, 165, 166, 168, 169, 170, 172, 173, 174, 176, 177, 178, 183, 184, 185, 191, 196, 197, 219, 220, 222, 223, 224, 225, 227, 229, 230, 236, 245, 248, 251, 252, 254, 256, 262, 267, 275, 277, 278], "ha": [9, 18, 22, 64, 73, 143, 186, 188, 189, 191, 194, 196, 198, 199, 202, 204, 207, 208, 211, 226, 231, 233, 258, 259, 274, 275, 276, 277, 278, 279, 280, 282], "addition": [9, 22, 214, 215, 225, 256, 274, 279, 280], "argument": [9, 10, 12, 16, 22, 24, 27, 34, 40, 43, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170, 227, 238, 244, 248, 249, 251, 252, 254, 273, 274, 279, 280, 281], "load_dataset": [9, 12, 16, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 274], "document": [9, 12, 16, 17, 78, 79, 80, 81, 183, 189, 190, 227, 238, 247, 269, 271, 273, 280], "file": [9, 10, 11, 12, 16, 22, 23, 24, 25, 26, 27, 28, 29, 46, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 119, 144, 148, 169, 179, 214, 215, 216, 229, 230, 231, 249, 252, 257, 265, 268, 270, 272, 273, 274, 275, 276, 277, 279, 280, 281, 282], "raw": [9, 11, 13, 14, 16, 21, 44], "vari": [9, 50, 54, 189], "field": [9, 10, 14, 15, 27, 31, 35, 36, 42, 44, 54, 55, 56, 59, 65, 66, 246], "indic": [9, 14, 16, 18, 19, 50, 53, 54, 75, 76, 81, 147, 151, 183, 185, 189, 190, 191, 199, 200, 219, 220, 223, 226, 227, 242, 245, 274], "There": [9, 24, 52, 79, 274, 276, 277, 278, 279, 280], "few": [9, 10, 201, 277, 279, 282], "standard": [9, 12, 14, 15, 17, 19, 22, 34, 55, 56, 60, 63, 101, 107, 116, 120, 125, 129, 144, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 183, 205, 250, 268, 274, 275, 277, 278], "across": [9, 22, 25, 50, 53, 205, 229, 251, 256, 275, 277, 278, 281], "mani": [9, 14, 16, 19, 24, 54, 269, 270, 275, 278], "we": [9, 10, 11, 12, 18, 19, 20, 21, 22, 23, 24, 25, 26, 47, 50, 54, 55, 56, 60, 61, 67, 72, 73, 77, 182, 183, 185, 189, 190, 191, 193, 196, 198, 199, 206, 222, 225, 229, 230, 231, 237, 241, 247, 253, 258, 268, 269, 270, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "ipython": [9, 14, 19, 36, 38, 41, 55, 56, 94, 106, 119, 148, 162, 169, 179], "transform": [9, 10, 11, 16, 22, 25, 31, 33, 35, 55, 56, 59, 60, 62, 63, 65, 66, 67, 68, 69, 81, 85, 86, 87, 91, 95, 96, 97, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 143, 144, 146, 147, 148, 149, 150, 151, 154, 155, 156, 157, 158, 160, 165, 166, 167, 171, 172, 173, 174, 175, 188, 189, 190, 191, 201, 219, 236, 254, 279, 280, 281], "sharegpttomessag": [9, 13, 60, 69], "expect": [9, 12, 13, 15, 16, 18, 19, 20, 22, 24, 27, 31, 33, 35, 36, 37, 42, 46, 50, 55, 56, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 143, 144, 185, 199, 211, 233, 248, 252, 259, 274, 279, 280, 281], "code": [9, 10, 12, 13, 16, 19, 21, 22, 25, 82, 83, 84, 85, 86, 87, 88, 89, 90, 189, 248, 264, 268, 276, 280], "openaitomessag": [9, 13, 60, 67], "If": [9, 10, 13, 14, 16, 17, 19, 21, 22, 24, 30, 33, 35, 36, 37, 42, 44, 46, 47, 50, 51, 52, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 71, 73, 75, 77, 81, 94, 101, 106, 107, 116, 119, 120, 125, 129, 142, 144, 146, 148, 150, 162, 169, 171, 175, 179, 182, 183, 185, 187, 189, 190, 191, 193, 194, 195, 196, 198, 199, 205, 206, 211, 217, 229, 230, 231, 232, 233, 234, 237, 238, 239, 240, 241, 244, 248, 251, 252, 256, 257, 259, 261, 267, 273, 274, 275, 276, 277, 278, 279, 280, 281], "doe": [9, 17, 22, 44, 50, 54, 67, 71, 91, 153, 158, 168, 183, 187, 189, 190, 192, 193, 194, 197, 198, 199, 204, 217, 229, 231, 233, 258, 273, 275, 281], "fit": [9, 25, 54, 61, 71, 72, 191, 222, 274], "creat": [9, 10, 13, 16, 19, 22, 24, 27, 38, 54, 56, 60, 67, 75, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 93, 96, 97, 98, 99, 102, 103, 104, 105, 108, 109, 110, 111, 112, 113, 114, 115, 117, 118, 121, 122, 123, 124, 126, 127, 128, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 152, 155, 157, 159, 161, 163, 164, 166, 168, 170, 172, 173, 174, 176, 177, 178, 182, 183, 189, 190, 191, 199, 227, 229, 230, 231, 235, 236, 248, 249, 251, 273, 275, 282], "custom": [9, 15, 16, 21, 24, 25, 31, 38, 55, 56, 60, 64, 65, 66, 67, 71, 94, 106, 119, 148, 162, 169, 179, 254, 268, 269, 270, 273, 276, 277, 279, 280], "dialogu": [9, 16, 43, 68, 274], "defin": [9, 10, 17, 22, 24, 25, 38, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 183, 188, 189, 199, 202, 204, 206, 208, 221, 276, 279], "same": [9, 10, 11, 15, 18, 22, 24, 38, 45, 78, 79, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 143, 166, 172, 173, 174, 181, 182, 184, 186, 187, 188, 190, 191, 194, 199, 201, 217, 223, 225, 226, 233, 238, 239, 252, 258, 260, 270, 273, 274, 275, 277, 278, 279, 280, 281, 282], "wai": [9, 14, 19, 22, 24, 55, 56, 210, 228, 273, 275, 276, 277, 278], "instruct_dataset": [9, 12, 13, 53], "info": [9, 262, 276], "slimorca_dataset": [9, 24], "command": [10, 12, 17, 21, 23, 25, 26, 267, 270, 271, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "line": [10, 17, 22, 23, 25, 271, 273, 276, 277, 280], "both": [10, 14, 15, 21, 22, 37, 50, 53, 63, 67, 180, 199, 201, 203, 211, 273, 275, 278, 279, 280, 281, 282], "built": [10, 11, 13, 23, 24, 26, 63, 67, 70, 267, 274, 276, 282], "done": [10, 17, 54, 189, 210, 237, 247, 258, 279, 281, 282], "run": [10, 17, 22, 23, 24, 26, 29, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 182, 183, 189, 195, 196, 229, 230, 231, 233, 234, 235, 245, 248, 251, 252, 253, 267, 268, 269, 270, 271, 274, 276, 277, 278, 279, 280, 281, 282], "cli": [10, 24, 26, 28, 29, 267, 269, 275, 276, 280], "which": [10, 11, 12, 14, 16, 17, 18, 19, 20, 21, 22, 24, 25, 46, 47, 53, 54, 57, 59, 60, 62, 64, 67, 68, 69, 71, 76, 77, 85, 86, 87, 94, 95, 96, 97, 106, 107, 108, 109, 110, 111, 119, 120, 121, 122, 129, 130, 131, 132, 138, 139, 144, 147, 148, 149, 150, 151, 153, 154, 155, 156, 157, 162, 165, 166, 169, 171, 172, 173, 174, 182, 183, 185, 189, 190, 191, 192, 193, 194, 199, 201, 210, 211, 214, 229, 230, 231, 233, 236, 237, 249, 252, 254, 258, 268, 269, 270, 271, 273, 274, 275, 276, 278, 279, 280, 281, 282], "folder": [10, 22], "first": [10, 17, 22, 24, 27, 42, 52, 54, 65, 76, 81, 147, 151, 189, 191, 192, 193, 199, 226, 229, 266, 268, 269, 274, 275, 277, 278, 279, 281, 282], "ensur": [10, 19, 21, 22, 24, 30, 52, 55, 56, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 183, 192, 229, 231, 237, 268, 276], "instal": [10, 23, 24, 26, 245, 248, 251, 252, 266, 273, 275, 276, 277, 278, 279, 280, 281, 282], "environ": [10, 25, 245, 248, 261, 267, 271, 273, 275, 276, 281], "so": [10, 13, 18, 19, 20, 22, 24, 54, 65, 187, 191, 229, 267, 268, 274, 275, 276, 277, 279, 280, 281, 282], "directori": [10, 22, 24, 35, 42, 65, 229, 230, 231, 249, 251, 252, 257, 273, 275, 276, 277], "new": [10, 14, 15, 16, 19, 21, 25, 37, 42, 59, 61, 62, 63, 65, 67, 68, 69, 159, 182, 200, 201, 232, 248, 249, 251, 274, 275, 276, 277, 278, 279, 282], "librari": [10, 222, 224, 237, 256, 262, 266, 267, 268, 273, 280, 282], "mkdir": 10, "my_project": [10, 248, 252], "cd": [10, 21, 267, 275], "llama": [10, 15, 16, 17, 20, 21, 22, 100, 142, 144, 145, 146, 147, 148, 150, 151, 184, 185, 229, 230, 269, 270, 273, 274, 275, 276, 277, 278, 279], "3": [10, 15, 16, 17, 20, 21, 22, 45, 47, 48, 49, 50, 54, 76, 77, 81, 142, 144, 145, 146, 147, 148, 150, 151, 153, 166, 168, 169, 191, 232, 242, 255, 262, 269, 270, 273, 274, 275, 276, 277, 278, 281, 282], "2": [10, 14, 15, 17, 21, 22, 26, 45, 47, 48, 49, 50, 52, 54, 69, 76, 77, 78, 79, 136, 137, 138, 139, 140, 141, 142, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 182, 183, 191, 214, 215, 217, 223, 225, 226, 229, 230, 232, 242, 255, 256, 257, 263, 270, 274, 275, 276, 277, 279, 280, 281], "lora": [10, 24, 85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 205, 206, 207, 210, 211, 229, 247, 266, 268, 271, 274, 276, 277, 278], "devic": [10, 17, 24, 25, 194, 233, 237, 240, 260, 261, 271, 273, 274, 275, 276, 277, 279, 280, 282], "lora_finetune_single_devic": [10, 24, 269, 273, 274, 275, 276, 277, 278, 279, 280, 282], "llama3_2": [10, 17, 136, 137, 138, 139, 140, 141, 192, 193, 194, 232, 278], "1b_lora_single_devic": 10, "often": [10, 279, 280], "ll": [10, 18, 20, 22, 24, 25, 73, 241, 268, 270, 274, 275, 276, 277, 278, 280, 281, 282], "want": [10, 12, 19, 22, 24, 25, 26, 27, 50, 55, 56, 73, 203, 267, 273, 274, 275, 276, 277, 278, 279, 280], "start": [10, 23, 25, 26, 46, 76, 217, 232, 248, 267, 268, 274, 275, 276, 278, 280, 281], "our": [10, 12, 13, 20, 22, 25, 268, 269, 270, 271, 274, 275, 276, 278, 279, 280, 281, 282], "particular": [10, 11, 13, 19, 21, 24, 53, 144, 227, 279, 282], "adjust": [10, 205, 269, 270, 278, 280, 281], "hyperparamet": [10, 23, 225, 233, 268, 276, 279, 282], "cp": [10, 24, 267, 273, 274, 275, 276, 277, 281], "copi": [10, 274, 275, 276, 277, 280, 281, 282], "make": [10, 17, 19, 22, 23, 24, 25, 26, 145, 191, 268, 273, 275, 276, 277, 278, 279, 280, 281, 282], "modif": [10, 281], "show": [10, 144, 219, 267, 269, 270, 273, 274, 278, 279], "each": [10, 12, 15, 18, 19, 20, 22, 25, 38, 39, 42, 47, 48, 50, 53, 54, 55, 56, 78, 79, 80, 81, 85, 86, 87, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 143, 144, 147, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 183, 185, 189, 190, 191, 196, 198, 199, 201, 205, 210, 211, 217, 219, 220, 221, 222, 224, 225, 242, 256, 257, 268, 270, 271, 273, 275, 276, 279, 280, 281], "ls": [10, 21, 267, 271, 273, 275, 276, 277], "full": [10, 11, 13, 16, 22, 24, 25, 34, 40, 43, 55, 72, 88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 149, 152, 163, 164, 170, 199, 210, 211, 217, 234, 267, 268, 271, 273, 275, 277, 279, 280, 281], "5b_full_single_devic": 10, "qwen_config": 10, "now": [10, 19, 22, 182, 193, 233, 235, 270, 274, 275, 276, 277, 278, 279, 281, 282], "sure": [10, 17, 22, 24, 275, 276, 277, 278, 279, 280, 281, 282], "correct": [10, 12, 14, 19, 25, 34, 62, 184, 185, 189, 261, 268, 274], "ve": [10, 18, 21, 24, 182, 270, 273, 274, 275, 277, 278, 279, 280], "even": [10, 191, 258, 267, 273, 274, 277, 278, 279, 280, 282], "didn": 10, "t": [10, 13, 14, 18, 19, 20, 22, 24, 25, 45, 142, 143, 196, 201, 237, 252, 256, 273, 274, 275, 276, 278, 280, 282], "complet": [10, 11, 12, 18, 22, 25, 37, 54, 61, 71, 169, 274, 275, 276, 277, 280], "note": [10, 16, 21, 22, 24, 95, 199, 204, 233, 253, 256, 258, 270, 274, 275, 278, 279, 280, 281, 282], "must": [10, 13, 17, 27, 38, 53, 65, 66, 183, 193, 204, 228, 248, 282], "extens": [10, 25, 231, 268], "full_finetune_single_devic": [10, 17, 239, 273, 275, 276], "Or": [10, 199, 267], "rel": [10, 16, 17, 54, 183, 185, 189, 190, 199, 222, 240, 278, 279], "discuss": [10, 14, 19, 21, 24, 275, 276, 277, 279], "workflow": [10, 11, 266, 276, 279], "write": [10, 16, 22, 25, 229, 230, 231, 249, 276], "own": [10, 13, 18, 21, 22, 38, 247, 256, 273, 274, 275, 277, 278, 279], "loop": 10, "logic": [10, 15, 25, 31, 56, 213, 232, 268, 271, 276, 279], "case": [10, 14, 16, 22, 25, 26, 36, 38, 55, 79, 80, 81, 147, 151, 191, 193, 229, 233, 237, 241, 247, 249, 254, 268, 273, 274, 275, 277, 279, 280, 282], "similar": [10, 13, 16, 60, 61, 63, 65, 66, 67, 70, 71, 72, 210, 222, 275, 277, 278, 279, 280, 282], "scratch": 10, "local": [10, 11, 14, 46, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 119, 148, 169, 179, 248, 252, 256, 267, 273, 274, 275, 276], "single_devic": 10, "py": [10, 13, 24, 27, 74, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174, 182, 184, 185, 197, 222, 223, 224, 225, 236, 273, 275, 277], "recommend": [10, 60, 61, 62, 67, 68, 70, 72, 153, 189, 196, 248, 251, 274, 275, 280, 282], "python": [10, 24, 248, 252, 256, 262, 264, 273, 275, 281], "convent": [10, 188], "main": [10, 27, 29, 169, 184, 185, 267, 270, 275, 277], "script": [10, 22, 26, 271, 273, 275, 276, 277], "decor": [10, 25, 29], "pars": [10, 24, 27, 28, 216, 271, 276], "omegaconf": [10, 27], "dictconfig": [10, 24, 25, 27, 28, 29, 30, 248, 252, 257], "def": [10, 13, 15, 19, 21, 24, 25, 26, 29, 65, 66, 227, 232, 278, 279, 282], "cfg": [10, 24, 25, 28, 29, 30], "add": [10, 12, 13, 14, 16, 19, 21, 23, 24, 26, 50, 54, 57, 71, 144, 153, 191, 203, 215, 217, 231, 232, 274, 275, 277, 279, 280, 282], "here": [10, 12, 14, 15, 16, 18, 20, 21, 22, 23, 24, 26, 32, 62, 65, 66, 184, 185, 239, 269, 270, 271, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "attribut": [10, 187, 207, 217, 225, 235], "__name__": 10, "__main__": 10, "don": [10, 13, 14, 18, 19, 20, 22, 24, 25, 252, 256, 273, 274, 275, 276, 278, 280, 282], "experiment": [10, 21, 24], "optim": [10, 18, 19, 22, 24, 25, 48, 53, 55, 91, 158, 168, 222, 223, 224, 225, 231, 233, 235, 236, 239, 240, 253, 257, 269, 270, 271, 274, 275, 276, 277, 278, 279, 282], "them": [10, 12, 15, 18, 19, 22, 24, 53, 67, 191, 195, 201, 217, 260, 270, 273, 274, 275, 279, 280, 281, 282], "when": [10, 16, 17, 18, 20, 21, 22, 24, 25, 29, 53, 54, 55, 56, 57, 67, 71, 73, 75, 182, 183, 185, 189, 190, 191, 193, 194, 195, 196, 198, 199, 200, 207, 210, 221, 236, 238, 251, 253, 258, 269, 273, 275, 277, 278, 279, 280, 281, 282], "mean": [10, 24, 144, 183, 184, 188, 189, 190, 199, 220, 247, 273, 274, 276, 279, 281], "high": [10, 53, 55, 56, 268, 278, 279, 280], "level": [10, 25, 55, 56, 196, 198, 218, 235, 247, 262, 268, 278, 282], "paramet": [10, 13, 14, 15, 16, 25, 27, 28, 29, 30, 31, 33, 35, 36, 37, 38, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 91, 92, 93, 94, 95, 96, 97, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 116, 117, 118, 119, 120, 121, 122, 125, 126, 127, 128, 129, 130, 131, 132, 136, 137, 138, 139, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 154, 155, 156, 157, 158, 159, 160, 161, 162, 165, 166, 167, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 244, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 266, 268, 269, 270, 271, 273, 274, 275, 276, 277, 278, 279, 281, 282], "easili": [10, 16, 22, 24, 268, 278, 279, 281, 282], "custom_decod": 10, "customtransformerdecod": 10, "nn": [10, 27, 45, 47, 50, 81, 142, 143, 180, 182, 183, 187, 188, 189, 190, 191, 192, 193, 194, 195, 199, 200, 201, 202, 203, 204, 207, 208, 209, 227, 234, 235, 247, 253, 254, 258, 259, 278, 279, 282], "modul": [10, 13, 15, 21, 24, 27, 65, 66, 78, 79, 80, 81, 142, 143, 144, 147, 151, 156, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 222, 223, 224, 227, 232, 234, 235, 238, 247, 253, 254, 256, 276, 278, 279, 280, 282], "A": [10, 13, 15, 19, 25, 26, 33, 34, 37, 40, 42, 43, 47, 48, 49, 50, 53, 54, 67, 81, 179, 183, 187, 188, 189, 190, 191, 195, 199, 206, 210, 214, 215, 217, 219, 220, 221, 222, 223, 224, 225, 226, 227, 232, 233, 239, 240, 241, 246, 247, 265, 266, 272, 273, 274, 279, 280, 281, 282], "architectur": [10, 25, 100, 153, 189, 191, 199, 201, 232, 273], "present": [10, 33, 37, 42, 62, 63, 65, 66, 67, 68, 69, 215, 231, 258], "custom_model": 10, "num_lay": [10, 27, 81, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 189, 191, 199, 201], "int": [10, 15, 21, 24, 26, 47, 48, 49, 50, 51, 54, 61, 65, 66, 72, 73, 74, 75, 77, 78, 79, 80, 81, 85, 86, 87, 88, 89, 90, 91, 94, 95, 96, 97, 98, 99, 101, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 154, 155, 156, 157, 158, 160, 162, 163, 164, 165, 166, 167, 169, 170, 171, 172, 173, 174, 175, 179, 182, 183, 184, 185, 188, 189, 190, 191, 194, 196, 197, 198, 199, 200, 201, 205, 206, 212, 213, 214, 215, 216, 217, 219, 226, 227, 229, 230, 231, 233, 234, 236, 238, 243, 247, 248, 249, 250, 251, 252, 254, 256, 257, 273, 278, 279, 280, 282], "classification_head": 10, "bool": [10, 15, 19, 21, 24, 31, 33, 35, 36, 37, 42, 45, 54, 57, 58, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 81, 85, 86, 87, 88, 89, 90, 91, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 144, 145, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 175, 183, 188, 189, 190, 195, 196, 197, 199, 201, 205, 206, 210, 211, 213, 214, 215, 217, 220, 226, 227, 229, 230, 231, 238, 240, 244, 245, 247, 248, 251, 254, 257, 258, 263, 280, 282], "fals": [10, 13, 14, 15, 16, 18, 19, 22, 24, 33, 35, 36, 37, 42, 45, 53, 54, 58, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 75, 76, 81, 85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 145, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 175, 183, 189, 190, 192, 193, 194, 199, 200, 201, 205, 206, 207, 210, 214, 226, 229, 230, 231, 242, 245, 257, 258, 273, 274, 275, 277, 279, 281, 282], "setup": [10, 22, 24, 25, 75, 182, 183, 188, 189, 190, 192, 193, 194, 199, 201, 234, 257, 273, 275, 279, 282], "expos": [10, 13, 24, 25, 231, 271, 276], "friendli": [10, 60, 64, 67, 71, 73, 274], "manner": [10, 20], "rather": [10, 222, 280], "everi": [10, 12, 22, 25, 62, 63, 67, 68, 69, 78, 79, 80, 146, 150, 191, 193, 251, 257, 267, 273, 280, 282], "construct": [10, 36, 63, 219, 271, 279], "care": [10, 22, 229, 231, 275, 277, 279], "how": [10, 13, 14, 18, 22, 23, 24, 25, 26, 191, 227, 248, 254, 266, 269, 270, 273, 274, 275, 276, 277, 280, 281, 282], "implement": [10, 19, 21, 22, 25, 55, 57, 59, 60, 61, 62, 63, 64, 67, 68, 69, 70, 71, 72, 180, 184, 185, 186, 191, 197, 204, 206, 212, 213, 218, 222, 223, 224, 225, 229, 236, 241, 251, 268, 270, 278, 279, 280, 281, 282], "llama3_2_vision_11b": 10, "custom_dataset": [10, 13], "sftdataset": [10, 13, 24, 55, 58, 59, 60, 62, 64, 65, 66, 68, 69], "packeddataset": [10, 17, 53, 58, 59, 60, 62, 64, 68, 69, 71, 72], "inputoutputtomessag": [10, 13, 14, 62, 68], "modeltoken": [10, 15, 21, 24, 36, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 67, 68, 69, 70, 71, 72, 217], "build": [10, 25, 71, 81, 91, 101, 116, 125, 146, 147, 150, 151, 158, 160, 175, 228, 268, 277, 279, 280], "block": [10, 25, 54, 85, 86, 87, 91, 95, 96, 97, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 146, 149, 150, 151, 154, 155, 156, 157, 158, 165, 166, 171, 172, 173, 174, 175, 183, 189, 190, 210, 211, 268], "tiny_cod": 10, "pack": [10, 54, 58, 59, 60, 62, 64, 65, 66, 68, 69, 71, 72, 183, 185, 189, 190, 199, 281], "subset": [10, 15, 16, 47, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 95, 107, 120, 129, 149, 154, 156, 165, 171, 202, 208], "nampdn": 10, "tini": 10, "respons": [10, 12, 13, 18, 19, 21, 32, 33, 35, 36, 37, 42, 55, 56, 60, 62, 63, 64, 65, 66, 67, 68, 69, 217, 220, 221, 222, 224, 225, 275, 276, 277], "model_transform": [10, 13, 15, 16, 55, 56, 62, 65, 66, 68, 69, 144], "message_transform": [10, 13, 55, 56], "column_map": [10, 12, 13, 16, 18, 31, 33, 35, 37, 42, 53, 58, 59, 62, 63, 64, 65, 66, 67, 68, 69, 70], "input": [10, 11, 12, 13, 14, 15, 20, 21, 22, 31, 35, 47, 48, 49, 50, 54, 55, 56, 59, 61, 62, 64, 65, 66, 68, 69, 72, 78, 79, 80, 81, 94, 106, 119, 142, 143, 144, 147, 148, 151, 162, 169, 171, 175, 180, 181, 183, 184, 185, 186, 187, 188, 189, 190, 191, 197, 198, 199, 200, 201, 205, 206, 214, 215, 219, 229, 231, 239, 256, 259, 274, 279, 282], "filter_fn": [10, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72], "lambda": [10, 220], "x": [10, 22, 45, 73, 74, 75, 78, 79, 80, 142, 143, 180, 181, 183, 184, 185, 186, 188, 189, 190, 191, 199, 200, 201, 205, 206, 242, 255, 278, 279, 281, 282], "split_across_pack": [10, 54, 71], "els": [10, 11, 12, 19, 25, 252, 268, 282], "posit": [10, 17, 24, 27, 54, 74, 76, 78, 79, 80, 81, 91, 95, 125, 129, 142, 147, 151, 154, 156, 158, 160, 165, 167, 182, 183, 185, 188, 189, 190, 191, 199, 200, 277], "automat": [10, 12, 16, 17, 19, 21, 23, 24, 26, 27, 59, 60, 273, 275, 282], "instanti": [10, 30, 38, 82, 83, 84, 85, 86, 87, 91, 92, 93, 94, 95, 96, 97, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 116, 117, 118, 119, 120, 121, 122, 125, 126, 127, 128, 129, 130, 131, 132, 136, 137, 138, 139, 145, 146, 147, 148, 149, 150, 151, 154, 155, 156, 157, 158, 159, 160, 161, 162, 165, 166, 167, 168, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 233], "separ": [10, 55, 201, 217, 229, 274, 276, 277, 279, 282], "under": [10, 24, 257, 280, 282], "best": [10, 16, 18, 25, 270, 274, 278, 280], "root": [10, 184, 251, 252], "custom_finetun": 10, "32": [10, 27, 182, 191, 199, 201, 248, 277, 279, 280, 281, 282], "option": [10, 12, 18, 21, 22, 24, 25, 31, 33, 35, 37, 42, 50, 51, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 80, 81, 85, 86, 87, 94, 95, 96, 97, 101, 106, 107, 108, 109, 110, 111, 116, 119, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 142, 143, 144, 146, 147, 148, 149, 150, 151, 154, 155, 156, 157, 162, 165, 166, 169, 171, 172, 173, 174, 175, 179, 180, 183, 185, 188, 189, 190, 191, 194, 195, 199, 210, 211, 212, 214, 217, 219, 220, 221, 223, 229, 230, 231, 233, 234, 237, 241, 248, 249, 252, 256, 257, 261, 262, 267, 268, 273, 274, 275, 280], "param": [10, 22, 25, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 145, 149, 166, 172, 173, 174, 202, 203, 205, 206, 208, 209, 211, 229, 279, 281, 282], "omit": [10, 278, 279, 280], "being": [10, 19, 22, 56, 193, 229, 230, 231, 235, 261, 280, 281, 282], "found": [10, 11, 22, 23, 24, 26, 184, 185, 229, 230, 231, 270, 273, 278, 279, 282], "correctli": [10, 21, 22, 25, 30, 210, 229, 267, 271, 274, 276, 282], "try": [10, 22, 24, 274, 275, 276, 277, 282], "after": [10, 19, 20, 23, 25, 38, 56, 65, 66, 94, 106, 119, 144, 148, 162, 169, 182, 183, 186, 187, 189, 190, 199, 201, 226, 247, 248, 249, 250, 251, 252, 270, 274, 275, 277, 281, 282], "pythonpath": 10, "pwd": 10, "vlm": [11, 16], "hub": [11, 22, 55, 56, 273, 276], "remot": [11, 14, 35, 42, 46, 55, 56], "url": [11, 16, 35, 37, 42, 46, 267], "project": [11, 23, 26, 81, 85, 86, 87, 91, 95, 101, 105, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 142, 143, 146, 147, 149, 150, 151, 154, 155, 156, 157, 158, 161, 165, 166, 171, 174, 175, 180, 183, 189, 191, 199, 203, 210, 211, 232, 238, 248, 252, 266, 279, 280, 282], "prefer": [11, 13, 25, 48, 55, 63, 67, 70, 222, 223, 224, 225, 268, 271, 273, 280], "align": [11, 65, 66, 222, 274, 278], "continu": [11, 20, 54, 191, 248], "pretrain": [11, 142, 143, 144, 199, 201, 203, 214, 215, 273, 274, 276, 279, 282], "beyond": [11, 275, 280, 282], "those": [11, 22, 232, 275, 277, 279], "customiz": 11, "task": [11, 12, 16, 18, 19, 34, 40, 43, 53, 61, 144, 269, 274, 275, 277, 278, 279, 280, 281, 282], "supervis": [11, 20, 56], "rlhf": [11, 55, 63, 220, 221, 222, 223, 224, 225, 226], "queri": [11, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 182, 183, 189, 190, 199, 277, 280], "time": [11, 16, 17, 22, 60, 64, 91, 158, 193, 196, 198, 217, 220, 249, 251, 257, 270, 273, 274, 275, 277, 282], "take": [11, 12, 13, 16, 18, 22, 24, 25, 27, 48, 55, 56, 65, 66, 67, 142, 182, 191, 195, 201, 229, 231, 260, 261, 270, 274, 275, 276, 277, 278, 279, 280, 282], "object": [11, 13, 14, 15, 19, 21, 24, 27, 28, 81, 183, 222, 225, 227, 241], "appli": [11, 12, 15, 19, 22, 25, 47, 55, 56, 59, 65, 66, 85, 86, 87, 88, 89, 90, 91, 95, 96, 97, 98, 99, 101, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 146, 149, 150, 151, 152, 154, 155, 156, 157, 158, 163, 164, 165, 166, 170, 171, 172, 173, 174, 175, 183, 187, 188, 189, 190, 199, 205, 210, 211, 254, 268, 269, 278, 280, 282], "templat": [11, 31, 32, 34, 38, 39, 40, 43, 55, 56, 59, 62, 68, 94, 100, 106, 119, 144, 148, 153, 162, 169, 179], "anyth": [11, 61, 260], "requir": [11, 15, 17, 19, 21, 22, 24, 47, 48, 53, 55, 56, 57, 65, 66, 67, 71, 144, 187, 189, 200, 229, 231, 233, 244, 245, 247, 248, 251, 252, 256, 257, 267, 270, 273, 274, 276, 280, 281, 282], "collat": [11, 47, 49, 50, 54], "packag": [11, 23, 26, 248, 251, 252, 267], "togeth": [11, 25, 54, 196, 252, 271, 276, 279, 280, 281], "form": [12, 18, 22, 24, 25, 31, 44, 52, 55, 56, 273], "along": [12, 22, 279], "describ": [12, 254], "hand": [12, 36], "grammar": [12, 19, 34, 62], "head": [12, 81, 91, 95, 101, 107, 116, 120, 125, 129, 142, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 182, 183, 185, 189, 199, 203, 232, 277], "csv": [12, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72], "incorrect": [12, 19], "cat": [12, 16, 19, 219], "grammarerrorcorrectiontempl": [12, 19, 62], "prepend": [12, 14, 16, 19, 33, 35, 37, 38, 39, 42, 60, 62, 63, 64, 65, 66, 67, 68, 69, 94, 106, 119, 144, 148, 162, 169, 179, 214], "english": [12, 19, 34], "ncorrect": [12, 34], "mask": [12, 13, 14, 15, 17, 19, 21, 36, 38, 50, 54, 56, 59, 60, 62, 64, 65, 66, 67, 68, 69, 74, 75, 76, 144, 183, 188, 189, 190, 199, 213, 217, 219, 220, 223, 242, 274, 278], "out": [12, 15, 18, 20, 22, 24, 25, 59, 60, 62, 64, 67, 68, 69, 75, 76, 219, 229, 230, 242, 266, 268, 269, 270, 271, 273, 274, 275, 276, 277, 279, 280, 282], "100": [12, 18, 25, 48, 49, 50, 59, 60, 62, 64, 67, 68, 69, 73, 196, 197, 198, 200, 278, 279, 282], "27957": 12, "736": 12, "577": 12, "anoth": [12, 13, 16, 24, 56, 187, 248, 275, 280], "c4": [12, 71, 281], "200m": 12, "liweili": [12, 62], "c4_200m": [12, 62], "chang": [12, 13, 16, 21, 22, 23, 24, 26, 31, 33, 35, 64, 66, 70, 231, 267, 273, 275, 276, 277, 278, 279, 280, 281, 282], "remap": 12, "someth": [12, 22, 25, 26, 274, 275, 281], "hello": [12, 13, 14, 19, 21, 44, 214, 215, 262, 274, 275, 277], "world": [12, 13, 14, 19, 21, 44, 214, 215, 243, 245, 262, 275], "bye": [12, 13], "robot": [12, 15], "am": [12, 14, 16, 60, 64, 100, 153, 274, 275, 277], "prompttempl": [12, 31, 34, 40, 43, 144], "relev": [12, 14, 25, 188, 189, 190, 199, 273, 275, 279, 280], "inform": [12, 14, 22, 248, 252, 254, 268, 273, 275, 276], "mai": [12, 16, 17, 24, 26, 60, 73, 191, 194, 200, 238, 258, 269, 270, 274, 276, 278, 279, 280], "alpaca_dataset": [12, 17, 24, 58], "grammar_dataset": 12, "samsum_dataset": 12, "dictionari": [13, 14, 15, 36, 38, 44, 47, 48, 49, 54, 55, 56, 94, 106, 119, 148, 162, 169, 179, 240, 246, 248, 249, 250, 251, 252, 260, 275], "onc": [13, 21, 24, 38, 189, 199, 275, 276, 277, 279, 282], "repres": [13, 36, 48, 78, 79, 191, 228, 234, 274, 280, 281], "prepar": [13, 15, 274, 281], "ad": [13, 16, 19, 21, 25, 38, 50, 78, 79, 80, 146, 150, 160, 191, 199, 200, 203, 214, 217, 231, 232, 274, 279, 280, 281, 282], "column": [13, 16, 18, 20, 31, 33, 35, 37, 42, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 183, 189, 190, 199, 281], "worri": [13, 22, 274, 276], "itself": [13, 24], "do": [13, 15, 18, 21, 22, 23, 25, 36, 47, 65, 67, 193, 210, 217, 248, 252, 258, 273, 275, 276, 277, 279, 280, 281], "well": [13, 18, 22, 24, 25, 268, 273, 275, 277, 278, 280, 282], "flexibl": [13, 24, 53, 280], "inherit": [13, 14, 19, 25, 268], "__call__": [13, 15, 19, 65, 66, 144], "simpl": [13, 22, 25, 191, 225, 266, 276, 279, 281, 282], "contriv": [13, 19], "would": [13, 15, 19, 22, 24, 26, 38, 54, 189, 191, 199, 267, 274, 275, 279, 280, 282], "inde": [13, 237, 275], "quit": [13, 280, 282], "type": [13, 14, 15, 16, 21, 26, 27, 29, 36, 37, 44, 45, 46, 47, 48, 49, 50, 51, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 91, 92, 93, 94, 95, 96, 97, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 116, 117, 118, 119, 120, 121, 122, 125, 126, 127, 128, 129, 130, 131, 132, 136, 137, 138, 139, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 154, 155, 156, 157, 158, 159, 160, 161, 162, 165, 166, 167, 168, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 194, 195, 196, 197, 198, 199, 200, 201, 202, 205, 206, 208, 212, 213, 214, 215, 216, 217, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 235, 237, 238, 239, 240, 241, 242, 243, 244, 245, 247, 254, 255, 256, 257, 259, 261, 262, 263, 270, 275, 279, 280, 281, 282], "map": [13, 15, 19, 21, 22, 31, 33, 35, 37, 38, 42, 47, 53, 54, 59, 62, 63, 64, 65, 66, 67, 68, 69, 70, 94, 106, 119, 144, 148, 162, 169, 179, 209, 215, 216, 229, 233, 235, 248, 249, 250, 251, 252, 253, 257, 275, 279], "messagetransform": 13, "self": [13, 15, 18, 19, 20, 21, 25, 26, 54, 65, 66, 85, 86, 87, 91, 95, 96, 97, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 146, 149, 150, 151, 154, 155, 156, 157, 158, 160, 165, 166, 167, 171, 172, 173, 174, 175, 183, 188, 189, 190, 196, 198, 199, 201, 204, 210, 211, 229, 232, 233, 278, 279, 282], "str": [13, 15, 21, 24, 27, 28, 31, 33, 35, 36, 37, 38, 42, 44, 46, 47, 48, 49, 50, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 94, 106, 119, 144, 148, 149, 152, 162, 169, 179, 195, 200, 201, 202, 204, 205, 206, 208, 209, 210, 211, 212, 213, 214, 215, 216, 228, 229, 230, 231, 232, 233, 234, 237, 240, 241, 244, 246, 248, 249, 250, 251, 252, 256, 257, 258, 259, 261, 262, 263, 280], "eot": [13, 14, 19, 36, 144], "_messag": 13, "0x7fb0a10094e0": 13, "0x7fb0a100a290": 13, "msg": [13, 14, 16, 19, 21, 274], "text_cont": [13, 14, 16, 19, 36, 274], "manipul": 13, "load_dataset_kwarg": [13, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72], "mymessagetransform": 13, "chosenrejectedtomessag": [13, 63, 67], "core": [14, 25, 55, 56, 268, 271, 276, 282], "govern": [14, 274], "serv": [14, 19, 24, 33, 35, 37, 42, 60, 62, 63, 64, 65, 66, 67, 68, 69, 217, 227, 279], "interfac": [14, 25, 38, 39, 53, 204, 218], "api": [14, 25, 26, 34, 40, 43, 55, 56, 57, 59, 65, 66, 88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170, 210, 248, 267, 271, 273, 274, 276, 277, 282], "oper": [14, 25, 191, 207, 218, 256, 281], "send": 14, "other": [14, 15, 18, 20, 22, 25, 27, 35, 38, 53, 231, 238, 257, 260, 269, 270, 274, 276, 277, 278, 279, 280, 281], "special": [14, 16, 19, 36, 42, 119, 144, 146, 148, 150, 169, 179, 191, 200, 212, 213, 215, 216, 217, 219, 233], "individu": [14, 36, 54, 199, 240, 252, 254, 274], "ref": [14, 55, 56, 57, 59, 65, 66, 168, 169, 252], "constructor": [14, 21], "ident": [14, 18, 20, 45, 47, 54, 65, 67, 153, 189, 205, 275, 280, 281], "from_dict": [14, 36, 274], "becaus": [14, 21, 55, 56, 95, 182, 189, 191, 199, 231, 273, 274, 281], "correspond": [14, 18, 21, 36, 48, 74, 75, 76, 202, 204, 208, 220, 223, 237, 270, 276, 277, 280, 281], "begin": [14, 22, 54, 71, 191, 215, 217, 274, 277, 282], "pil": [14, 15, 16, 36, 37, 44, 46], "img_msg": 14, "place": [14, 16, 20, 258, 274, 280], "mode": [14, 15, 16, 194, 234, 241, 248, 275], "rgb": [14, 15, 16, 142], "4": [14, 15, 16, 22, 24, 45, 47, 48, 49, 50, 76, 81, 144, 147, 151, 182, 183, 191, 242, 263, 268, 270, 273, 275, 277, 278, 279, 280, 281, 282], "appropri": [14, 36, 53, 76, 100, 200, 229, 236, 282], "load_imag": [14, 16], "image_path": [14, 16], "jpg": [14, 16, 35, 42, 46], "tag": [14, 16, 19, 21, 38, 42, 44, 94, 100, 106, 119, 144, 148, 153, 162, 169, 179, 248, 249, 250, 251, 252, 274], "placehold": [14, 16, 42, 228], "should": [14, 15, 16, 18, 20, 22, 24, 25, 33, 35, 36, 37, 38, 42, 47, 54, 59, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 75, 76, 85, 86, 87, 95, 96, 97, 100, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 142, 146, 149, 150, 151, 153, 154, 155, 156, 157, 158, 160, 165, 166, 167, 171, 172, 173, 174, 175, 182, 183, 189, 191, 194, 199, 204, 210, 211, 220, 223, 227, 228, 246, 248, 249, 250, 251, 252, 267, 268, 275, 276, 277, 278, 279, 280, 281, 282], "insert": [14, 201, 281], "format_content_with_imag": [14, 16], "image_tag": [14, 16, 42, 44], "conveni": [14, 24, 25, 46, 273], "prompttemplateinterfac": [14, 19, 94, 106, 119, 148, 162, 169, 179], "templated_msg": [14, 19], "contains_media": [14, 16, 36], "get_media": [14, 15, 16, 36], "4x4": 14, "0x7f8d27e72740": 14, "tokenize_messsag": 14, "hi": [14, 20, 73, 274], "tokenize_messag": [14, 15, 21, 36, 55, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 144, 213, 217, 274], "22557": 14, "1526": [14, 21], "28808": 14, "28705": [14, 21], "28748": [14, 21], "15359": 14, "28725": 14, "315": [14, 20], "837": 14, "396": 14, "16107": 14, "13892": 14, "28723": 14, "modal": [15, 16, 56, 144, 201], "current": [15, 16, 18, 22, 35, 42, 54, 67, 75, 91, 95, 107, 120, 129, 149, 150, 151, 154, 156, 158, 165, 168, 171, 182, 183, 185, 189, 190, 199, 223, 230, 231, 233, 238, 241, 243, 249, 251, 253, 256, 270, 271, 276, 277, 278, 280, 281], "intend": [15, 260, 274], "drop": [15, 144, 200, 278, 281], "replac": [15, 16, 42, 51, 59, 60, 62, 64, 67, 68, 69, 144, 195, 200, 258, 279], "llama3_2_vis": [15, 16, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152], "llama3visiontransform": [15, 16, 148], "__init__": [15, 24, 25, 65, 66, 278, 279, 282], "transform_imag": 15, "clipimagetransform": [15, 65, 66, 144, 191], "xattn_mask": 15, "visioncrossattentionmask": [15, 144, 218], "224": [15, 16, 144], "tile_s": [15, 79, 80, 81, 144, 147, 151, 191, 219], "patch_siz": [15, 79, 80, 81, 144, 147, 151, 191, 219], "14": [15, 48, 144, 191, 281, 282], "skip_special_token": [15, 16, 67, 144], "begin_of_text": [15, 16, 21, 274], "start_header_id": [15, 16, 274], "end_header_id": [15, 16, 274], "n": [15, 16, 18, 19, 21, 34, 38, 40, 43, 183, 191, 217, 265, 272, 273, 274, 281], "eot_id": [15, 16, 21, 274], "na": [15, 274], "encoder_input": [15, 16, 50, 188, 189, 199], "shape": [15, 16, 22, 47, 50, 73, 74, 75, 76, 78, 79, 80, 81, 142, 143, 144, 147, 151, 180, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 196, 197, 198, 199, 200, 201, 205, 206, 219, 220, 221, 222, 223, 224, 225, 226, 242, 257, 258, 278], "num_til": [15, 16, 142, 143, 191], "num_channel": [15, 16, 191], "tile_height": [15, 16], "tile_width": [15, 16], "torch": [15, 16, 22, 24, 45, 47, 48, 49, 50, 73, 74, 75, 76, 77, 78, 79, 80, 81, 142, 143, 144, 180, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 205, 206, 220, 221, 222, 223, 224, 225, 226, 231, 233, 235, 236, 237, 239, 240, 242, 244, 245, 251, 253, 254, 255, 256, 257, 258, 259, 260, 261, 263, 267, 270, 275, 276, 277, 278, 279, 280, 282], "just": [15, 19, 22, 268, 270, 273, 274, 276, 277, 279, 280, 281], "the_cauldron_dataset": [15, 16], "ai2d": [15, 66], "respir": 15, "combust": 15, "give": [15, 21, 24, 228, 278, 279, 280], "choic": [15, 18], "oxygen": 15, "b": [15, 25, 45, 47, 142, 143, 182, 183, 185, 189, 190, 199, 206, 220, 221, 225, 242, 252, 279, 282], "carbon": 15, "dioxid": 15, "c": [15, 45, 47, 50, 65, 142, 274], "nitrogen": 15, "d": [15, 24, 36, 65, 142, 143, 182, 183, 189, 199, 273, 274, 278, 279, 281], "heat": 15, "letter": 15, "mymultimodaltransform": 15, "my_tokenizer_build": 15, "myimagetransform": 15, "add_eo": [15, 57, 71, 144, 214, 215, 274], "tupl": [15, 19, 21, 24, 27, 38, 48, 73, 74, 80, 94, 106, 119, 144, 148, 162, 169, 179, 182, 191, 195, 213, 217, 220, 221, 222, 223, 224, 225, 226, 227, 243, 257, 258, 259], "infer": [15, 19, 22, 50, 56, 91, 100, 158, 182, 183, 185, 189, 190, 199, 219, 261, 266, 270, 271, 274, 275, 276, 277, 281, 282], "vision": [15, 16, 56, 81, 142, 144, 145, 146, 147, 148, 149, 150, 151, 152, 200, 232], "aspect_ratio": [15, 50, 78, 79, 142, 191], "append": [15, 19, 38, 39, 94, 106, 119, 144, 148, 162, 169, 179, 189, 199, 214, 248, 267], "addit": [15, 21, 22, 24, 25, 27, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 144, 146, 147, 150, 151, 153, 193, 200, 201, 210, 222, 227, 229, 230, 231, 237, 238, 244, 247, 248, 249, 251, 252, 254, 268, 274, 276, 279, 280], "kei": [15, 21, 22, 24, 26, 33, 35, 37, 42, 47, 48, 55, 56, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 182, 183, 188, 189, 190, 199, 201, 209, 210, 211, 225, 229, 231, 233, 248, 257, 273, 275, 276, 279, 280, 282], "e": [16, 18, 19, 36, 46, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 78, 79, 80, 81, 142, 143, 144, 147, 151, 183, 191, 195, 199, 204, 209, 219, 228, 229, 233, 240, 257, 261, 267, 270, 275, 277, 279, 280, 281, 282], "g": [16, 18, 46, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 78, 79, 80, 81, 142, 143, 144, 147, 151, 183, 191, 199, 204, 219, 228, 229, 240, 257, 261, 270, 277, 279, 280, 281, 282], "base": [16, 18, 20, 22, 27, 36, 38, 85, 86, 87, 88, 89, 90, 91, 95, 96, 97, 98, 99, 101, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 144, 145, 148, 149, 150, 151, 152, 154, 155, 156, 157, 158, 160, 163, 164, 165, 166, 167, 170, 171, 172, 173, 174, 175, 185, 205, 206, 207, 209, 210, 211, 221, 222, 224, 225, 229, 236, 238, 239, 247, 249, 258, 261, 266, 274, 275, 276, 277, 278, 279, 280, 282], "multimodal_chat_dataset": 16, "visual": [16, 201], "get": [16, 22, 23, 24, 25, 26, 50, 144, 233, 237, 240, 243, 248, 262, 267, 268, 269, 270, 274, 275, 276, 278, 279, 280, 281], "below": [16, 23, 26, 47, 227, 277, 278, 279, 282], "clock": 16, "10": [16, 45, 47, 48, 49, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 191, 200, 275, 277, 280, 281, 282], "llama3_2_vision_transform": 16, "questionanswertempl": [16, 19, 70], "image_s": [16, 145, 148, 149, 152, 191], "560": [16, 145, 148, 149, 152], "image_dir": [16, 35, 42, 65], "home": [16, 24, 35, 42, 46], "nquestion": 16, "nit": 16, "00am": 16, "sharegpt": [16, 42, 60, 274], "q1": [16, 33, 55, 60, 67], "a1": [16, 33, 55, 60], "sharegpt4v": 16, "lin": 16, "chen": 16, "renam": 16, "themselv": [16, 282], "pathlib": 16, "pil_imag": 16, "Then": [16, 20, 26, 207, 276, 278, 280], "relat": [16, 188, 189, 199, 279], "user_messag": [16, 34, 40, 43, 144, 274], "locat": [16, 21, 24, 35, 42, 273, 277, 279, 281, 282], "long": [16, 54, 215, 274, 279], "image_dog": 16, "image_cat": 16, "image_bird": 16, "dog": [16, 219], "bird": [16, 46], "pet": 16, "three": [16, 22, 25, 50, 144, 222, 224, 225, 271, 276], "referenc": 16, "huggingfac": [16, 57, 61, 63, 71, 72, 161, 168, 169, 176, 177, 178, 222, 224, 225, 229, 230, 236, 273, 275], "co": [16, 57, 61, 63, 71, 72, 161, 168, 169, 176, 177, 178, 229, 230, 275], "img": 16, "llava_instruct_dataset": 16, "concaten": [17, 21, 48, 53, 147, 151, 213, 217], "sequenc": [17, 45, 47, 48, 49, 50, 54, 57, 61, 65, 66, 71, 72, 75, 76, 91, 94, 95, 101, 106, 107, 116, 119, 120, 125, 129, 142, 143, 144, 146, 148, 150, 154, 156, 158, 160, 162, 165, 167, 169, 171, 175, 179, 182, 183, 185, 188, 189, 190, 191, 194, 199, 201, 215, 217, 219, 221, 225, 226, 242, 274], "upto": [17, 185], "maximum": [17, 24, 47, 50, 51, 54, 61, 72, 75, 78, 79, 81, 91, 94, 95, 101, 106, 107, 116, 119, 120, 125, 129, 144, 146, 147, 148, 150, 151, 154, 156, 158, 160, 162, 165, 167, 169, 171, 175, 182, 183, 185, 188, 189, 190, 194, 199, 201, 219, 228, 273], "length": [17, 45, 47, 49, 50, 51, 52, 53, 54, 61, 72, 75, 91, 94, 95, 101, 106, 107, 116, 119, 120, 125, 129, 142, 143, 144, 146, 148, 150, 154, 156, 158, 160, 162, 165, 167, 168, 169, 171, 175, 179, 182, 183, 185, 188, 189, 190, 194, 196, 198, 199, 201, 215, 219, 220, 221, 230, 242, 248, 280], "slow": [17, 280, 282], "down": [17, 191, 231, 279, 280, 282], "introduc": [17, 85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 154, 155, 156, 157, 165, 166, 183, 184, 201, 205, 206, 225, 270, 274, 278, 279, 280, 281, 282], "signific": [17, 280, 281], "speedup": [17, 275, 277], "depend": [17, 25, 26, 229, 257, 273, 275, 278, 279, 280, 282], "iter": [17, 257, 258, 259, 282], "through": [17, 18, 22, 23, 24, 25, 26, 55, 81, 147, 151, 180, 182, 191, 201, 207, 268, 269, 270, 271, 273, 274, 275, 276, 278, 280, 281, 282], "greedi": [17, 54], "upon": [17, 25, 53, 189, 193, 199, 277], "initi": [17, 22, 25, 29, 53, 54, 82, 83, 84, 92, 93, 102, 103, 104, 105, 117, 118, 126, 127, 128, 136, 137, 159, 161, 176, 177, 178, 205, 222, 233, 244, 245, 258, 270, 276, 279, 282], "max": [17, 50, 54, 179, 189, 191, 199, 215, 228, 236, 273, 279], "llama3": [17, 20, 21, 24, 65, 66, 73, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 144, 146, 148, 149, 150, 152, 165, 196, 198, 232, 238, 266, 268, 269, 270, 273, 275, 280], "load": [17, 22, 25, 35, 42, 46, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 199, 210, 229, 230, 231, 233, 251, 258, 275, 277, 279], "isinst": [17, 227], "1b_full_single_devic": 17, "prevent": [17, 22, 54, 222, 273, 280], "irrelev": 17, "cross": [17, 50, 54, 146, 150, 188, 196, 198, 199, 201, 219, 278], "attend": [17, 54, 183, 188, 189, 190, 199, 219], "pytorch": [17, 24, 25, 74, 189, 195, 196, 227, 245, 251, 254, 256, 257, 266, 267, 268, 270, 275, 277, 279, 280, 281, 282], "flex": 17, "attent": [17, 50, 54, 74, 75, 76, 81, 85, 86, 87, 91, 95, 96, 97, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 146, 147, 149, 150, 151, 154, 155, 156, 157, 158, 160, 165, 166, 167, 168, 171, 172, 173, 174, 175, 182, 183, 185, 188, 189, 190, 192, 199, 201, 210, 211, 219, 277, 279, 280, 282], "flash": 17, "non": [17, 197, 198, 211, 221, 278], "causal": [17, 54, 75, 183, 189, 190, 199], "hardwar": [17, 237, 268, 275, 279, 280], "cuda": [17, 24, 237, 240, 257, 261, 267, 275, 280, 282], "ture": 17, "sdpa": 17, "memori": [17, 21, 25, 53, 54, 57, 61, 71, 72, 187, 189, 195, 196, 198, 199, 210, 238, 240, 246, 247, 257, 266, 268, 269, 270, 275, 276, 277, 278, 281], "effici": [17, 210, 238, 266, 268, 269, 275, 276, 279, 281], "fallback": 17, "while": [17, 24, 25, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174, 200, 205, 268, 270, 275, 280, 281, 282], "retain": [17, 222, 280, 282], "reward": [18, 105, 111, 115, 157, 161, 164, 220, 221, 222, 224, 225, 232], "downstream": 18, "captur": 18, "ground": [18, 196, 197, 198, 280], "truth": [18, 24, 196, 197, 198, 275, 277], "usual": [18, 21, 22, 185, 189, 226, 229, 242, 252, 273, 275, 279, 280], "outcom": 18, "binari": 18, "comparison": [18, 25, 279, 282], "annot": 18, "accord": [18, 19, 65, 66, 76, 153, 274], "criterion": 18, "style": [18, 31, 54, 58, 59, 60, 69, 201, 282], "interact": [18, 25, 55, 67, 266, 271, 276], "free": [18, 225, 271, 279], "preference_dataset": 18, "my_preference_dataset": [18, 67], "chosen_convers": [18, 67], "hole": [18, 67], "my": [18, 19, 23, 67, 73, 273, 274, 275, 277], "trouser": [18, 67], "fix": [18, 20, 67, 281], "rejected_convers": [18, 67], "off": [18, 25, 38, 67, 269, 270, 275, 281], "chosen": [18, 33, 55, 63, 67, 70, 222, 224, 225, 257], "reject": [18, 33, 55, 63, 67, 70, 222, 224, 225], "rejected_input_id": [18, 48, 67], "nwhat": 18, "ntake": 18, "rejected_label": [18, 48], "128006": 18, "78191": 18, "128007": 18, "271": 18, "18293": 18, "1124": 18, "1022": 18, "13": [18, 20, 21, 48, 191, 217, 226, 282], "128009": [18, 274], "accomplish": [18, 20, 53, 60, 64, 67, 71], "shown": [18, 275, 280, 281], "di": 18, "look": [18, 19, 22, 24, 25, 235, 251, 267, 274, 275, 276, 277, 278, 279, 281], "anthrop": [18, 63], "harmless": [18, 63], "granni": 18, "her": [18, 20], "mobil": [18, 275], "phone": [18, 275], "issu": [18, 271, 281], "grandmoth": 18, "manag": [18, 22, 53, 193, 194, 207, 248, 255, 274], "behavior": [18, 22, 247, 274], "thing": [18, 280, 282], "grandma": 18, "feel": [18, 271, 279], "box": [18, 268, 270, 282], "hh_rlhf_helpful_dataset": 18, "hendrydong": 18, "preference_700k": 18, "stack_exchange_paired_dataset": 18, "purpos": [19, 65, 66, 276, 277], "whenev": [19, 144, 196, 279], "llama2": [19, 22, 24, 25, 27, 61, 72, 82, 83, 84, 85, 86, 87, 88, 89, 90, 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 144, 180, 189, 190, 232, 266, 269, 273, 276, 277, 280, 281], "were": [19, 20, 21, 53, 142, 191, 207, 223, 276, 281], "gear": [19, 144], "summar": [19, 43, 68, 274, 280], "summarizetempl": [19, 68, 274], "commun": [19, 144, 275, 280], "chatmltempl": [19, 144, 179], "gec_templ": 19, "extend": [19, 21, 22, 25, 268, 280], "customprompttempl": 19, "achiev": [19, 38, 253, 270, 275, 277, 278, 279, 281, 282], "prepend_tag": [19, 38], "append_tag": [19, 38], "thu": [19, 31, 38, 55, 56, 189, 280, 281], "empti": [19, 47, 50, 52, 77, 273], "standalon": [19, 182], "my_custom_templ": 19, "Is": 19, "overhyp": 19, "advanc": [19, 79, 80, 81, 147, 151, 191], "configur": [19, 21, 25, 55, 56, 59, 60, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71, 72, 95, 107, 120, 129, 149, 154, 165, 171, 248, 268, 270, 271, 274, 276, 277, 278, 279, 280, 281, 282], "doesn": [19, 275], "neatli": 19, "fall": 19, "protocol": [19, 21, 204, 212, 213, 218], "arg": [19, 21, 24, 27, 39, 80, 181, 189, 195, 201, 204, 212, 213, 218, 250, 257, 270, 281], "whether": [19, 31, 33, 35, 36, 37, 42, 47, 50, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 85, 86, 87, 91, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 144, 145, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 175, 195, 197, 199, 205, 206, 210, 211, 214, 215, 227, 237, 239, 240, 248, 258, 274, 278], "sai": [19, 273, 274, 276], "eureka": 19, "eurekatempl": 19, "formatted_dialogu": 19, "llama2chattempl": [19, 106, 153, 179, 274], "paradigm": [20, 25, 269, 280], "unstructur": [20, 57, 71, 72], "unlabel": 20, "text_complet": 20, "odyssei": 20, "clear": [20, 280], "river": 20, "oceanu": 20, "had": 20, "got": [20, 50], "sea": 20, "went": 20, "till": 20, "reach": 20, "aeaean": 20, "island": 20, "dawn": 20, "sunris": 20, "drew": 20, "ship": 20, "sand": 20, "shore": 20, "sleep": 20, "wait": [20, 257], "break": [20, 144, 215], "child": 20, "morn": 20, "rosi": 20, "finger": 20, "appear": [20, 280], "sent": [20, 252], "men": 20, "circ": 20, "hous": 20, "fetch": [20, 279], "bodi": 20, "elpenor": 20, "cut": 20, "firewood": 20, "wood": 20, "headland": 20, "jut": 20, "wept": 20, "over": [20, 21, 25, 36, 56, 197, 198, 222, 236, 268, 270, 273, 275, 278, 279, 280, 282], "him": 20, "lament": 20, "funer": 20, "rite": 20, "armour": 20, "been": [20, 73, 75, 182, 189, 199, 226, 233, 238, 274, 280, 281], "burn": 20, "ash": 20, "rais": [20, 22, 27, 30, 33, 35, 37, 42, 44, 46, 47, 50, 52, 53, 59, 60, 62, 64, 65, 66, 68, 69, 71, 75, 81, 171, 182, 183, 187, 188, 189, 191, 192, 193, 194, 210, 211, 217, 229, 230, 231, 233, 237, 239, 240, 244, 248, 252, 256, 258, 259, 260], "cairn": 20, "stone": 20, "top": [20, 74, 77, 147, 151, 235, 280, 282], "oar": 20, "he": 20, "row": [20, 55, 56, 183, 189, 190, 199], "text_completion_dataset": [20, 281], "128000": [20, 274, 281], "6153": 20, "584": 20, "1051": 20, "2867": 20, "279": 20, "15140": 20, "22302": 20, "355": 20, "11": [20, 22, 45, 47, 48, 191, 275, 281, 282], "323": 20, "1047": 20, "2751": 20, "704": 20, "1139": 20, "1825": 20, "9581": 20, "4024": 20, "389": 20, "12222": 20, "8813": 20, "362": 20, "12791": 20, "5420": 20, "13218": 20, "1405": 20, "1070": 20, "374": 20, "39493": 20, "64919": 20, "439": 20, "304": 20, "1023": 20, "7634": 20, "1226": 20, "1243": 20, "24465": 20, "1057": 20, "8448": 20, "311": 20, "70163": 20, "1077": 20, "31284": 20, "6212": 20, "30315": 20, "1938": 20, "1288": 20, "1464": 20, "128001": [20, 281], "similarli": [20, 119, 148, 169, 179, 281], "wikimedia": 20, "wikipedia": [20, 46, 72], "cnn_dailymail_articles_dataset": 20, "index": [21, 48, 49, 50, 53, 54, 183, 185, 190, 197, 199, 221, 236, 261, 267, 274, 275], "embed": [21, 22, 78, 79, 80, 81, 91, 95, 101, 107, 116, 120, 125, 129, 142, 143, 144, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 182, 183, 184, 185, 188, 189, 191, 199, 200, 201, 203, 238, 274, 277, 280, 281], "vector": [21, 205, 224, 274, 280], "understood": 21, "plai": [21, 275, 280], "necessari": [21, 22, 55, 56, 248, 249, 250, 251, 252, 274, 279], "phi3": [21, 22, 165, 166, 168, 169, 170, 232, 273], "phi3_mini_token": 21, "p_token": 21, "phi": [21, 168, 169, 232], "32010": 21, "29871": 21, "1792": [21, 217], "9508": [21, 217], "32007": 21, "32001": 21, "4299": 21, "2933": [21, 217], "nuser": 21, "nmodel": 21, "sentencepiec": [21, 214, 277], "tiktoken": [21, 144, 215, 277], "host": [21, 267, 273, 276, 280], "distribut": [21, 77, 233, 244, 245, 254, 256, 261, 268, 271, 273, 276, 277, 278, 280], "alongsid": [21, 238, 280], "alreadi": [21, 24, 33, 37, 42, 62, 63, 65, 66, 67, 68, 69, 182, 183, 193, 194, 199, 232, 244, 267, 273, 275, 278, 279], "_token": [21, 25], "mistraltoken": [21, 162, 274], "adher": [21, 37, 42], "arbitrarili": 21, "small": [21, 184, 275, 280], "seq": [21, 189, 199], "len": [21, 22, 50, 53, 59, 62, 65, 66, 68, 189, 191, 199], "demonstr": [21, 280, 281], "7": [21, 22, 45, 47, 48, 49, 50, 182, 191, 219, 223], "6312": 21, "28709": 21, "assign": [21, 24, 55, 56], "uniqu": [21, 55, 56, 232], "abil": 21, "NOT": [21, 22, 91, 144, 158], "presenc": [21, 31], "certain": [21, 22, 24, 257, 274], "proper": [21, 267, 276], "end_of_text": 21, "special_token": [21, 144, 215, 274], "added_token": 21, "128257": 21, "128258": 21, "remain": [21, 37, 42, 236, 278, 279, 280], "special_tokens_path": [21, 119, 148, 169, 179], "basetoken": 21, "actual": [21, 23, 24, 26, 31, 33, 35, 55, 56, 59, 62, 63, 64, 66, 67, 68, 70, 144, 270, 274, 281], "string": [21, 22, 35, 36, 38, 44, 60, 61, 94, 106, 119, 144, 148, 162, 169, 179, 204, 212, 214, 215, 217, 228, 234, 237, 241, 248, 261, 273, 280], "kwarg": [21, 24, 27, 39, 179, 181, 188, 190, 195, 201, 204, 212, 213, 218, 244, 248, 249, 250, 251, 252, 254, 257], "dict": [21, 22, 24, 25, 26, 27, 31, 33, 35, 36, 37, 38, 42, 44, 47, 48, 49, 50, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 94, 106, 119, 144, 148, 162, 169, 179, 188, 190, 195, 199, 201, 202, 208, 209, 210, 211, 212, 213, 215, 216, 218, 229, 230, 231, 233, 235, 240, 244, 246, 248, 253, 258, 260], "given": [21, 25, 27, 44, 47, 52, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 76, 144, 193, 194, 206, 207, 212, 213, 221, 237, 241, 247, 253, 261, 263, 268, 279], "token_id": [21, 144, 212, 215], "its": [21, 54, 100, 153, 156, 183, 185, 189, 190, 199, 201, 205, 233, 253, 256, 273, 274, 275, 277, 279, 280], "sentencepiecebasetoken": [21, 212], "bpe": 21, "sp_token": 21, "reason": [21, 25, 73, 275, 280, 281], "walk": [22, 25, 251, 268, 274, 275, 276, 281, 282], "design": [22, 25, 225], "cover": [22, 23, 24, 25, 26, 274, 275, 282], "scenario": [22, 53, 144], "compos": [22, 191], "plug": [22, 280], "evalu": [22, 25, 266, 268, 270, 271, 276, 278, 279, 282], "gener": [22, 25, 47, 54, 61, 71, 74, 75, 76, 77, 144, 193, 194, 207, 220, 239, 248, 255, 256, 257, 264, 266, 270, 274, 278, 279, 280, 281, 282], "easi": [22, 25, 268, 279, 280], "understand": [22, 24, 25, 201, 266, 268, 269, 274, 279, 280, 282], "concept": [22, 271, 275, 276, 280], "talk": 22, "close": [22, 25, 248, 249, 250, 251, 252, 279], "veri": [22, 53, 189, 199, 273, 275, 280], "dictat": 22, "state_dict": [22, 195, 200, 201, 210, 229, 230, 231, 232, 233, 258, 279, 282], "store": [22, 55, 56, 248, 249, 252, 279, 280, 282], "disk": [22, 57, 249], "identifi": [22, 248], "state": [22, 25, 143, 189, 191, 193, 195, 199, 202, 208, 209, 210, 211, 220, 222, 229, 230, 231, 233, 235, 258, 275, 277, 279, 282], "match": [22, 44, 211, 248, 258, 267, 273, 275, 277, 279], "up": [22, 23, 25, 26, 50, 54, 61, 72, 144, 189, 193, 194, 199, 215, 219, 235, 248, 257, 269, 270, 271, 273, 274, 276, 277, 279, 280, 282], "exactli": [22, 211, 228, 281], "definit": [22, 279], "either": [22, 47, 55, 56, 73, 183, 189, 190, 211, 229, 248, 254, 267, 273, 279, 280, 281, 282], "explicit": 22, "error": [22, 24, 34, 52, 229, 256, 273], "except": [22, 36, 153, 217, 278], "wors": [22, 280], "silent": 22, "succe": 22, "popular": [22, 199, 268, 275], "offici": [22, 100, 274, 276, 277], "websit": 22, "inspect": [22, 275, 279, 282], "mmap": [22, 275], "weights_onli": [22, 231], "map_loc": [22, 275], "cpu": [22, 25, 194, 195, 237, 257, 261, 267, 273, 275, 282], "tensor": [22, 45, 47, 48, 49, 50, 73, 74, 75, 76, 77, 78, 79, 80, 81, 142, 143, 180, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 195, 196, 197, 198, 199, 200, 201, 205, 206, 220, 221, 222, 223, 224, 225, 226, 229, 242, 248, 249, 250, 251, 252, 255, 258, 260, 278, 279, 280, 282], "item": 22, "f": [22, 26, 59, 62, 65, 66, 68, 228, 274, 275, 278, 279, 282], "tok_embed": [22, 189, 199, 200], "32000": [22, 27, 279], "4096": [22, 27, 61, 72, 183, 185, 279, 281], "292": 22, "tabl": [22, 200, 274, 275, 277, 278, 280, 282], "layer": [22, 25, 81, 85, 86, 87, 88, 89, 90, 91, 95, 96, 97, 98, 99, 101, 105, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 143, 146, 147, 149, 150, 151, 152, 154, 155, 156, 157, 158, 160, 161, 163, 164, 165, 166, 167, 170, 171, 172, 173, 174, 175, 183, 186, 187, 188, 189, 190, 191, 192, 193, 194, 199, 201, 203, 205, 206, 210, 211, 227, 234, 238, 268, 269, 277, 279, 280, 281, 282], "dim": [22, 50, 142, 143, 180, 183, 184, 185, 189, 196, 198, 199, 278], "within": [22, 24, 27, 54, 73, 77, 78, 95, 107, 120, 129, 149, 150, 151, 154, 156, 165, 171, 191, 193, 194, 251, 256, 257, 273, 279, 282], "big": 22, "bin": [22, 273, 275], "piec": 22, "pytorch_model": [22, 275], "00001": [22, 273, 278], "00002": [22, 273, 278], "embed_token": 22, "241": 22, "Not": 22, "fewer": [22, 183], "sinc": [22, 24, 27, 55, 56, 198, 229, 231, 274, 275, 277, 280, 281], "mismatch": 22, "caus": [22, 214], "re": [22, 24, 193, 201, 225, 231, 268, 269, 270, 274, 275, 276, 279, 280], "end": [22, 25, 36, 57, 71, 144, 215, 217, 266, 268, 274, 277, 279, 281], "number": [22, 25, 44, 50, 54, 61, 72, 73, 78, 79, 81, 91, 95, 101, 107, 116, 120, 125, 129, 142, 143, 144, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 182, 183, 189, 191, 197, 198, 219, 229, 230, 231, 233, 234, 236, 243, 256, 257, 273, 276, 278, 279, 280], "save": [22, 25, 26, 189, 195, 196, 198, 199, 229, 230, 231, 233, 238, 247, 252, 266, 270, 273, 274, 275, 277, 279, 280, 281], "less": [22, 50, 73, 275, 276, 277, 280, 282], "prone": 22, "invari": 22, "accept": [22, 24, 227, 276, 280, 282], "explicitli": [22, 204, 268, 279], "produc": [22, 233, 270, 281, 282], "One": [22, 50, 281], "advantag": [22, 220, 223, 270, 279], "abl": [22, 25, 275, 276, 281], "post": [22, 191, 253, 257, 270, 275, 277, 281, 282], "quantiz": [22, 85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 205, 206, 231, 241, 266, 267, 269, 271, 276, 282], "eval": [22, 266, 268, 281], "without": [22, 24, 26, 183, 187, 189, 193, 199, 210, 267, 268, 270, 274, 275, 279, 280, 281], "OR": 22, "surround": [22, 25, 268], "load_checkpoint": [22, 25, 229, 230, 231, 232], "save_checkpoint": [22, 25, 26, 229, 230, 231], "permut": 22, "behav": 22, "further": [22, 191, 225, 273, 278, 279, 280, 281, 282], "illustr": [22, 65, 66, 277], "whilst": [22, 269, 280], "read": [22, 229, 230, 231, 268, 280], "compat": [22, 229, 231, 280, 281], "framework": [22, 25, 268], "mention": [22, 275, 280, 282], "assum": [22, 35, 42, 45, 47, 65, 94, 106, 119, 148, 162, 169, 179, 182, 183, 185, 190, 199, 200, 202, 208, 215, 233, 235, 236, 237, 239, 274, 275, 279], "checkpoint_dir": [22, 24, 229, 230, 231, 275, 277, 278, 281], "easiest": [22, 275, 276], "everyth": [22, 25, 268, 271, 276], "flow": [22, 54, 281, 282], "safetensor": [22, 228, 229, 273, 278], "output_dir": [22, 24, 229, 230, 231, 257, 275, 277, 279, 281, 282], "snippet": 22, "explain": [22, 280], "fullmodelhfcheckpoint": [22, 275, 278], "sort": [22, 229, 231], "order": [22, 23, 25, 229, 231, 251, 252, 276, 280], "matter": [22, 229, 231, 273, 279], "checkpoint_fil": [22, 24, 26, 229, 230, 231, 275, 277, 278, 279, 281, 282], "restart": [22, 273], "previou": [22, 54, 229, 230, 231, 278], "section": [22, 25, 240, 266, 275, 277, 280, 282], "recipe_checkpoint": [22, 229, 230, 231, 281], "model_typ": [22, 229, 230, 231, 275, 277, 281], "resume_from_checkpoint": [22, 229, 230, 231], "discrep": [22, 229], "github": [22, 27, 74, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174, 184, 185, 196, 197, 222, 223, 224, 225, 236, 267, 275, 277, 278], "repositori": [22, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 100, 269, 270, 275, 276], "fullmodelmetacheckpoint": [22, 277, 281], "test": [22, 24, 25, 73, 268, 270, 274, 280], "written": [22, 24, 25, 229, 230, 248, 249, 250, 251, 252, 268], "partit": [22, 229, 282], "key_1": [22, 231], "weight_1": 22, "key_2": 22, "weight_2": 22, "mid": 22, "chekpoint": 22, "middl": [22, 201, 275, 280], "subsequ": [22, 25, 182, 189, 191, 219], "recipe_st": [22, 229, 230, 231], "pt": [22, 26, 229, 230, 231, 275, 277, 278, 281], "epoch": [22, 25, 26, 229, 230, 231, 233, 236, 273, 274, 275, 276, 277, 281], "etc": [22, 25, 143, 229, 240, 276], "flood": 22, "overwritten": 22, "updat": [22, 24, 25, 38, 182, 183, 189, 199, 205, 218, 222, 223, 229, 233, 257, 260, 267, 274, 275, 276, 277, 279, 280, 281, 282], "hf_model_0001_0": [22, 275, 278], "hf_model_0002_0": [22, 275], "adapt": [22, 85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 143, 154, 155, 156, 157, 165, 166, 199, 201, 202, 204, 205, 206, 207, 208, 209, 229, 230, 231, 247, 269, 274, 275, 279, 282], "merg": [22, 27, 28, 179, 229, 275, 277, 282], "tutori": [22, 254, 268, 269, 270, 274, 275, 276, 277, 278, 279, 280, 281, 282], "save_adapter_weights_onli": 22, "choos": [22, 60, 279], "resum": [22, 25, 229, 230, 231, 236, 282], "frozen": [22, 143, 149, 152, 200, 222, 279, 280, 282], "learnt": [22, 274, 275], "refer": [22, 24, 25, 184, 185, 187, 191, 196, 207, 221, 222, 223, 224, 225, 248, 268, 279, 280, 281], "adapter_checkpoint": [22, 229, 230, 231], "adapter_0": [22, 275], "knowledg": [22, 266], "forward": [22, 25, 78, 79, 80, 142, 143, 180, 181, 183, 184, 185, 186, 188, 189, 190, 191, 193, 194, 196, 197, 198, 199, 200, 201, 205, 206, 222, 223, 224, 225, 240, 257, 277, 278, 279, 280, 282], "modeltyp": [22, 229, 230, 231], "llama2_13b": [22, 108], "right": [22, 47, 50, 76, 189, 229, 275, 277, 279], "pytorch_fil": 22, "00003": [22, 228, 278], "torchtune_sd": 22, "load_state_dict": [22, 199, 200, 201, 210, 233, 258, 279], "successfulli": [22, 273, 276], "vocab": [22, 27, 179, 189, 199, 200, 277], "70": [22, 117], "randint": 22, "no_grad": 22, "6": [22, 45, 47, 48, 49, 50, 54, 91, 95, 184, 191, 242, 270, 281, 282], "3989": 22, "9": [22, 45, 47, 48, 50, 182, 191, 242, 275, 281, 282], "0531": 22, "2375": 22, "5": [22, 24, 45, 47, 48, 49, 50, 75, 191, 222, 225, 226, 236, 275, 276, 277, 278, 280], "2822": 22, "4872": 22, "7469": 22, "8": [22, 45, 47, 48, 50, 59, 62, 65, 66, 68, 85, 86, 87, 88, 89, 90, 96, 97, 98, 99, 108, 109, 110, 111, 112, 113, 114, 115, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 155, 157, 163, 164, 166, 170, 172, 173, 174, 182, 191, 196, 198, 275, 278, 279, 280, 281, 282], "6737": 22, "0023": 22, "8235": 22, "6819": 22, "2424": 22, "0109": 22, "6915": 22, "3618": 22, "1628": 22, "8594": 22, "5857": 22, "1151": 22, "7808": 22, "2322": 22, "8850": 22, "9604": 22, "7624": 22, "6040": 22, "3159": 22, "5849": 22, "8039": 22, "9322": 22, "2010": [22, 191], "6824": 22, "8929": 22, "8465": 22, "3794": 22, "3500": 22, "6145": 22, "5931": 22, "find": [22, 23, 25, 26, 222, 273, 275, 276, 278, 279, 280], "hope": 22, "deeper": [22, 269, 270, 276, 280], "insight": [22, 275], "happi": [22, 275], "cometlogg": 23, "checkpoint": [23, 24, 25, 195, 199, 201, 215, 228, 229, 230, 231, 232, 233, 234, 252, 254, 258, 268, 270, 273, 277, 278, 279, 281, 282], "workspac": [23, 26, 248], "seen": [23, 26, 279, 282], "screenshot": [23, 26], "comet_ml": [23, 248], "featur": [23, 25, 26, 267, 268, 269, 270, 275, 276, 280], "pip": [23, 26, 248, 251, 252, 267, 275, 277, 280], "login": [23, 26, 248, 252, 273, 275], "metric_logg": [23, 24, 25, 26], "metric_log": [23, 24, 26, 248, 249, 250, 251, 252], "experiment_nam": [23, 248], "experi": [23, 24, 248, 252, 266, 268, 277, 278, 279], "grab": [23, 26, 277], "tab": [23, 26], "asset": 23, "artifact": [23, 26, 257], "click": [23, 26], "effect": [24, 225, 278, 280, 281], "prerequisit": [24, 274, 275, 276, 277, 278, 279, 281, 282], "Be": [24, 274, 275, 276, 277, 278, 279, 280, 281, 282], "familiar": [24, 274, 275, 276, 277, 278, 279, 281, 282], "fundament": [24, 281], "reproduc": [24, 248], "overridden": [24, 257], "quick": 24, "seed": [24, 25, 26, 256, 276, 281], "shuffl": [24, 54, 281], "dtype": [24, 25, 77, 182, 183, 188, 189, 190, 192, 193, 194, 195, 199, 201, 237, 255, 259, 275, 278, 280, 281, 282], "fp32": [24, 189, 196, 198, 280, 281, 282], "enable_fsdp": 24, "keyword": [24, 27, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 195, 274], "subfield": 24, "dotpath": [24, 94, 106, 119, 148, 162, 169, 179], "wish": [24, 182, 193, 258], "exact": [24, 27, 275], "normal": [24, 54, 144, 181, 183, 184, 188, 189, 190, 196, 197, 198, 214, 274, 279, 281, 282], "instanc": [24, 27, 53, 107, 120, 129, 149, 154, 156, 165, 171, 172, 173, 176, 177, 195, 202, 208, 209, 279], "preced": [24, 27, 273, 277, 279], "throw": 24, "notic": [24, 78, 79, 80, 191, 274, 279], "miss": [24, 210, 211, 257, 279], "llama2_token": [24, 274, 275], "llama2token": [24, 106], "512": [24, 81, 282], "overwrit": [24, 231, 258, 267, 273], "duplic": [24, 25, 268, 273], "sometim": 24, "resolv": [24, 28, 276], "alpaca": [24, 31, 53, 58, 59, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174, 278], "disklogg": 24, "log_dir": [24, 249, 251, 252], "verifi": [24, 237, 238, 261, 274, 276, 279], "properli": [24, 210, 245, 273], "wa": [24, 35, 42, 50, 75, 79, 80, 81, 147, 151, 191, 210, 274, 279, 281, 282], "7b_lora_single_devic": [24, 275, 276, 279, 282], "my_config": [24, 273], "guidelin": 24, "tempt": 24, "put": [24, 25, 271, 276, 279, 281], "much": [24, 200, 225, 275, 277, 279, 280, 281, 282], "switch": 24, "encourag": [24, 225, 279, 280], "clariti": 24, "significantli": [24, 222, 269, 270, 280], "easier": [24, 275, 276], "dont": 24, "privat": 24, "parent": [24, 273], "guarante": 24, "stabil": [24, 196, 198, 268, 270, 280, 281, 282], "underscor": 24, "_alpaca": 24, "k1": [24, 25], "v1": [24, 25, 72], "k2": [24, 25], "v2": [24, 25, 248], "my_model_checkpoint": 24, "file_1": 24, "file_2": 24, "my_tokenizer_path": 24, "nest": [24, 260], "dot": 24, "notat": [24, 50, 142, 143, 183, 185, 189, 199, 220, 221, 242], "flag": [24, 25, 36, 59, 60, 62, 64, 67, 68, 69, 227, 231, 238, 273, 280, 282], "bitsandbyt": [24, 280], "pagedadamw8bit": [24, 280], "delet": [24, 189, 192, 193, 194, 199], "foreach": 24, "8b_full": [24, 273], "adamw": [24, 279, 280], "2e": [24, 280], "fuse": [24, 146, 150, 199, 200, 201, 202, 253, 281], "nproc_per_nod": [24, 270, 277, 279, 281], "full_finetune_distribut": [24, 239, 273, 275, 276], "thought": [25, 268, 271, 276, 282], "target": [25, 75, 197, 198, 225, 268, 278], "pipelin": [25, 268, 270], "eg": [25, 189, 199, 229, 268], "meaning": [25, 268, 275], "fsdp": [25, 187, 227, 233, 238, 247, 276, 277, 280], "activ": [25, 81, 180, 234, 240, 246, 254, 257, 268, 270, 281, 282], "gradient": [25, 197, 198, 247, 253, 257, 268, 270, 275, 277, 279, 282], "accumul": [25, 253, 257, 268, 270], "mix": [25, 181, 273, 275, 280], "precis": [25, 181, 195, 237, 268, 270, 276, 282], "complex": 25, "becom": [25, 191, 267], "harder": 25, "anticip": 25, "methodolog": 25, "possibl": [25, 54, 228, 273, 280], "trade": [25, 280], "vs": [25, 276], "qualiti": [25, 275, 279, 281], "believ": 25, "suit": [25, 276, 280], "solut": 25, "result": [25, 65, 81, 147, 151, 191, 198, 217, 219, 257, 270, 275, 277, 278, 279, 280, 281, 282], "meant": [25, 195, 233], "expertis": 25, "routin": 25, "yourself": [25, 273, 277, 279], "exist": [25, 194, 201, 233, 248, 267, 273, 275, 276, 277, 282], "ones": [25, 50, 182], "modular": [25, 268], "wandb": [25, 26, 252, 276], "log": [25, 28, 222, 223, 224, 225, 240, 246, 248, 249, 250, 251, 252, 262, 275, 276, 277, 278, 279, 280, 282], "fulli": [25, 53, 149], "nativ": [25, 266, 268, 279, 281, 282], "numer": [25, 66, 268, 270, 281], "pariti": [25, 268], "verif": [25, 184], "benchmark": [25, 256, 268, 275, 277, 279, 281], "limit": [25, 233, 278, 280, 281], "hidden": [25, 81, 143, 147, 151, 180, 189, 191], "behind": 25, "unnecessari": 25, "abstract": [25, 212, 213, 268, 276, 282], "No": [25, 231, 268], "go": [25, 81, 100, 147, 151, 153, 191, 217, 268, 275, 276, 278, 280, 282], "figur": [25, 279, 282], "spectrum": 25, "decid": 25, "avail": [25, 35, 42, 72, 199, 201, 237, 245, 261, 268, 273, 275, 277, 279, 280], "consist": [25, 33, 37, 42, 65, 66, 72, 271, 276], "overrid": [25, 28, 29, 33, 37, 42, 62, 63, 65, 66, 67, 68, 69, 258, 271, 273, 275, 276, 277, 278, 282], "valid": [25, 52, 76, 197, 210, 211, 221, 239, 258, 259, 267, 271, 275, 276], "closer": [25, 278, 279], "monolith": [25, 268], "trainer": [25, 222, 224, 225], "wrapper": [25, 181, 214, 215, 233, 235, 273, 279], "around": [25, 144, 181, 214, 215, 240, 273, 274, 275, 279, 280, 281, 282], "extern": 25, "primarili": [25, 53, 279], "eleutherai": [25, 72, 268, 278, 279, 281], "har": [25, 268, 278, 279, 281], "stage": [25, 191], "distil": [25, 266], "dataload": [25, 54, 59, 62, 65, 66, 68], "applic": [25, 229, 230, 252], "clean": [25, 26, 58, 278], "group": [25, 183, 243, 244, 248, 249, 250, 251, 252, 273, 277, 281], "init_process_group": [25, 244], "backend": [25, 273, 281], "gloo": 25, "nccl": 25, "fullfinetunerecipedistribut": 25, "cleanup": 25, "stuff": 25, "carri": [25, 56], "metric": [25, 276, 278, 280, 281], "logger": [25, 246, 248, 249, 250, 251, 252, 262, 276], "_devic": 25, "get_devic": 25, "_dtype": 25, "get_dtyp": 25, "ckpt_dict": 25, "wrap": [25, 201, 227, 234, 238, 247, 254, 274, 280], "_model": [25, 233], "_setup_model": 25, "_setup_token": 25, "_optim": 25, "_setup_optim": 25, "_loss_fn": 25, "_setup_loss": 25, "_sampler": 25, "_dataload": 25, "_setup_data": 25, "backward": [25, 233, 235, 253, 257, 282], "zero_grad": 25, "curr_epoch": 25, "rang": [25, 200, 222, 223, 225, 256, 273, 277, 281], "epochs_run": [25, 26], "total_epoch": [25, 26], "idx": [25, 54], "enumer": 25, "_autocast": 25, "logit": [25, 73, 74, 77, 196, 197, 198, 242, 278], "global_step": 25, "_log_every_n_step": 25, "_metric_logg": 25, "log_dict": [25, 248, 249, 250, 251, 252], "step": [25, 54, 55, 56, 65, 66, 189, 199, 220, 233, 235, 236, 248, 249, 250, 251, 252, 253, 257, 266, 270, 275, 279, 281, 282], "recipe_main": [25, 29], "fullfinetunerecip": 25, "wandblogg": [26, 279, 282], "tip": 26, "straggler": 26, "background": 26, "crash": 26, "otherwis": [26, 45, 47, 50, 79, 80, 81, 147, 151, 187, 189, 191, 245, 248, 274, 281], "exit": [26, 193, 194, 207, 267, 273], "resourc": [26, 248, 249, 250, 251, 252, 280, 281], "kill": 26, "ps": 26, "aux": 26, "grep": 26, "awk": 26, "xarg": 26, "desir": [26, 55, 56, 255, 274, 280], "suggest": [26, 278], "approach": [26, 53, 278], "full_finetun": 26, "joinpath": 26, "_checkpoint": [26, 275], "_output_dir": [26, 229, 230, 231], "torchtune_model_": 26, "with_suffix": 26, "wandb_at": 26, "descript": [26, 273], "whatev": 26, "metadata": [26, 281], "seed_kei": 26, "epochs_kei": 26, "total_epochs_kei": 26, "max_steps_kei": 26, "max_steps_per_epoch": [26, 281], "add_fil": 26, "log_artifact": 26, "hydra": 27, "facebook": 27, "research": 27, "com": [27, 74, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174, 184, 185, 196, 197, 222, 223, 224, 225, 236, 248, 267, 275, 277, 278], "facebookresearch": [27, 184], "blob": [27, 74, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 169, 172, 173, 174, 184, 185, 197, 222, 223, 224, 225, 236], "_intern": 27, "_instantiate2": 27, "l148": 27, "num_head": [27, 81, 91, 95, 101, 107, 116, 120, 125, 129, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 182, 183, 185, 189], "num_kv_head": [27, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 182, 183], "vocab_s": [27, 73, 74, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 196, 197, 198, 200], "parsed_yaml": 27, "embed_dim": [27, 78, 79, 80, 81, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 183, 185, 188, 189, 190, 191, 200, 201, 258, 279], "valueerror": [27, 33, 35, 37, 42, 44, 46, 47, 50, 52, 53, 59, 60, 62, 64, 65, 66, 68, 69, 71, 171, 182, 183, 191, 192, 193, 194, 229, 230, 231, 237, 240, 256, 259], "recipe_nam": 28, "rank": [28, 85, 86, 87, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 205, 206, 243, 245, 256, 269, 276, 279, 282], "zero": [28, 50, 182, 184, 189, 199, 228, 275, 277, 281], "displai": 28, "callabl": [29, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 81, 189, 207, 227, 238, 241, 247, 254], "With": [29, 275, 278, 279, 281, 282], "my_recip": 29, "foo": 29, "bar": [29, 268, 276, 280], "configerror": 30, "cannot": [30, 46, 231, 277], "equival": [31, 35, 79, 224, 225], "condit": [31, 73, 245, 273], "dedic": 31, "due": [31, 214, 279, 280, 282], "keep": [31, 33, 35, 37, 42, 63, 64, 66, 67, 70, 200, 275, 279, 280], "openai": [32, 37, 60, 223], "markup": 32, "im_start": 32, "context": [32, 168, 193, 194, 207, 255, 257, 280], "im_end": 32, "goe": [32, 207], "a2": [33, 55], "functool": [34, 40, 43, 227], "partial": [34, 40, 43, 227], "_prompt_templ": [34, 40, 43], "assistant_messag": [34, 40, 43], "respect": [35, 53, 100, 182, 209, 257, 274], "final": [35, 42, 55, 56, 85, 86, 87, 91, 95, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 146, 147, 149, 150, 151, 154, 155, 156, 157, 158, 165, 166, 171, 174, 175, 180, 189, 199, 210, 211, 275, 277, 278, 279, 280, 282], "leav": [35, 42, 280], "liter": [36, 38, 41, 85, 86, 87, 88, 89, 90, 94, 95, 96, 97, 98, 99, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 119, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 148, 149, 150, 151, 152, 154, 155, 156, 157, 162, 163, 164, 165, 166, 169, 170, 171, 172, 173, 174, 179, 210, 211], "union": [36, 46, 47, 58, 59, 60, 62, 64, 68, 69, 71, 72, 94, 106, 119, 148, 162, 169, 179, 189, 199, 211, 229, 234, 239, 248, 249, 250, 251, 252, 254, 256], "interleav": [36, 219], "attach": 36, "writer": 36, "calcul": [36, 38, 76, 142, 144, 183, 188, 190, 191, 220, 221, 223, 277], "consecut": [36, 52, 182, 219], "last": [36, 51, 54, 71, 189, 221, 233, 236], "properti": [36, 279, 280], "media": [36, 56], "classmethod": 36, "image_url": 37, "unmask": [37, 42, 197], "consid": [38, 53, 55, 56, 79, 80, 81, 147, 151, 191, 280], "come": [38, 52, 204, 279, 280], "nanswer": 40, "alia": [41, 227], "alwai": [42, 248, 258, 274, 280], "nsummari": [43, 274], "summari": [43, 53, 68, 191, 240], "batch_first": 45, "padding_valu": 45, "float": [45, 73, 74, 77, 85, 86, 87, 88, 89, 90, 91, 95, 96, 97, 98, 99, 101, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 144, 149, 150, 151, 152, 154, 155, 156, 157, 158, 160, 163, 164, 165, 166, 167, 170, 171, 172, 173, 174, 175, 183, 184, 205, 206, 220, 221, 222, 223, 224, 225, 233, 236, 239, 240, 246, 248, 249, 250, 251, 252, 279, 280, 281, 282], "rnn": [45, 47, 50], "pad_sequ": [45, 47, 50], "variabl": [45, 232, 245, 248, 280, 282], "left": [45, 47, 50, 144, 189, 279], "longest": [45, 49, 50], "trail": 45, "dimens": [45, 50, 91, 95, 101, 107, 116, 120, 125, 129, 142, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 180, 182, 183, 185, 189, 191, 200, 205, 206, 277, 279, 280, 282], "element": [45, 47, 50, 53, 197, 242, 275], "12": [45, 47, 48, 69, 191, 267, 281], "image_loc": 46, "www": [46, 248], "org": [46, 65, 82, 83, 84, 85, 86, 88, 89, 90, 95, 96, 97, 98, 99, 102, 103, 104, 105, 107, 108, 109, 110, 112, 113, 114, 115, 120, 121, 122, 123, 124, 131, 132, 133, 134, 135, 138, 139, 140, 141, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 183, 184, 185, 191, 219, 220, 222, 223, 224, 225, 227, 245, 251, 254, 256, 262, 267], "en": [46, 57, 61, 63, 71, 72, 281], "pad_direct": [47, 50], "keys_to_pad": 47, "padding_idx": [47, 48, 49, 50, 54], "left_pad_sequ": [47, 50], "integ": [47, 49, 200, 227, 228, 234, 256], "batch_siz": [47, 59, 62, 65, 66, 68, 182, 183, 188, 189, 190, 192, 193, 194, 196, 197, 198, 199, 200, 201, 222, 224, 226, 275, 280, 281], "ignore_idx": [48, 49, 50], "input_id": [48, 242], "chosen_input_id": [48, 67], "chosen_label": 48, "15": [48, 191, 238, 274, 275, 279, 282], "16": [48, 85, 86, 87, 88, 89, 90, 96, 97, 98, 99, 108, 109, 110, 111, 112, 113, 114, 115, 121, 122, 123, 124, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 155, 157, 163, 164, 166, 170, 172, 173, 174, 182, 191, 279, 280, 282], "17": [48, 191, 279], "18": [48, 191, 277], "19": [48, 191, 282], "20": [48, 191, 226, 281], "token_pair": 49, "padded_col": 49, "pad_max_til": 50, "pad_max_imag": 50, "tile": [50, 78, 79, 80, 81, 142, 143, 144, 145, 147, 148, 149, 151, 191, 219], "aspect": [50, 78, 79, 268], "ratio": [50, 78, 79, 222, 223], "text_seq_len": [50, 219], "n_tile": [50, 78, 79, 191], "h": [50, 142, 182, 191, 196, 198, 267, 273], "w": [50, 82, 83, 84, 92, 93, 102, 103, 104, 105, 117, 118, 126, 127, 128, 136, 137, 142, 159, 161, 176, 177, 178, 191, 248, 251, 252, 274, 275, 279, 282], "h_ratio": 50, "w_ratio": 50, "encoder_mask": [50, 188, 189, 199], "image_seq_len": [50, 219], "channel": [50, 81, 142, 144, 147, 151, 191, 205, 281], "height": [50, 142], "largest": 50, "bsz": [50, 73, 74, 75, 76, 78, 79, 191, 196, 198], "max_num_imag": 50, "max_num_til": [50, 78, 79, 81, 144, 147, 151, 191, 219], "tokens_per_til": 50, "image_id": 50, "four": [50, 279], "model_input": 50, "max_text_seq_len": 50, "40": [50, 79, 80, 81, 147, 151, 191, 219, 280, 282], "did": [50, 277, 282], "extra": [50, 144, 199, 267, 274, 279, 280, 281, 282], "second": [50, 183, 200, 275, 279, 280, 282], "eos_id": [51, 144, 215, 217], "shorter": [52, 189], "min": [52, 279], "invalid": 52, "sub": [53, 251], "unifi": [53, 161], "simplifi": [53, 222, 273, 278, 279], "simultan": 53, "intern": 53, "aggreg": 53, "transpar": 53, "howev": [53, 169, 267, 278, 280], "constitu": 53, "might": [53, 193, 200, 203, 273, 275, 280], "larg": [53, 196, 198, 205, 206, 257, 273, 280, 282], "comput": [53, 55, 56, 101, 107, 116, 120, 125, 129, 142, 143, 146, 150, 171, 175, 183, 185, 189, 190, 196, 198, 199, 219, 222, 224, 225, 240, 256, 270, 275, 278, 280, 281, 282], "cumul": 53, "maintain": [53, 201, 269, 280, 282], "deleg": 53, "retriev": [53, 55, 56, 189, 238], "lead": [53, 214, 228, 270], "scale": [53, 73, 74, 77, 85, 86, 87, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 184, 186, 188, 190, 205, 206, 221, 225, 279, 280, 281, 282], "strategi": [53, 270], "stream": [53, 262, 280], "demand": 53, "deriv": [53, 180, 189, 190], "instans": 53, "dataset1": 53, "mycustomdataset": 53, "params1": 53, "dataset2": 53, "params2": 53, "concat_dataset": 53, "total": [53, 221, 223, 236, 243, 265, 272, 275, 277, 278, 279, 280], "data_point": 53, "1500": 53, "vicgal": 53, "gpt4": 53, "samsum": [53, 68], "focus": [53, 271, 276, 280], "enhanc": [53, 191, 225, 280, 282], "divers": 53, "machin": [53, 224, 261, 273, 275], "max_pack": 54, "outsid": [54, 256, 257, 279], "sampler": [54, 276], "part": [54, 200, 224, 274, 282], "buffer": [54, 189, 199, 280], "enough": [54, 274], "lower": [54, 270, 278, 279], "triangular": 54, "wise": 54, "made": [54, 60, 64, 67, 71, 144, 275], "smaller": [54, 200, 275, 277, 278, 279, 280, 281, 282], "jam": 54, "s1": [54, 214], "s2": [54, 214], "s3": 54, "s4": 54, "contamin": 54, "input_po": [54, 74, 183, 185, 189, 190, 199], "matrix": [54, 188, 189, 199], "increment": 54, "move": [54, 71, 189, 260, 280], "entir": [54, 71, 196, 203, 247, 274, 282], "avoid": [54, 71, 184, 191, 195, 256, 273, 281, 282], "truncat": [54, 61, 71, 72, 94, 106, 119, 144, 148, 162, 169, 179, 215, 226], "sentenc": [54, 71], "techniqu": [55, 268, 269, 270, 275, 276, 277, 278, 279, 280, 281], "repons": 55, "At": [55, 56, 189, 199], "extract": [55, 56, 61, 216], "against": [55, 56, 225, 263, 281, 282], "unit": [55, 56, 247, 268], "filepath": [55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72], "filter": [55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 281], "prior": [55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 258], "doc": [55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 227, 245, 248, 251, 252, 256, 262, 273, 275], "round": [56, 281], "incorpor": [56, 222], "happen": [56, 196, 198], "ti": [56, 95, 171, 175, 187, 280], "agnost": 56, "treat": [56, 191, 207, 274], "minimum": [56, 65, 66], "corpu": [57, 61, 71, 72], "package_refer": [57, 61, 63, 71, 72], "loading_method": [57, 61, 63, 71, 72], "tabular": [57, 71], "txt": [57, 71, 179, 249, 276], "eo": [57, 71, 169, 214, 217, 274], "yahma": 58, "variant": [58, 62, 68], "page": [58, 72, 267, 268, 273, 276, 277, 280], "tatsu": 59, "lab": [59, 74], "codebas": [59, 275], "independ": 59, "contribut": [59, 60, 62, 64, 67, 68, 69, 197, 198, 221, 223], "alpacatomessag": 59, "alpaca_d": 59, "altern": [60, 64, 67, 193, 276, 280], "toward": [60, 225], "my_dataset": [60, 64], "london": [60, 64], "ccdv": 61, "cnn_dailymail": 61, "textcompletiondataset": [61, 71, 72], "cnn": 61, "dailymail": 61, "articl": [61, 72], "highlight": [61, 282], "disabl": [61, 72, 189, 193, 199, 207, 256, 281], "highest": [61, 72], "conjunct": [62, 68, 70, 189, 280], "grammar_d": 62, "rlhflow": 63, "hh": 63, "preferencedataset": [63, 67, 70], "liuhaotian": 65, "llava": 65, "150k": 65, "coco": 65, "train2017": 65, "llava_instruct_150k": 65, "2017": 65, "visit": [65, 275], "cocodataset": 65, "wget": 65, "zip": [65, 264], "unzip": 65, "minim": [65, 66, 276, 278, 279, 280, 281, 282], "clip": [65, 66, 78, 79, 80, 81, 142, 143, 144, 147, 151, 191, 223], "mymodeltransform": [65, 66], "tokenizer_path": [65, 66], "image_transform": [65, 66], "yet": [65, 66, 153, 274, 275], "llava_instruct_d": 65, "huggingfacem4": 66, "the_cauldron": 66, "cauldron": 66, "card": 66, "cauldron_d": 66, "compris": 67, "share": [67, 183, 187, 275], "c1": 67, "r1": 67, "chosen_messag": 67, "rejected_messag": 67, "samsung": 68, "samsum_d": 68, "351": 69, "82": 69, "391": 69, "221": 69, "220": 69, "193": 69, "471": 69, "lvwerra": 70, "stack": [70, 191, 257], "exchang": 70, "allenai": [71, 281], "data_dir": 71, "realnewslik": 71, "wikitext_document_level": 72, "wikitext": [72, 281], "103": [72, 275], "transformerdecod": [73, 74, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 95, 96, 97, 98, 99, 101, 102, 103, 104, 105, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 146, 150, 154, 155, 156, 157, 158, 159, 160, 161, 163, 164, 165, 166, 167, 168, 170, 171, 172, 173, 174, 175, 176, 177, 178, 188, 190, 199, 200, 201, 279], "max_generated_token": 73, "pad_id": [73, 226], "temperatur": [73, 74, 77, 222, 224, 225, 275], "top_k": [73, 74, 77, 275], "stop_token": [73, 226], "rng": 73, "custom_generate_next_token": 73, "seq_length": [73, 74, 75, 188, 190, 200, 201], "prune": [73, 77, 282], "probabl": [73, 77, 85, 86, 87, 95, 96, 97, 107, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 205, 206, 222, 223, 224, 225, 275, 278], "stop": [73, 226], "random": [73, 191, 256, 276], "compil": [73, 196, 275, 277, 280, 282], "generate_next_token": 73, "llama3_8b": [73, 122, 130, 199, 277, 280, 281], "manual_se": 73, "tolist": 73, "jeremi": 73, "m": [73, 195, 274, 281], "seq_len": [73, 75, 76, 189], "num_generated_token": 73, "q": [74, 77, 183, 279], "randomli": [74, 77, 258], "softmax": [74, 77, 183, 189, 190, 199, 278], "trick": [74, 77], "fast": [74, 275], "32971d3129541c5bfb4f715abc33d1c5f408d204": 74, "l40": 74, "k": [74, 77, 79, 183, 279], "padding_mask": [75, 76, 223, 226], "target_seq_len": 75, "suitabl": 75, "scaled_dot_product_attent": [75, 91, 95, 101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175, 183], "static": 75, "kv": [75, 182, 183, 189, 190, 192, 193, 194, 199, 281], "cach": [75, 182, 183, 185, 188, 189, 190, 192, 193, 194, 199, 201, 267, 273], "longer": [75, 182, 280], "boolean": [75, 76, 81, 183, 188, 189, 190, 199, 201, 227, 242], "assertionerror": [75, 81, 182, 188, 189, 210, 211, 258], "shift": [76, 189], "uniform_": 77, "int32": 77, "patch": [78, 79, 80, 81, 143, 144, 147, 151, 191, 219], "check": [78, 79, 80, 81, 188, 189, 190, 191, 199, 201, 210, 237, 245, 263, 266, 268, 269, 270, 271, 274, 275, 276, 279, 280], "vision_transform": [78, 79, 80, 81], "visiontransform": [78, 79, 80, 81], "divid": [78, 79, 80, 81, 144, 147, 151, 191, 197, 198, 219], "dimension": [78, 79, 80, 81, 147, 151, 191], "n_img": [78, 79, 191], "n_tokens_per_til": [78, 79, 80], "crop": [78, 79, 80, 81, 142, 147, 151, 191], "local_token_positional_embed": 79, "_position_embed": [79, 191], "tokenpositionalembed": [79, 191], "gate": [79, 186, 232, 269, 270, 273, 276], "global_token_positional_embed": 79, "400": [79, 80, 81, 147, 151, 191, 219], "10x10": [79, 80, 81, 147, 151, 191, 219], "grid": [79, 80, 81, 147, 151, 191, 219], "th": [79, 182], "silu": [81, 180], "cls_output_dim": [81, 191], "attn_bia": 81, "out_indic": [81, 191], "output_cls_project": 81, "in_channel": [81, 147, 151, 191], "intermediate_act": 81, "transformerencoderlay": 81, "cl": [81, 143, 191], "mlp": [81, 85, 86, 87, 91, 95, 96, 97, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 146, 149, 150, 151, 154, 155, 156, 157, 158, 160, 165, 166, 167, 171, 172, 173, 174, 175, 188, 189, 190, 210, 211, 277, 279, 280], "bia": [81, 187, 204, 205, 206, 258, 279, 281, 282], "intermedi": [81, 91, 95, 101, 107, 116, 120, 125, 129, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 191, 231, 254, 277, 282], "fourth": [81, 147, 151, 191], "determin": [81, 147, 151, 211], "divis": [81, 184], "code_llama2": [82, 83, 84, 85, 86, 87, 88, 89, 90, 273], "arxiv": [82, 83, 84, 85, 86, 88, 89, 90, 95, 96, 97, 98, 99, 102, 103, 104, 105, 107, 108, 109, 110, 112, 113, 114, 115, 120, 121, 122, 123, 124, 131, 132, 133, 134, 135, 138, 139, 140, 141, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 183, 184, 185, 191, 219, 220, 222, 223, 224, 225], "pdf": [82, 83, 84, 219, 220], "2308": [82, 83, 84], "12950": [82, 83, 84], "lora_attn_modul": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 210, 211, 269, 279, 280, 282], "q_proj": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 183, 210, 211, 269, 279, 280, 281, 282], "k_proj": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 183, 210, 211, 269, 279, 280, 281, 282], "v_proj": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 183, 210, 211, 269, 279, 280, 281, 282], "output_proj": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 183, 210, 211, 279, 280, 281, 282], "apply_lora_to_mlp": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 210, 211, 269, 279, 280], "apply_lora_to_output": [85, 86, 87, 88, 89, 90, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 174, 210, 211, 279, 280], "lora_rank": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 269, 279, 280], "lora_alpha": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 269, 279, 280], "lora_dropout": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 280], "use_dora": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 131, 132, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 280], "quantize_bas": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 205, 206, 280, 282], "code_llama2_13b": 85, "tloen": [85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174], "8bb8579e403dc78e37fe81ffbb253c413007323f": [85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174], "l41": [85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174], "l43": [85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174], "linear": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 143, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 187, 189, 204, 205, 206, 210, 211, 279, 280, 281, 282], "low": [85, 86, 87, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 205, 206, 269, 275, 278, 279, 282], "approxim": [85, 86, 87, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 205, 206, 279], "factor": [85, 86, 87, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 205, 206, 220, 275], "dropout": [85, 86, 87, 91, 95, 96, 97, 101, 107, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 149, 150, 151, 154, 155, 156, 157, 158, 160, 165, 166, 167, 171, 172, 173, 174, 175, 183, 205, 206, 279, 280, 282], "decompos": [85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 154, 155, 156, 157, 165, 166, 205, 269], "magnitud": [85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 154, 155, 156, 157, 165, 166, 205, 280], "dora": [85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 129, 131, 132, 138, 139, 150, 151, 154, 155, 156, 157, 165, 166, 205, 269], "ab": [85, 86, 88, 89, 90, 95, 96, 97, 98, 99, 102, 103, 104, 105, 107, 108, 109, 110, 112, 113, 114, 115, 120, 121, 122, 123, 124, 131, 132, 133, 134, 135, 138, 139, 140, 141, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 183, 184, 185, 191, 222, 223, 224, 225], "2402": [85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 154, 155, 156, 157, 165, 166], "09353": [85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 154, 155, 156, 157, 165, 166], "code_llama2_70b": 86, "code_llama2_7b": 87, "qlora": [88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170, 195, 266, 268, 269, 277, 279], "paper": [88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170, 219, 222, 224, 225, 278, 279, 282], "2305": [88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170, 183, 222, 224], "14314": [88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170], "lora_code_llama2_13b": 88, "lora_code_llama2_70b": 89, "lora_code_llama2_7b": 90, "head_dim": [91, 95, 182, 183, 189], "intermediate_dim": [91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175], "attn_dropout": [91, 95, 101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175, 183, 189], "norm_ep": [91, 95, 101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175], "1e": [91, 95, 101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175, 184, 278, 280], "06": [91, 95, 184, 279], "rope_bas": [91, 95, 101, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175], "10000": [91, 95, 101, 154, 156, 158, 160, 165, 167, 185], "norm_embed": [91, 95], "transformerselfattentionlay": [91, 101, 116, 125, 158, 175, 188, 189, 199, 201], "rm": [91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175], "norm": [91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 189], "space": [91, 101, 116, 125, 146, 150, 158, 175, 189, 203, 280], "slide": [91, 158, 168], "window": [91, 158, 168], "vocabulari": [91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 196, 198, 279, 280], "mha": [91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 183, 189], "onto": [91, 95, 101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175, 183, 203], "epsilon": [91, 95, 101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175, 223], "rotari": [91, 95, 101, 125, 129, 154, 156, 158, 160, 165, 167, 185, 277], "10_000": [91, 95, 154, 156, 158, 160, 167], "blog": [92, 93], "technolog": [92, 93], "develop": [92, 93, 267, 282], "gemmatoken": 94, "_templatetyp": [94, 106, 119, 148, 162, 169, 179], "gemma_2b": 96, "gemma_7b": 97, "lora_gemma_2b": 98, "lora_gemma_7b": 99, "taken": [100, 279, 282], "sy": [100, 274], "honest": [100, 274], "pari": [100, 153], "capit": [100, 153], "franc": [100, 153], "known": [100, 153, 241, 281], "stun": [100, 153], "05": [101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175], "gqa": [101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 183], "mqa": [101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 183], "kvcach": [101, 107, 116, 120, 125, 129, 146, 150, 165, 171, 175, 183, 189, 192, 193, 194, 199], "scale_hidden_dim_for_mlp": [101, 107, 116, 120, 125, 129, 146, 150, 171, 175], "2307": [102, 103, 104, 105], "09288": [102, 103, 104, 105], "classif": [105, 156, 160, 161, 232], "llama2_70b": 109, "llama2_7b": [110, 279], "classifi": [111, 156, 160, 161, 258, 280], "llama2_reward_7b": [111, 232], "lora_llama2_13b": 112, "lora_llama2_70b": 113, "lora_llama2_7b": [114, 279], "lora_llama2_reward_7b": 115, "500000": [116, 120, 125, 129, 146, 150], "llama3token": [119, 144, 213], "regist": [119, 144, 148, 169, 179, 195, 253, 282], "canon": [119, 144, 148, 169, 179], "llama3_70b": 121, "lora_llama3_70b": 123, "lora_llama3_8b": [124, 280], "scale_factor": [125, 129], "500_000": [125, 129], "rope": [125, 129, 171, 175, 183, 185], "llama3_1": [126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 269, 278], "llama3_1_70b": 131, "llama3_1_8b": 132, "lora_llama3_1_405b": 133, "lora_llama3_1_70b": 134, "lora_llama3_1_8b": 135, "llama3_2_1b": [138, 192, 193, 194], "llama3_2_3b": 139, "lora_llama3_2_1b": 140, "lora_llama3_2_3b": 141, "projection_head": [142, 199, 203], "combin": [142, 144, 147, 151, 189, 199, 201, 203, 221, 278], "learnabl": [142, 186, 199, 201, 205, 275], "fusion": [142, 145, 146, 147, 149, 150, 151, 199, 200, 201, 202, 203], "encoder_dim": [142, 143], "decoder_dim": [142, 143], "num_img": [142, 143], "num_emb": [142, 143], "broken": [142, 143, 191, 201], "width": [142, 281], "clip_embeds_per_til": 142, "emb": [142, 143, 183, 188, 189, 199], "num_hidden_input": 143, "sequenti": [143, 199, 203], "num_hidden": 143, "hidden_st": [143, 191], "image_mean": 144, "image_std": 144, "tranform": 144, "possible_resolut": 144, "448": [144, 145, 148, 149], "deviat": 144, "still": [144, 196, 198, 200, 201, 269, 279, 281, 282], "transformed_data": 144, "img1": [144, 219], "img2": [144, 219], "31587": [144, 214, 215], "29644": [144, 214, 215], "102": [144, 214, 215], "truncate_at_eo": [144, 215], "skip": [144, 183], "tokenize_head": 144, "tokenize_end": 144, "header": 144, "eom": 144, "wether": 144, "decoder_train": [145, 149, 152, 199], "encoder_train": [145, 149, 152, 199], "fusion_train": [145, 149, 152, 199], "deepfusionmodel": [145, 149, 152], "trainabl": [145, 149, 201, 206, 209, 247, 279, 280, 282], "resiz": [145, 148, 149], "fusion_interv": [146, 150], "num_special_token": [146, 150], "encoder_max_seq_len": [146, 150, 188, 189, 190, 194, 199, 201], "causalselfattent": [146, 150], "interv": [146, 150, 276], "clip_embed_dim": [147, 151], "clip_num_lay": [147, 151], "clip_hidden_st": [147, 151], "num_layers_project": [147, 151], "decoder_embed_dim": [147, 151], "llama3visionencod": [147, 151], "spatial": [147, 151], "backbon": [147, 151], "trainbl": 149, "decoder_lora": 150, "fusion_lora": [150, 151], "encoder_lora": 151, "lora_llama3_2_vision_11b": 152, "num_class": [156, 160, 258], "announc": 159, "ray2333": 161, "feedback": [161, 222], "lora_mistral_7b": 163, "lora_mistral_reward_7b": 164, "phi3_mini": [166, 232], "128k": 168, "nor": 168, "phi3minitoken": 169, "tokenizer_config": 169, "spm": 169, "lm": [169, 223, 278], "bo": [169, 214, 217, 274], "unk": 169, "augment": [169, 282], "endoftext": 169, "phi3minisentencepiecebasetoken": 169, "lora_phi3_mini": 170, "1000000": [171, 175], "tie_word_embed": [171, 172, 173, 175, 176, 177], "qwen2transformerdecod": 171, "period": [171, 175], "word": [171, 175, 280, 281], "qwen2_0_5b": [172, 187], "qwen2_1_5b": [173, 187], "qwen2_7b": 174, "qwen": [176, 177, 178], "merges_fil": 179, "qwen2token": 179, "gate_proj": 180, "down_proj": 180, "up_proj": 180, "feed": [180, 188, 190], "network": [180, 207, 279, 282], "fed": [180, 274], "multipli": [180, 280], "in_dim": [180, 204, 205, 206, 279, 280, 282], "out_dim": [180, 189, 204, 205, 206, 279, 280, 282], "layernorm": 181, "past": 182, "expand": 182, "dpython": [182, 183, 188, 189, 190, 194, 195, 199, 201, 255, 259], "reset": [182, 183, 188, 189, 190, 199, 201, 240], "k_val": 182, "v_val": 182, "fill": 182, "bfloat16": [182, 195, 255, 275, 276, 277, 279, 280, 281], "greater": [182, 191, 263], "pos_embed": [183, 188, 279, 281], "q_norm": 183, "k_norm": 183, "kv_cach": [183, 192, 193, 194], "is_caus": 183, "13245v1": 183, "multihead": 183, "extrem": 183, "credit": 183, "litgpt": 183, "v": [183, 189, 199, 279], "n_kv_head": 183, "rotarypositionalembed": [183, 279, 281], "rmsnorm": 183, "vice": [183, 273], "versa": [183, 273], "y": 183, "s_x": 183, "s_y": 183, "_masktyp": [183, 189, 190], "score": [183, 189, 190, 221], "encoder_max_cache_seq_len": [183, 189, 190], "j": [183, 188, 189, 190, 199], "blockmask": [183, 189, 190], "create_block_mask": [183, 189, 190], "flex_attent": [183, 189, 190], "n_h": [183, 185], "num": [183, 185], "n_kv": 183, "h_d": [183, 185], "reset_cach": [183, 188, 189, 190, 199, 201], "setup_cach": [183, 188, 189, 190, 192, 193, 199, 201], "ep": 184, "squar": 184, "1910": 184, "07467": 184, "propos": [185, 280], "2104": 185, "09864": 185, "verfic": 185, "l80": 185, "init": [185, 240, 252, 282], "exceed": 185, "freq": 185, "recomput": [185, 280], "geometr": 185, "progress": [185, 271, 276, 280], "rotat": 185, "angl": 185, "basic": [186, 277], "tied_modul": 187, "lost": 187, "whose": [187, 207, 248, 253], "attributeerror": [187, 260], "attn": [188, 190, 192, 193, 194, 279, 281, 282], "multiheadattent": [188, 190, 279, 281], "ca_norm": 188, "mlp_norm": [188, 190], "ca_scal": 188, "mlp_scale": [188, 190], "ff": [188, 190], "caches_are_en": [188, 189, 190, 192, 193, 194, 199, 201], "func": [188, 190, 201], "caches_are_setup": [188, 189, 190, 192, 193, 194, 199, 201], "token_sequ": 188, "embed_sequ": 188, "decoder_max_seq_len": [188, 189, 190, 192, 193, 194, 199, 201], "modulelist": 189, "output_hidden_st": [189, 199], "belong": [189, 235], "reduc": [189, 222, 268, 269, 270, 278, 279, 280, 281, 282], "statement": 189, "improv": [189, 215, 224, 238, 270, 277, 278, 279, 280], "readabl": [189, 275], "behaviour": [189, 199, 258], "alter": [189, 199], "common_util": [189, 192, 193, 194, 195], "disable_kv_cach": [189, 199], "chunked_output": 189, "last_hidden_st": 189, "chunk": [189, 196, 198, 215], "cewithchunkedoutputloss": [189, 199], "upcast": [189, 196, 198], "set_num_output_chunk": [189, 199], "num_chunk": [189, 196, 198], "s_e": [189, 199], "d_e": [189, 199], "arang": [189, 199], "prompt_length": [189, 199], "correspondingli": 189, "padded_prompt_length": 189, "m_": [189, 199], "realloc": [189, 199], "runtimeerror": [189, 217, 233, 237, 239, 244], "num_output_chunk": [189, 196, 198, 199], "transformercrossattentionlay": [189, 199, 201], "fusionlay": [189, 199], "sa_norm": 190, "sa_scal": 190, "token_pos_embed": 191, "pre_tile_pos_emb": 191, "post_tile_pos_emb": 191, "cls_project": 191, "vit": 191, "11929": 191, "convolut": 191, "flatten": 191, "downscal": 191, "800x400": 191, "400x400": 191, "_transform": 191, "whole": [191, 278], "n_token": 191, "101": 191, "pool": 191, "tiledtokenpositionalembed": 191, "tilepositionalembed": 191, "tile_pos_emb": 191, "8x8": 191, "21": 191, "22": 191, "23": [191, 236], "24": [191, 276, 277], "25": [191, 275, 278], "26": 191, "27": [191, 275], "28": [191, 275], "29": [191, 282], "30": [191, 226, 281], "31": [191, 277], "33": 191, "34": 191, "35": [191, 282], "36": 191, "37": 191, "38": [191, 275], "39": 191, "41": 191, "43": 191, "44": 191, "45": 191, "46": 191, "47": 191, "48": [191, 275, 282], "49": 191, "50": [191, 226, 248, 275], "51": 191, "52": [191, 276], "53": 191, "54": 191, "55": [191, 276], "56": 191, "57": [191, 279, 282], "58": 191, "59": [191, 282], "60": 191, "61": [191, 275], "62": 191, "63": 191, "64": [191, 269, 279, 280], "num_patches_per_til": 191, "emb_dim": 191, "constain": 191, "anim": 191, "max_n_img": 191, "n_channel": 191, "vision_util": 191, "tile_crop": 191, "800": 191, "patch_grid_s": 191, "rand": 191, "nch": 191, "tile_cropped_imag": 191, "batch_imag": 191, "unsqueez": 191, "batch_aspect_ratio": 191, "clip_vision_encod": 191, "cache_en": 192, "float32": [192, 193, 194, 237, 278], "1024": [192, 193, 194, 281], "temporarili": [193, 194, 207, 280], "enter": [193, 194], "overhead": [193, 222, 270, 280, 281], "untouch": [193, 274], "yield": [193, 194, 207], "caller": [193, 194, 207], "delete_kv_cach": 194, "offload_to_cpu": 195, "hook": [195, 253, 280, 282], "nf4": [195, 280, 282], "restor": 195, "higher": [195, 277, 278, 280, 281, 282], "offload": [195, 282], "increas": [195, 222, 236, 277, 278, 279, 280, 281], "peak": [195, 240, 246, 275, 277, 279, 282], "gpu": [195, 270, 273, 275, 276, 277, 278, 279, 280, 281, 282], "_register_state_dict_hook": 195, "mymodul": 195, "_after_": 195, "nf4tensor": [195, 282], "unquant": [195, 281, 282], "unus": 195, "ignore_index": [196, 197, 198, 278], "entropi": [196, 198, 278], "bf16": [196, 198, 237, 280, 282], "ce": [196, 278], "better": [196, 198, 225, 268, 274, 275, 278, 280, 281], "accuraci": [196, 198, 270, 275, 277, 278, 279, 280, 281, 282], "doubl": [196, 198, 282], "therefor": [196, 198, 280, 282], "num_token": [196, 197, 198], "consider": [196, 198], "compute_cross_entropi": 196, "gain": [196, 270, 277], "won": [196, 274], "realiz": 196, "pull": [196, 269, 270, 273], "1390": 196, "loss_fn": [196, 198], "chunkedcrossentropyloss": 196, "output_chunk": [196, 198], "kullback": [197, 278], "leibler": [197, 278], "diverg": [197, 198, 221, 278], "jongwooko": [197, 278], "distillm": [197, 278], "17c0f98bc263b1861a02d5df578c84aea652ee65": 197, "student_logit": [197, 198, 278], "teacher_logit": [197, 198, 278], "student": [197, 198], "teacher": [197, 198, 275], "kl": [197, 198, 221, 278], "teacher_chunk": 198, "teacher_model": 198, "model_fus": [199, 200, 201, 202, 203], "deepfus": 199, "evolut": 199, "signatur": 199, "interchang": 199, "fusion_param": [199, 200, 201, 202, 203], "fusionembed": 199, "fusion_lay": [199, 201], "clip_vit_224": [199, 203], "feedforward": [199, 203], "register_fusion_modul": 199, "flamingo": [199, 201, 219], "strict": [199, 200, 201, 210, 279], "freez": [199, 275, 279], "fusion_vocab_s": 200, "necessit": 200, "rout": 200, "128": [200, 269, 277, 279, 280], "fusion_first": 201, "shot": [201, 275, 277, 281], "infus": 201, "interpret": 201, "enocd": 201, "isn": [201, 237, 273], "fused_lay": 201, "mark": [203, 274], "earli": 203, "peft": [204, 205, 206, 207, 208, 209, 210, 211, 229, 269, 279, 282], "adapter_param": [204, 205, 206, 207, 208, 209], "proj": 204, "loralinear": [204, 279, 280, 282], "alpha": [205, 206, 279, 280, 282], "use_bia": [205, 206], "scalar": [205, 248, 249, 250, 251, 252, 280], "orient": [205, 280], "bax": [205, 206], "distinct": [205, 282], "lora_a": [205, 206, 279, 282], "lora_b": [205, 206, 279, 282], "initialize_dora_magnitud": 205, "perturb": 206, "decomposit": [206, 279, 280], "matric": [206, 279, 282], "mapsto": 206, "w_0x": 206, "r": [206, 279], "polici": [207, 221, 222, 223, 224, 225, 227, 238, 247, 254, 271], "neural": [207, 279, 282], "get_adapter_param": [209, 279], "base_miss": 210, "base_unexpect": 210, "lora_miss": 210, "lora_unexpect": 210, "validate_state_dict_for_lora": [210, 279], "unlik": 210, "reli": [210, 217, 275, 277], "unexpect": 210, "nonempti": 210, "full_model_state_dict_kei": 211, "lora_state_dict_kei": 211, "base_model_state_dict_kei": 211, "confirm": [211, 267], "lora_modul": 211, "complement": 211, "disjoint": 211, "overlap": [211, 280], "tiktokenbasetoken": 212, "light": 214, "sentencepieceprocessor": 214, "trim": 214, "whitespac": 214, "spm_model": [214, 274], "tokenized_text": [214, 215], "add_bo": [214, 215, 274], "trim_leading_whitespac": 214, "prefix": [214, 280], "unbatch": 214, "bos_id": [215, 217], "lightweight": [215, 274], "substr": 215, "repetit": 215, "speed": [215, 257, 277, 280, 281, 282], "identif": 215, "regex": 215, "absent": 215, "tt_model": 215, "tokenizer_json_path": 216, "heavili": 217, "concat": 217, "1788": 217, "2643": 217, "465": 217, "22137": 217, "join": 217, "satisfi": [217, 275], "loos": 218, "image_token_id": 219, "particip": [219, 220], "laid": 219, "fig": 219, "2204": 219, "14198": 219, "immedi": [219, 280], "until": [219, 280], "img3": 219, "equal": [219, 263], "gamma": [220, 224, 225], "lmbda": 220, "estim": [220, 221], "1506": 220, "02438": 220, "response_len": [220, 221], "receiv": 220, "discount": 220, "gae": 220, "logprob": [221, 225], "ref_logprob": 221, "kl_coeff": 221, "valid_score_idx": 221, "coeffici": [221, 223], "total_reward": 221, "kl_reward": 221, "beta": [222, 225], "label_smooth": [222, 225], "18290": 222, "intuit": [222, 224, 225], "dispref": 222, "dynam": [222, 281], "degener": 222, "occur": [222, 270], "naiv": 222, "trl": [222, 224, 225], "5d1deb1445828cfd0e947cb3a7925b1c03a283fc": 222, "dpo_train": [222, 224], "l844": 222, "2009": 222, "01325": 222, "regular": [222, 225, 280, 281, 282], "baselin": [222, 223, 275, 278, 279], "uncertainti": [222, 225], "policy_chosen_logp": [222, 224, 225], "policy_rejected_logp": [222, 224, 225], "reference_chosen_logp": [222, 224], "reference_rejected_logp": [222, 224], "chosen_reward": [222, 224, 225], "rejected_reward": [222, 224, 225], "value_clip_rang": 223, "value_coeff": 223, "proxim": [223, 271], "1707": 223, "06347": 223, "eqn": 223, "vwxyzjn": 223, "ccc19538e817e98a60d3253242ac15e2a562cb49": 223, "lm_human_preference_detail": 223, "train_policy_acceler": 223, "l719": 223, "ea25b9e8b234e6ee1bca43083f8f3cf974143998": 223, "ppo2": 223, "l68": 223, "l75": 223, "pi_old_logprob": 223, "pi_logprob": 223, "phi_old_valu": 223, "phi_valu": 223, "value_padding_mask": 223, "old": 223, "participag": 223, "five": 223, "policy_loss": 223, "value_loss": 223, "clipfrac": 223, "fraction": 223, "statist": [224, 280], "rso": 224, "hing": 224, "2309": 224, "06657": 224, "logist": 224, "regress": 224, "slic": 224, "10425": 224, "almost": [224, 279], "svm": 224, "counter": 224, "4dce042a3863db1d375358e8c8092b874b02934b": 224, "l1141": 224, "simpo": 225, "2405": 225, "14734": 225, "averag": [225, 278], "implicit": 225, "margin": 225, "bradlei": 225, "terri": 225, "larger": [225, 231, 275, 277, 278, 280], "win": 225, "lose": 225, "98ad01ddfd1e1b67ec018014b83cba40e0caea66": 225, "cpo_train": 225, "l603": 225, "pretti": [225, 275], "identitc": 225, "elimin": 225, "kind": 225, "ipoloss": 225, "fill_valu": 226, "sequence_length": 226, "stop_token_id": 226, "869": 226, "eos_mask": 226, "truncated_sequ": 226, "datatyp": [227, 280, 282], "denot": 227, "auto_wrap_polici": [227, 238, 254], "submodul": [227, 247], "obei": 227, "contract": 227, "get_fsdp_polici": 227, "modules_to_wrap": [227, 238, 247], "min_num_param": 227, "my_fsdp_polici": 227, "recurs": [227, 247, 251], "sum": [227, 278, 279], "p": [227, 233, 279, 281, 282], "numel": [227, 279], "1000": [227, 281], "stabl": [227, 245, 251, 256, 267, 280], "html": [227, 245, 251, 254, 256, 262, 266], "filename_format": 228, "max_filenam": 228, "concis": 228, "filenam": [228, 249], "file_": 228, "_of_": 228, "n_file": 228, "build_checkpoint_filenam": 228, "file_00001_of_00003": 228, "file_00002_of_00003": 228, "file_00003_of_00003": 228, "safe_seri": 229, "from_pretrain": 229, "0001_of_0003": 229, "0002_of_0003": 229, "todo": 229, "preserv": [229, 282], "weight_map": [229, 275], "convert_weight": 229, "_model_typ": [229, 232], "intermediate_checkpoint": [229, 230, 231], "adapter_onli": [229, 230, 231], "_weight_map": 229, "shard": [230, 277], "wip": 230, "qualnam": 232, "boundari": 232, "distinguish": 232, "llama3_vis": 232, "llama3_2_vision_decod": 232, "mistral_reward_7b": 232, "my_new_model": 232, "my_custom_state_dict_map": 232, "optim_map": 233, "bare": 233, "bone": 233, "optim_dict": [233, 235, 253], "cfg_optim": 233, "ckpt": 233, "optim_ckpt": 233, "placeholder_optim_dict": 233, "optiminbackwardwrapp": 233, "get_last_lr": 233, "rate": [233, 236, 239, 268, 276, 280], "schedul": [233, 236, 257, 276, 280], "get_optim_kei": 233, "arbitrari": [233, 279, 280], "optim_ckpt_map": 233, "set_lr_schedul": 233, "lr_schedul": [233, 236], "lrschedul": 233, "loadabl": 233, "step_lr_schedul": 233, "ac_mod": 234, "ac_opt": 234, "op": [234, 281], "ac": [234, 238], "optimizerinbackwardwrapp": [235, 239], "named_paramet": [235, 258], "num_warmup_step": 236, "num_training_step": 236, "num_cycl": [236, 257], "last_epoch": 236, "lambdalr": 236, "linearli": 236, "decreas": [236, 279, 280, 281, 282], "cosin": 236, "v4": 236, "src": 236, "l104": 236, "warmup": [236, 257], "phase": 236, "wave": 236, "half": [236, 280], "kernel": 237, "memory_efficient_fsdp_wrap": [238, 281], "maxim": [238, 247, 266, 268], "workload": [238, 270, 280, 281], "fullyshardeddataparallel": [238, 247, 280], "fsdppolicytyp": [238, 247], "warpper": 239, "optimizer_in_backward": 239, "reset_stat": 240, "track": [240, 248], "alloc": [240, 246, 247, 277, 280, 282], "reserv": [240, 246, 274, 282], "stat": [240, 246, 282], "int4": [241, 281], "4w": 241, "recogn": 241, "int8dynactint4weightquant": [241, 270, 281], "8da4w": [241, 281], "int4weightonlyquant": [241, 281], "int8dynactint4weightqatquant": [241, 270, 281], "qat": [241, 266, 271], "int4weightonlyqatquant": 241, "exclud": 242, "aka": 243, "master": 245, "port": [245, 273], "address": [245, 278, 280], "hold": [245, 276], "peak_memory_act": 246, "peak_memory_alloc": 246, "peak_memory_reserv": 246, "get_memory_stat": 246, "hierarch": 247, "api_kei": 248, "experiment_kei": 248, "onlin": 248, "log_cod": 248, "comet": 248, "site": [248, 275], "ml": 248, "team": 248, "compar": [248, 251, 263, 275, 277, 278, 279, 281, 282], "sdk": 248, "uncategor": 248, "alphanumer": 248, "charact": 248, "get_or_cr": 248, "fresh": 248, "persist": 248, "hpo": 248, "sweep": 248, "server": 248, "offlin": 248, "auto": [248, 273], "creation": 248, "experimentconfig": 248, "project_nam": 248, "my_workspac": 248, "my_metr": [248, 251, 252], "importerror": [248, 252], "termin": [248, 251, 252], "comet_api_kei": 248, "flush": [248, 249, 250, 251, 252], "ndarrai": [248, 249, 250, 251, 252], "record": [248, 249, 250, 251, 252, 257], "log_config": [248, 252], "payload": [248, 249, 250, 251, 252], "log_": 249, "unixtimestamp": 249, "thread": 249, "safe": 249, "organize_log": 251, "tensorboard": 251, "subdirectori": 251, "logdir": 251, "startup": 251, "tree": [251, 275, 277], "tfevent": 251, "encount": 251, "frontend": 251, "organ": [251, 273], "accordingli": [251, 281], "my_log_dir": 251, "view": [251, 278], "entiti": 252, "bias": [252, 279, 282], "usernam": 252, "my_ent": 252, "my_group": 252, "account": [252, 279, 282], "link": [252, 275, 277], "capecap": 252, "6053ofw0": 252, "torchtune_config_j67sb73v": 252, "soon": [253, 280], "readi": [253, 266, 274, 281], "grad": 253, "acwrappolicytyp": 254, "author": [254, 268, 276, 280, 282], "fsdp_adavnced_tutori": 254, "insid": 255, "contextmanag": 255, "debug_mod": 256, "pseudo": 256, "commonli": [256, 279, 280, 282], "numpi": 256, "determinist": 256, "global": [256, 280], "warn": 256, "nondeterminist": 256, "cudnn": 256, "set_deterministic_debug_mod": 256, "profile_memori": 257, "with_stack": 257, "record_shap": 257, "with_flop": 257, "wait_step": 257, "warmup_step": 257, "active_step": 257, "profil": 257, "layout": 257, "trace": 257, "profileract": 257, "gradient_accumul": 257, "sensibl": 257, "default_schedul": 257, "reduct": [257, 270, 279], "scope": 257, "flop": 257, "cycl": 257, "repeat": [257, 280], "model_named_paramet": 258, "force_overrid": 258, "concret": [258, 280], "vocab_dim": 258, "named_param": 259, "inplac": [260, 279], "too": [260, 270, 277], "handler": 262, "_log": 262, "__version__": 263, "generated_examples_python": 264, "galleri": [264, 272], "sphinx": 264, "000": [265, 272, 277], "execut": [265, 272], "generated_exampl": 265, "mem": [265, 272], "mb": [265, 272], "gentl": 266, "introduct": 266, "first_finetune_tutori": 266, "kd": 266, "torchvis": 267, "torchao": [267, 270, 275, 277, 280, 281, 282], "latest": [267, 270, 276, 280, 282], "whl": 267, "cu121": 267, "cu118": 267, "cu124": 267, "And": [267, 275], "welcom": [267, 273], "greatest": [267, 276], "contributor": 267, "dev": 267, "commit": 267, "branch": 267, "therebi": [267, 280, 281, 282], "forc": [267, 278], "reinstal": 267, "opt": [267, 276], "suffix": 267, "On": [268, 279], "pointer": 268, "emphas": 268, "simplic": 268, "component": 268, "prove": 268, "democrat": 268, "zoo": 268, "varieti": [268, 279], "integr": [268, 275, 276, 277, 279, 281, 282], "fsdp2": 268, "excit": 268, "checkout": 268, "quickstart": 268, "attain": 268, "embodi": 268, "philosophi": 268, "usabl": 268, "composit": 268, "hard": 268, "outlin": 268, "unecessari": 268, "never": 268, "thoroughli": 268, "competit": 269, "grant": [269, 270, 276], "interest": [269, 270, 275, 278], "8b_lora_single_devic": [269, 273, 274, 277, 278, 280], "lever": [269, 270], "action": [269, 270], "degrad": [270, 280, 281, 282], "simul": [270, 280, 281], "compromis": 270, "blogpost": [270, 280], "qat_distribut": [270, 281], "8b_qat_ful": [270, 281], "least": [270, 277, 279, 281], "vram": [270, 277, 279, 280, 281], "80gb": [270, 281], "a100": 270, "h100": 270, "delai": 270, "fake": [270, 281], "empir": [270, 281], "potenti": [270, 279, 280], "fake_quant_after_n_step": [270, 281], "idea": [270, 278, 282], "roughli": 270, "total_step": 270, "futur": [270, 281], "plan": [270, 275], "un": 270, "groupsiz": [270, 281], "256": [270, 277, 281], "hackabl": [271, 276], "singularli": [271, 276], "technic": [271, 276], "awar": [271, 280, 281], "tracker": 271, "short": 273, "subcommand": 273, "anytim": 273, "symlink": 273, "wrote": 273, "readm": [273, 275, 277], "md": 273, "lot": [273, 275, 280], "recent": 273, "releas": [273, 277], "agre": 273, "term": [273, 280], "perman": 273, "eat": 273, "bandwith": 273, "storag": [273, 282], "00030": 273, "ootb": 273, "7b_full_low_memori": [273, 275, 276], "8b_full_single_devic": 273, "mini_full_low_memori": 273, "7b_full": [273, 275, 276], "13b_full": [273, 275, 276], "70b_full": 273, "edit": 273, "clobber": 273, "destin": 273, "lora_finetune_distribut": [273, 277, 279], "torchrun": 273, "launch": [273, 274, 276], "nproc": 273, "node": 273, "worker": 273, "nnode": [273, 279, 281], "minimum_nod": 273, "maximum_nod": 273, "fail": 273, "rdzv": 273, "rendezv": 273, "endpoint": 273, "8b_lora": [273, 277], "bypass": 273, "fancy_lora": 273, "8b_fancy_lora": 273, "nice": 274, "meet": 274, "overhaul": 274, "multiturn": 274, "untrain": 274, "accompani": 274, "who": 274, "influenti": 274, "hip": 274, "hop": 274, "artist": 274, "2pac": 274, "rakim": 274, "flavor": 274, "formatted_messag": 274, "nyou": 274, "nwho": 274, "why": [274, 276, 279], "518": 274, "25580": 274, "29962": 274, "3532": 274, "14816": 274, "29903": 274, "6778": 274, "_spm_model": 274, "piece_to_id": 274, "manual": [274, 282], "529": 274, "29879": 274, "29958": 274, "nhere": 274, "pure": 274, "mess": 274, "prime": 274, "strictli": 274, "ask": [274, 280], "though": 274, "robust": 274, "pretend": 274, "zuckerberg": 274, "seem": [274, 275], "good": [274, 279, 280], "altogeth": 274, "honor": 274, "custom_8b_lora_single_devic": 274, "favorit": [275, 279], "seemlessli": 275, "connect": [275, 281], "amount": 275, "natur": 275, "export": 275, "leverag": [275, 277, 282], "percentag": 275, "16gb": [275, 279], "rtx": 275, "3090": 275, "4090": 275, "hour": 275, "7b_qlora_single_devic": [275, 276, 282], "473": 275, "98": [275, 282], "gb": [275, 277, 279, 281, 282], "484": 275, "01": [275, 276], "fact": [275, 277, 279, 280], "third": 275, "But": [275, 279], "realli": 275, "eleuther_ev": [275, 277, 281], "eleuther_evalu": [275, 277, 281], "lm_eval": [275, 277], "custom_eval_config": [275, 277], "truthfulqa_mc2": [275, 277, 278, 279], "measur": [275, 277], "propens": [275, 277], "324": 275, "loglikelihood": 275, "195": 275, "121": 275, "197": 275, "acc": [275, 281], "388": 275, "489": 275, "great": [275, 280], "custom_generation_config": [275, 277], "kick": 275, "300": 275, "bai": 275, "area": 275, "92": 275, "exploratorium": 275, "san": 275, "francisco": 275, "magazin": 275, "awesom": 275, "bridg": 275, "cool": 275, "96": [275, 282], "sec": [275, 277], "83": 275, "99": [275, 279], "72": 275, "littl": 275, "int8_weight_onli": [275, 277], "int8_dynamic_activation_int8_weight": [275, 277], "ao": [275, 277], "quant_api": [275, 277], "quantize_": [275, 277], "int4_weight_onli": [275, 277], "previous": [275, 277, 279], "benefit": 275, "clone": [275, 279, 281, 282], "assumpt": 275, "new_dir": 275, "output_dict": 275, "sd_1": 275, "sd_2": 275, "dump": 275, "convert_hf_checkpoint": 275, "checkpoint_path": 275, "justin": 275, "school": 275, "math": 275, "ws": 275, "94": [275, 277], "bandwidth": [275, 277], "1391": 275, "84": 275, "thats": 275, "seamlessli": 275, "authent": [275, 276], "hopefulli": 275, "gave": 275, "minut": 276, "agreement": 276, "depth": 276, "principl": 276, "boilerpl": 276, "substanti": [276, 279], "custom_config": 276, "replic": 276, "lorafinetunerecipesingledevic": 276, "lora_finetune_output": 276, "log_1713194212": 276, "3697006702423096": 276, "25880": [276, 282], "83it": 276, "monitor": 276, "tqdm": 276, "e2": 276, "focu": 277, "theta": 277, "observ": [277, 281], "consum": [277, 282], "overal": [277, 278], "8b_qlora_single_devic": [277, 280], "coupl": [277, 279, 282], "meta_model_0": [277, 281], "122": 277, "sarah": 277, "busi": 277, "mum": 277, "young": 277, "children": 277, "live": 277, "north": 277, "east": 277, "england": 277, "135": 277, "88": 277, "138": 277, "346": 277, "09": 277, "139": 277, "broader": 277, "teach": [278, 279], "straight": [278, 279], "jump": [278, 279], "compress": 278, "transfer": 278, "capac": 278, "computation": 278, "expens": 278, "deploi": 278, "imit": 278, "diagram": 278, "aim": [278, 280], "minillm": 278, "forwardklloss": 278, "super": 278, "teacher_prob": 278, "student_logprob": 278, "log_softmax": 278, "prod_prob": 278, "forwardklwithchunkedoutputloss": 278, "knowledge_distillation_single_devic": 278, "bit": [278, 279, 280, 281, 282], "alpaca_cleaned_dataset": 278, "hellaswag": [278, 281], "commonsense_qa": 278, "kd_ratio": 278, "teacher_checkpoint": 278, "00004": 278, "truthfulqa": [278, 279], "commonsens": 278, "constant": 278, "boost": 278, "graph": [278, 280], "irrespect": 278, "3e": 278, "slightli": 278, "truthful_qa": 278, "wherea": 278, "unfamiliar": 279, "oppos": [279, 282], "momentum": [279, 280], "aghajanyan": 279, "et": 279, "al": 279, "hypothes": 279, "intrins": 279, "eight": 279, "practic": 279, "blue": 279, "although": [279, 281], "rememb": 279, "approx": 279, "15m": 279, "65k": 279, "requires_grad": [279, 282], "frozen_out": [279, 282], "lora_out": [279, 282], "base_model": 279, "lora_model": 279, "lora_llama_2_7b": [279, 282], "alon": 279, "in_featur": [279, 281], "out_featur": [279, 281], "validate_missing_and_unexpected_for_lora": 279, "peft_util": 279, "set_trainable_param": 279, "lora_param": 279, "total_param": 279, "trainable_param": 279, "2f": 279, "6742609920": 279, "4194304": 279, "7b_lora": 279, "my_model_checkpoint_path": [279, 281, 282], "tokenizer_checkpoint": [279, 281, 282], "my_tokenizer_checkpoint_path": [279, 281, 282], "constraint": 279, "factori": 279, "benefici": 279, "impact": [279, 280], "minor": 279, "lora_experiment_1": 279, "smooth": [279, 282], "curv": [279, 282], "500": 279, "ran": 279, "footprint": [279, 281], "commod": 279, "cogniz": 279, "ax": 279, "parallel": 279, "475": 279, "87": 279, "508": 279, "86": 279, "504": 279, "04": 279, "514": 279, "lowest": 279, "absolut": 279, "4gb": 279, "tradeoff": 279, "salman": 280, "mohammadi": 280, "brief": 280, "glossari": 280, "struggl": 280, "constrain": [280, 281], "cost": 280, "particularli": 280, "gradient_accumulation_step": 280, "throughput": 280, "ram": 280, "bottleneck": 280, "sebastian": 280, "raschka": 280, "fp16": 280, "sound": 280, "quot": 280, "aliv": 280, "region": 280, "enable_activation_checkpoint": 280, "bring": 280, "autograd": [280, 282], "saved_tensors_hook": 280, "cours": 280, "runtim": 280, "hide": 280, "later": 280, "brought": 280, "enable_activation_offload": 280, "total_batch_s": 280, "count": 280, "suppos": 280, "log_every_n_step": 280, "translat": 280, "frequent": 280, "slowli": 280, "num_devic": 280, "adamw8bit": 280, "pagedadamw": 280, "modern": 280, "converg": 280, "stateless": 280, "stochast": 280, "descent": 280, "sacrif": 280, "remov": 280, "optimizer_in_bwd": 280, "cpuoffloadoptim": 280, "offload_gradi": 280, "prototyp": 280, "low_bit_optim": 280, "4e": 280, "adam": 280, "hint": 280, "slowdown": 280, "4x": 280, "fsdp_cpu_offload": 280, "greatli": 280, "lora_": 280, "lora_llama3": 280, "_lora": 280, "firstli": 280, "secondli": 280, "affect": 280, "fashion": 280, "slower": [280, 282], "jointli": 280, "sens": 280, "novel": 280, "normalfloat": [280, 282], "8x": [280, 282], "worth": 280, "cast": [280, 281], "incur": [280, 281, 282], "penalti": 280, "qlora_": 280, "qlora_llama3_8b": 280, "_qlora": 280, "reap": 280, "hood": [280, 282], "doralinear": 280, "swap": [280, 281], "perplex": 281, "goal": 281, "ptq": 281, "kept": 281, "nois": 281, "henc": 281, "x_q": 281, "int8": 281, "zp": 281, "x_float": 281, "qmin": 281, "qmax": 281, "clamp": 281, "x_fq": 281, "dequant": 281, "proce": 281, "prepared_model": 281, "int8dynactint4weightqatlinear": 281, "int8dynactint4weightlinear": 281, "train_loop": 281, "converted_model": 281, "qat_distributed_recipe_label": 281, "recov": 281, "custom_8b_qat_ful": 281, "2000": 281, "led": 281, "presum": 281, "mutat": 281, "5gb": 281, "custom_quant": 281, "poorli": 281, "custom_eleuther_evalu": 281, "fullmodeltorchtunecheckpoint": 281, "max_seq_length": 281, "my_eleuther_evalu": 281, "stderr": 281, "word_perplex": 281, "9148": 281, "byte_perplex": 281, "5357": 281, "bits_per_byt": 281, "6189": 281, "5687": 281, "0049": 281, "acc_norm": 281, "7536": 281, "0043": 281, "portion": [281, 282], "74": 281, "048": 281, "190": 281, "7735": 281, "5598": 281, "6413": 281, "5481": 281, "0050": 281, "7390": 281, "0044": 281, "7251": 281, "4994": 281, "5844": 281, "5740": 281, "7610": 281, "outperform": 281, "importantli": 281, "characterist": 281, "187": 281, "958": 281, "halv": 281, "motiv": 281, "edg": 281, "smartphon": 281, "executorch": 281, "xnnpack": 281, "export_llama": 281, "use_sdpa_with_kv_cach": 281, "qmode": 281, "group_siz": 281, "get_bos_id": 281, "get_eos_id": 281, "output_nam": 281, "llama3_8da4w": 281, "pte": 281, "881": 281, "oneplu": 281, "709": 281, "tok": 281, "815": 281, "316": 281, "364": 281, "highli": 282, "vanilla": 282, "held": 282, "bespok": 282, "vast": 282, "major": 282, "normatfloat": 282, "deepdiv": 282, "de": 282, "counterpart": 282, "set_default_devic": 282, "qlora_linear": 282, "memory_alloc": 282, "177": 282, "152": 282, "del": 282, "empty_cach": 282, "lora_linear": 282, "081": 282, "344": 282, "qlora_llama2_7b": 282, "qlora_model": 282, "essenti": 282, "reparametrize_as_dtype_state_dict_post_hook": 282, "149": 282, "9157477021217346": 282, "02": 282, "08": 282, "15it": 282, "nightli": 282, "200": 282, "hundr": 282, "228": 282, "8158286809921265": 282, "95it": 282, "exercis": 282, "linear_nf4": 282, "to_nf4": 282, "linear_weight": 282, "incom": 282}, "objects": {"torchtune.config": [[27, 0, 1, "", "instantiate"], [28, 0, 1, "", "log_config"], [29, 0, 1, "", "parse"], [30, 0, 1, "", "validate"]], "torchtune.data": [[31, 1, 1, "", "AlpacaToMessages"], [32, 1, 1, "", "ChatMLTemplate"], [33, 1, 1, "", "ChosenRejectedToMessages"], [34, 2, 1, "", "GrammarErrorCorrectionTemplate"], [35, 1, 1, "", "InputOutputToMessages"], [36, 1, 1, "", "Message"], [37, 1, 1, "", "OpenAIToMessages"], [38, 1, 1, "", "PromptTemplate"], [39, 1, 1, "", "PromptTemplateInterface"], [40, 2, 1, "", "QuestionAnswerTemplate"], [41, 2, 1, "", "Role"], [42, 1, 1, "", "ShareGPTToMessages"], [43, 2, 1, "", "SummarizeTemplate"], [44, 0, 1, "", "format_content_with_images"], [45, 0, 1, "", "left_pad_sequence"], [46, 0, 1, "", "load_image"], [47, 0, 1, "", "padded_collate"], [48, 0, 1, "", "padded_collate_dpo"], [49, 0, 1, "", "padded_collate_sft"], [50, 0, 1, "", "padded_collate_tiled_images_and_mask"], [51, 0, 1, "", "truncate"], [52, 0, 1, "", "validate_messages"]], "torchtune.data.Message": [[36, 3, 1, "", "contains_media"], [36, 4, 1, "", "from_dict"], [36, 4, 1, "", "get_media"], [36, 3, 1, "", "text_content"]], "torchtune.datasets": [[53, 1, 1, "", "ConcatDataset"], [54, 1, 1, "", "PackedDataset"], [55, 1, 1, "", "PreferenceDataset"], [56, 1, 1, "", "SFTDataset"], [57, 1, 1, "", "TextCompletionDataset"], [58, 0, 1, "", "alpaca_cleaned_dataset"], [59, 0, 1, "", "alpaca_dataset"], [60, 0, 1, "", "chat_dataset"], [61, 0, 1, "", "cnn_dailymail_articles_dataset"], [62, 0, 1, "", "grammar_dataset"], [63, 0, 1, "", "hh_rlhf_helpful_dataset"], [64, 0, 1, "", "instruct_dataset"], [67, 0, 1, "", "preference_dataset"], [68, 0, 1, "", "samsum_dataset"], [69, 0, 1, "", "slimorca_dataset"], [70, 0, 1, "", "stack_exchange_paired_dataset"], [71, 0, 1, "", "text_completion_dataset"], [72, 0, 1, "", "wikitext_dataset"]], "torchtune.datasets.multimodal": [[65, 0, 1, "", "llava_instruct_dataset"], [66, 0, 1, "", "the_cauldron_dataset"]], "torchtune.generation": [[73, 0, 1, "", "generate"], [74, 0, 1, "", "generate_next_token"], [75, 0, 1, "", "get_causal_mask_from_padding_mask"], [76, 0, 1, "", "get_position_ids_from_padding_mask"], [77, 0, 1, "", "sample"]], "torchtune.models.clip": [[78, 1, 1, "", "TilePositionalEmbedding"], [79, 1, 1, "", "TiledTokenPositionalEmbedding"], [80, 1, 1, "", "TokenPositionalEmbedding"], [81, 0, 1, "", "clip_vision_encoder"]], "torchtune.models.clip.TilePositionalEmbedding": [[78, 4, 1, "", "forward"]], "torchtune.models.clip.TiledTokenPositionalEmbedding": [[79, 4, 1, "", "forward"]], "torchtune.models.clip.TokenPositionalEmbedding": [[80, 4, 1, "", "forward"]], "torchtune.models.code_llama2": [[82, 0, 1, "", "code_llama2_13b"], [83, 0, 1, "", "code_llama2_70b"], [84, 0, 1, "", "code_llama2_7b"], [85, 0, 1, "", "lora_code_llama2_13b"], [86, 0, 1, "", "lora_code_llama2_70b"], [87, 0, 1, "", "lora_code_llama2_7b"], [88, 0, 1, "", "qlora_code_llama2_13b"], [89, 0, 1, "", "qlora_code_llama2_70b"], [90, 0, 1, "", "qlora_code_llama2_7b"]], "torchtune.models.gemma": [[91, 0, 1, "", "gemma"], [92, 0, 1, "", "gemma_2b"], [93, 0, 1, "", "gemma_7b"], [94, 0, 1, "", "gemma_tokenizer"], [95, 0, 1, "", "lora_gemma"], [96, 0, 1, "", "lora_gemma_2b"], [97, 0, 1, "", "lora_gemma_7b"], [98, 0, 1, "", "qlora_gemma_2b"], [99, 0, 1, "", "qlora_gemma_7b"]], "torchtune.models.llama2": [[100, 1, 1, "", "Llama2ChatTemplate"], [101, 0, 1, "", "llama2"], [102, 0, 1, "", "llama2_13b"], [103, 0, 1, "", "llama2_70b"], [104, 0, 1, "", "llama2_7b"], [105, 0, 1, "", "llama2_reward_7b"], [106, 0, 1, "", "llama2_tokenizer"], [107, 0, 1, "", "lora_llama2"], [108, 0, 1, "", "lora_llama2_13b"], [109, 0, 1, "", "lora_llama2_70b"], [110, 0, 1, "", "lora_llama2_7b"], [111, 0, 1, "", "lora_llama2_reward_7b"], [112, 0, 1, "", "qlora_llama2_13b"], [113, 0, 1, "", "qlora_llama2_70b"], [114, 0, 1, "", "qlora_llama2_7b"], [115, 0, 1, "", "qlora_llama2_reward_7b"]], "torchtune.models.llama3": [[116, 0, 1, "", "llama3"], [117, 0, 1, "", "llama3_70b"], [118, 0, 1, "", "llama3_8b"], [119, 0, 1, "", "llama3_tokenizer"], [120, 0, 1, "", "lora_llama3"], [121, 0, 1, "", "lora_llama3_70b"], [122, 0, 1, "", "lora_llama3_8b"], [123, 0, 1, "", "qlora_llama3_70b"], [124, 0, 1, "", "qlora_llama3_8b"]], "torchtune.models.llama3_1": [[125, 0, 1, "", "llama3_1"], [126, 0, 1, "", "llama3_1_405b"], [127, 0, 1, "", "llama3_1_70b"], [128, 0, 1, "", "llama3_1_8b"], [129, 0, 1, "", "lora_llama3_1"], [130, 0, 1, "", "lora_llama3_1_405b"], [131, 0, 1, "", "lora_llama3_1_70b"], [132, 0, 1, "", "lora_llama3_1_8b"], [133, 0, 1, "", "qlora_llama3_1_405b"], [134, 0, 1, "", "qlora_llama3_1_70b"], [135, 0, 1, "", "qlora_llama3_1_8b"]], "torchtune.models.llama3_2": [[136, 0, 1, "", "llama3_2_1b"], [137, 0, 1, "", "llama3_2_3b"], [138, 0, 1, "", "lora_llama3_2_1b"], [139, 0, 1, "", "lora_llama3_2_3b"], [140, 0, 1, "", "qlora_llama3_2_1b"], [141, 0, 1, "", "qlora_llama3_2_3b"]], "torchtune.models.llama3_2_vision": [[142, 1, 1, "", "Llama3VisionEncoder"], [143, 1, 1, "", "Llama3VisionProjectionHead"], [144, 1, 1, "", "Llama3VisionTransform"], [145, 0, 1, "", "llama3_2_vision_11b"], [146, 0, 1, "", "llama3_2_vision_decoder"], [147, 0, 1, "", "llama3_2_vision_encoder"], [148, 0, 1, "", "llama3_2_vision_transform"], [149, 0, 1, "", "lora_llama3_2_vision_11b"], [150, 0, 1, "", "lora_llama3_2_vision_decoder"], [151, 0, 1, "", "lora_llama3_2_vision_encoder"], [152, 0, 1, "", "qlora_llama3_2_vision_11b"]], "torchtune.models.llama3_2_vision.Llama3VisionEncoder": [[142, 4, 1, "", "forward"]], "torchtune.models.llama3_2_vision.Llama3VisionProjectionHead": [[143, 4, 1, "", "forward"]], "torchtune.models.llama3_2_vision.Llama3VisionTransform": [[144, 4, 1, "", "decode"], [144, 4, 1, "", "tokenize_message"], [144, 4, 1, "", "tokenize_messages"]], "torchtune.models.mistral": [[153, 1, 1, "", "MistralChatTemplate"], [154, 0, 1, "", "lora_mistral"], [155, 0, 1, "", "lora_mistral_7b"], [156, 0, 1, "", "lora_mistral_classifier"], [157, 0, 1, "", "lora_mistral_reward_7b"], [158, 0, 1, "", "mistral"], [159, 0, 1, "", "mistral_7b"], [160, 0, 1, "", "mistral_classifier"], [161, 0, 1, "", "mistral_reward_7b"], [162, 0, 1, "", "mistral_tokenizer"], [163, 0, 1, "", "qlora_mistral_7b"], [164, 0, 1, "", "qlora_mistral_reward_7b"]], "torchtune.models.phi3": [[165, 0, 1, "", "lora_phi3"], [166, 0, 1, "", "lora_phi3_mini"], [167, 0, 1, "", "phi3"], [168, 0, 1, "", "phi3_mini"], [169, 0, 1, "", "phi3_mini_tokenizer"], [170, 0, 1, "", "qlora_phi3_mini"]], "torchtune.models.qwen2": [[171, 0, 1, "", "lora_qwen2"], [172, 0, 1, "", "lora_qwen2_0_5b"], [173, 0, 1, "", "lora_qwen2_1_5b"], [174, 0, 1, "", "lora_qwen2_7b"], [175, 0, 1, "", "qwen2"], [176, 0, 1, "", "qwen2_0_5b"], [177, 0, 1, "", "qwen2_1_5b"], [178, 0, 1, "", "qwen2_7b"], [179, 0, 1, "", "qwen2_tokenizer"]], "torchtune.modules": [[180, 1, 1, "", "FeedForward"], [181, 1, 1, "", "Fp32LayerNorm"], [182, 1, 1, "", "KVCache"], [183, 1, 1, "", "MultiHeadAttention"], [184, 1, 1, "", "RMSNorm"], [185, 1, 1, "", "RotaryPositionalEmbeddings"], [186, 1, 1, "", "TanhGate"], [187, 1, 1, "", "TiedLinear"], [188, 1, 1, "", "TransformerCrossAttentionLayer"], [189, 1, 1, "", "TransformerDecoder"], [190, 1, 1, "", "TransformerSelfAttentionLayer"], [191, 1, 1, "", "VisionTransformer"]], "torchtune.modules.FeedForward": [[180, 4, 1, "", "forward"]], "torchtune.modules.Fp32LayerNorm": [[181, 4, 1, "", "forward"]], "torchtune.modules.KVCache": [[182, 4, 1, "", "reset"], [182, 4, 1, "", "update"]], "torchtune.modules.MultiHeadAttention": [[183, 4, 1, "", "forward"], [183, 4, 1, "", "reset_cache"], [183, 4, 1, "", "setup_cache"]], "torchtune.modules.RMSNorm": [[184, 4, 1, "", "forward"]], "torchtune.modules.RotaryPositionalEmbeddings": [[185, 4, 1, "", "forward"]], "torchtune.modules.TanhGate": [[186, 4, 1, "", "forward"]], "torchtune.modules.TransformerCrossAttentionLayer": [[188, 4, 1, "", "caches_are_enabled"], [188, 4, 1, "", "caches_are_setup"], [188, 4, 1, "", "forward"], [188, 4, 1, "", "reset_cache"], [188, 4, 1, "", "setup_caches"]], "torchtune.modules.TransformerDecoder": [[189, 4, 1, "", "caches_are_enabled"], [189, 4, 1, "", "caches_are_setup"], [189, 4, 1, "", "chunked_output"], [189, 4, 1, "", "forward"], [189, 4, 1, "", "reset_caches"], [189, 4, 1, "", "set_num_output_chunks"], [189, 4, 1, "", "setup_caches"]], "torchtune.modules.TransformerSelfAttentionLayer": [[190, 4, 1, "", "caches_are_enabled"], [190, 4, 1, "", "caches_are_setup"], [190, 4, 1, "", "forward"], [190, 4, 1, "", "reset_cache"], [190, 4, 1, "", "setup_caches"]], "torchtune.modules.VisionTransformer": [[191, 4, 1, "", "forward"]], "torchtune.modules.common_utils": [[192, 0, 1, "", "delete_kv_caches"], [193, 0, 1, "", "disable_kv_cache"], [194, 0, 1, "", "local_kv_cache"], [195, 0, 1, "", "reparametrize_as_dtype_state_dict_post_hook"]], "torchtune.modules.loss": [[196, 1, 1, "", "CEWithChunkedOutputLoss"], [197, 1, 1, "", "ForwardKLLoss"], [198, 1, 1, "", "ForwardKLWithChunkedOutputLoss"]], "torchtune.modules.loss.CEWithChunkedOutputLoss": [[196, 4, 1, "", "compute_cross_entropy"], [196, 4, 1, "", "forward"]], "torchtune.modules.loss.ForwardKLLoss": [[197, 4, 1, "", "forward"]], "torchtune.modules.loss.ForwardKLWithChunkedOutputLoss": [[198, 4, 1, "", "forward"]], "torchtune.modules.model_fusion": [[199, 1, 1, "", "DeepFusionModel"], [200, 1, 1, "", "FusionEmbedding"], [201, 1, 1, "", "FusionLayer"], [202, 0, 1, "", "get_fusion_params"], [203, 0, 1, "", "register_fusion_module"]], "torchtune.modules.model_fusion.DeepFusionModel": [[199, 4, 1, "", "caches_are_enabled"], [199, 4, 1, "", "caches_are_setup"], [199, 4, 1, "", "forward"], [199, 4, 1, "", "reset_caches"], [199, 4, 1, "", "set_num_output_chunks"], [199, 4, 1, "", "setup_caches"]], "torchtune.modules.model_fusion.FusionEmbedding": [[200, 4, 1, "", "forward"], [200, 4, 1, "", "fusion_params"]], "torchtune.modules.model_fusion.FusionLayer": [[201, 4, 1, "", "caches_are_enabled"], [201, 4, 1, "", "caches_are_setup"], [201, 4, 1, "", "forward"], [201, 4, 1, "", "fusion_params"], [201, 4, 1, "", "reset_cache"], [201, 4, 1, "", "setup_caches"]], "torchtune.modules.peft": [[204, 1, 1, "", "AdapterModule"], [205, 1, 1, "", "DoRALinear"], [206, 1, 1, "", "LoRALinear"], [207, 0, 1, "", "disable_adapter"], [208, 0, 1, "", "get_adapter_params"], [209, 0, 1, "", "set_trainable_params"], [210, 0, 1, "", "validate_missing_and_unexpected_for_lora"], [211, 0, 1, "", "validate_state_dict_for_lora"]], "torchtune.modules.peft.AdapterModule": [[204, 4, 1, "", "adapter_params"]], "torchtune.modules.peft.DoRALinear": [[205, 4, 1, "", "adapter_params"], [205, 4, 1, "", "forward"], [205, 4, 1, "", "initialize_dora_magnitude"]], "torchtune.modules.peft.LoRALinear": [[206, 4, 1, "", "adapter_params"], [206, 4, 1, "", "forward"]], "torchtune.modules.tokenizers": [[212, 1, 1, "", "BaseTokenizer"], [213, 1, 1, "", "ModelTokenizer"], [214, 1, 1, "", "SentencePieceBaseTokenizer"], [215, 1, 1, "", "TikTokenBaseTokenizer"], [216, 0, 1, "", "parse_hf_tokenizer_json"], [217, 0, 1, "", "tokenize_messages_no_special_tokens"]], "torchtune.modules.tokenizers.BaseTokenizer": [[212, 4, 1, "", "decode"], [212, 4, 1, "", "encode"]], "torchtune.modules.tokenizers.ModelTokenizer": [[213, 4, 1, "", "tokenize_messages"]], "torchtune.modules.tokenizers.SentencePieceBaseTokenizer": [[214, 4, 1, "", "decode"], [214, 4, 1, "", "encode"]], "torchtune.modules.tokenizers.TikTokenBaseTokenizer": [[215, 4, 1, "", "decode"], [215, 4, 1, "", "encode"]], "torchtune.modules.transforms": [[218, 1, 1, "", "Transform"], [219, 1, 1, "", "VisionCrossAttentionMask"]], "torchtune.rlhf": [[220, 0, 1, "", "estimate_advantages"], [221, 0, 1, "", "get_rewards_ppo"], [226, 0, 1, "", "truncate_sequence_at_first_stop_token"]], "torchtune.rlhf.loss": [[222, 1, 1, "", "DPOLoss"], [223, 1, 1, "", "PPOLoss"], [224, 1, 1, "", "RSOLoss"], [225, 1, 1, "", "SimPOLoss"]], "torchtune.rlhf.loss.DPOLoss": [[222, 4, 1, "", "forward"]], "torchtune.rlhf.loss.PPOLoss": [[223, 4, 1, "", "forward"]], "torchtune.rlhf.loss.RSOLoss": [[224, 4, 1, "", "forward"]], "torchtune.rlhf.loss.SimPOLoss": [[225, 4, 1, "", "forward"]], "torchtune.training": [[227, 2, 1, "", "FSDPPolicyType"], [228, 1, 1, "", "FormattedCheckpointFiles"], [229, 1, 1, "", "FullModelHFCheckpointer"], [230, 1, 1, "", "FullModelMetaCheckpointer"], [231, 1, 1, "", "FullModelTorchTuneCheckpointer"], [232, 1, 1, "", "ModelType"], [233, 1, 1, "", "OptimizerInBackwardWrapper"], [234, 0, 1, "", "apply_selective_activation_checkpointing"], [235, 0, 1, "", "create_optim_in_bwd_wrapper"], [236, 0, 1, "", "get_cosine_schedule_with_warmup"], [237, 0, 1, "", "get_dtype"], [238, 0, 1, "", "get_full_finetune_fsdp_wrap_policy"], [239, 0, 1, "", "get_lr"], [240, 0, 1, "", "get_memory_stats"], [241, 0, 1, "", "get_quantizer_mode"], [242, 0, 1, "", "get_unmasked_sequence_lengths"], [243, 0, 1, "", "get_world_size_and_rank"], [244, 0, 1, "", "init_distributed"], [245, 0, 1, "", "is_distributed"], [246, 0, 1, "", "log_memory_stats"], [247, 0, 1, "", "lora_fsdp_wrap_policy"], [253, 0, 1, "", "register_optim_in_bwd_hooks"], [254, 0, 1, "", "set_activation_checkpointing"], [255, 0, 1, "", "set_default_dtype"], [256, 0, 1, "", "set_seed"], [257, 0, 1, "", "setup_torch_profiler"], [258, 0, 1, "", "update_state_dict_for_classifier"], [259, 0, 1, "", "validate_expected_param_dtype"]], "torchtune.training.FormattedCheckpointFiles": [[228, 4, 1, "", "build_checkpoint_filenames"]], "torchtune.training.FullModelHFCheckpointer": [[229, 4, 1, "", "load_checkpoint"], [229, 4, 1, "", "save_checkpoint"]], "torchtune.training.FullModelMetaCheckpointer": [[230, 4, 1, "", "load_checkpoint"], [230, 4, 1, "", "save_checkpoint"]], "torchtune.training.FullModelTorchTuneCheckpointer": [[231, 4, 1, "", "load_checkpoint"], [231, 4, 1, "", "save_checkpoint"]], "torchtune.training.OptimizerInBackwardWrapper": [[233, 4, 1, "", "get_last_lr"], [233, 4, 1, "", "get_optim_key"], [233, 4, 1, "", "load_state_dict"], [233, 4, 1, "", "set_lr_scheduler"], [233, 4, 1, "", "state_dict"], [233, 4, 1, "", "step_lr_scheduler"]], "torchtune.training.metric_logging": [[248, 1, 1, "", "CometLogger"], [249, 1, 1, "", "DiskLogger"], [250, 1, 1, "", "StdoutLogger"], [251, 1, 1, "", "TensorBoardLogger"], [252, 1, 1, "", "WandBLogger"]], "torchtune.training.metric_logging.CometLogger": [[248, 4, 1, "", "close"], [248, 4, 1, "", "log"], [248, 4, 1, "", "log_config"], [248, 4, 1, "", "log_dict"]], "torchtune.training.metric_logging.DiskLogger": [[249, 4, 1, "", "close"], [249, 4, 1, "", "log"], [249, 4, 1, "", "log_dict"]], "torchtune.training.metric_logging.StdoutLogger": [[250, 4, 1, "", "close"], [250, 4, 1, "", "log"], [250, 4, 1, "", "log_dict"]], "torchtune.training.metric_logging.TensorBoardLogger": [[251, 4, 1, "", "close"], [251, 4, 1, "", "log"], [251, 4, 1, "", "log_dict"]], "torchtune.training.metric_logging.WandBLogger": [[252, 4, 1, "", "close"], [252, 4, 1, "", "log"], [252, 4, 1, "", "log_config"], [252, 4, 1, "", "log_dict"]], "torchtune.utils": [[260, 0, 1, "", "batch_to_device"], [261, 0, 1, "", "get_device"], [262, 0, 1, "", "get_logger"], [263, 0, 1, "", "torch_version_ge"]]}, "objtypes": {"0": "py:function", "1": "py:class", "2": "py:data", "3": "py:property", "4": "py:method"}, "objnames": {"0": ["py", "function", "Python function"], "1": ["py", "class", "Python class"], "2": ["py", "data", "Python data"], "3": ["py", "property", "Python property"], "4": ["py", "method", "Python method"]}, "titleterms": {"torchtun": [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 22, 34, 40, 41, 43, 227, 266, 268, 273, 275, 277, 278, 279, 281, 282], "config": [0, 10, 24, 25, 273, 276], "data": [1, 11, 34, 40, 41, 43, 274], "text": [1, 2, 14, 16, 20, 277], "templat": [1, 9, 12, 14, 19, 21, 274], "type": 1, "messag": [1, 13, 14, 36], "transform": [1, 5, 13, 14, 15, 218], "collat": 1, "helper": 1, "function": 1, "dataset": [2, 9, 11, 12, 16, 18, 20, 274], "imag": [2, 14, 16], "gener": [2, 3, 73, 275, 277], "builder": 2, "class": [2, 19, 25], "model": [4, 5, 15, 21, 26, 273, 275, 276, 277, 278, 279, 280, 281], "llama3": [4, 116, 274, 277, 278, 281], "2": [4, 278], "vision": [4, 5], "1": [4, 278], "llama2": [4, 101, 274, 275, 279, 282], "code": 4, "llama": 4, "qwen": 4, "phi": 4, "3": 4, "mistral": [4, 158], "gemma": [4, 91], "clip": 4, "modul": 5, "compon": [5, 10, 24, 280], "build": [5, 267, 282], "block": 5, "loss": 5, "base": [5, 21], "token": [5, 14, 21, 274], "util": [5, 8], "peft": [5, 280], "fusion": 5, "rlhf": 6, "train": [7, 227, 270, 276], "checkpoint": [7, 22, 26, 275, 280], "reduc": 7, "precis": [7, 280], "distribut": [7, 270], "memori": [7, 279, 280, 282], "manag": 7, "schedul": 7, "metric": [7, 23, 26], "log": [7, 23, 26], "perform": [7, 279], "profil": 7, "miscellan": [7, 8], "chat": [9, 274], "exampl": [9, 12, 13, 15, 16, 18, 20], "format": [9, 12, 14, 16, 18, 20, 22], "load": [9, 12, 16, 18, 20, 21], "from": [9, 12, 16, 18, 20, 21, 274, 282], "hug": [9, 12, 16, 18, 20, 21, 275], "face": [9, 12, 16, 18, 20, 21, 275], "local": [9, 12, 16, 18, 20], "remot": [9, 12, 16], "specifi": 9, "convers": 9, "style": 9, "sharegpt": 9, "openai": 9, "renam": [9, 12], "column": [9, 12], "built": [9, 12, 16, 18, 19, 20, 273], "custom": [10, 13, 19, 274], "recip": [10, 25, 271, 273, 276, 278, 279, 281], "set": [10, 21], "up": [10, 275], "your": [10, 24, 25, 275, 276], "project": 10, "launch": 10, "overview": [11, 22, 268, 271, 275, 280], "pipelin": 11, "instruct": [12, 267, 277], "configur": [13, 24], "creat": [14, 15], "prompt": [14, 19, 21, 274], "access": [14, 277], "content": 14, "multimod": [15, 16], "us": [15, 19, 24, 25, 274, 275, 278, 282], "interleav": 16, "sampl": [17, 77], "pack": 17, "prefer": 18, "defin": 19, "via": [19, 267, 277], "dotpath": 19, "string": 19, "dictionari": 19, "prompttempl": [19, 38], "complet": 20, "json": 20, "txt": 20, "download": [21, 273, 275, 276], "file": 21, "max": 21, "sequenc": 21, "length": 21, "special": [21, 274], "handl": 22, "differ": 22, "hfcheckpoint": 22, "metacheckpoint": 22, "torchtunecheckpoint": 22, "intermedi": 22, "vs": 22, "final": 22, "lora": [22, 269, 275, 279, 280, 282], "put": [22, 282], "thi": 22, "all": [22, 24, 282], "togeth": [22, 282], "comet": 23, "logger": [23, 26], "about": 24, "where": 24, "do": 24, "paramet": [24, 280], "live": 24, "write": 24, "instanti": [24, 27], "referenc": 24, "other": [24, 275], "field": 24, "interpol": 24, "valid": [24, 30, 273], "best": 24, "practic": 24, "airtight": 24, "public": 24, "api": 24, "onli": 24, "command": 24, "line": 24, "overrid": 24, "remov": 24, "what": [25, 268, 278, 279, 281, 282], "ar": 25, "script": 25, "run": [25, 273, 275], "cli": [25, 273], "pars": [25, 29], "weight": [26, 280], "bias": 26, "w": 26, "b": 26, "log_config": 28, "alpacatomessag": 31, "chatmltempl": 32, "chosenrejectedtomessag": 33, "grammarerrorcorrectiontempl": 34, "inputoutputtomessag": 35, "openaitomessag": 37, "prompttemplateinterfac": 39, "questionanswertempl": 40, "role": 41, "sharegpttomessag": 42, "summarizetempl": 43, "format_content_with_imag": 44, "left_pad_sequ": 45, "load_imag": 46, "padded_col": 47, "padded_collate_dpo": 48, "padded_collate_sft": 49, "padded_collate_tiled_images_and_mask": 50, "truncat": 51, "validate_messag": 52, "concatdataset": 53, "packeddataset": 54, "preferencedataset": 55, "sftdataset": 56, "textcompletiondataset": 57, "alpaca_cleaned_dataset": 58, "alpaca_dataset": 59, "chat_dataset": 60, "cnn_dailymail_articles_dataset": 61, "grammar_dataset": 62, "hh_rlhf_helpful_dataset": 63, "instruct_dataset": 64, "llava_instruct_dataset": 65, "the_cauldron_dataset": 66, "preference_dataset": 67, "samsum_dataset": 68, "slimorca_dataset": 69, "stack_exchange_paired_dataset": 70, "text_completion_dataset": 71, "wikitext_dataset": 72, "generate_next_token": 74, "get_causal_mask_from_padding_mask": 75, "get_position_ids_from_padding_mask": 76, "tilepositionalembed": 78, "tiledtokenpositionalembed": 79, "tokenpositionalembed": 80, "clip_vision_encod": 81, "code_llama2_13b": 82, "code_llama2_70b": 83, "code_llama2_7b": 84, "lora_code_llama2_13b": 85, "lora_code_llama2_70b": 86, "lora_code_llama2_7b": 87, "qlora_code_llama2_13b": 88, "qlora_code_llama2_70b": 89, "qlora_code_llama2_7b": 90, "gemma_2b": 92, "gemma_7b": 93, "gemma_token": 94, "lora_gemma": 95, "lora_gemma_2b": 96, "lora_gemma_7b": 97, "qlora_gemma_2b": 98, "qlora_gemma_7b": 99, "llama2chattempl": 100, "llama2_13b": 102, "llama2_70b": 103, "llama2_7b": 104, "llama2_reward_7b": 105, "llama2_token": 106, "lora_llama2": 107, "lora_llama2_13b": 108, "lora_llama2_70b": 109, "lora_llama2_7b": 110, "lora_llama2_reward_7b": 111, "qlora_llama2_13b": 112, "qlora_llama2_70b": 113, "qlora_llama2_7b": 114, "qlora_llama2_reward_7b": 115, "llama3_70b": 117, "llama3_8b": 118, "llama3_token": 119, "lora_llama3": 120, "lora_llama3_70b": 121, "lora_llama3_8b": 122, "qlora_llama3_70b": 123, "qlora_llama3_8b": 124, "llama3_1": 125, "llama3_1_405b": 126, "llama3_1_70b": 127, "llama3_1_8b": 128, "lora_llama3_1": 129, "lora_llama3_1_405b": 130, "lora_llama3_1_70b": 131, "lora_llama3_1_8b": 132, "qlora_llama3_1_405b": 133, "qlora_llama3_1_70b": 134, "qlora_llama3_1_8b": 135, "llama3_2_1b": 136, "llama3_2_3b": 137, "lora_llama3_2_1b": 138, "lora_llama3_2_3b": 139, "qlora_llama3_2_1b": 140, "qlora_llama3_2_3b": 141, "llama3visionencod": 142, "llama3visionprojectionhead": 143, "llama3visiontransform": 144, "llama3_2_vision_11b": 145, "llama3_2_vision_decod": 146, "llama3_2_vision_encod": 147, "llama3_2_vision_transform": 148, "lora_llama3_2_vision_11b": 149, "lora_llama3_2_vision_decod": 150, "lora_llama3_2_vision_encod": 151, "qlora_llama3_2_vision_11b": 152, "mistralchattempl": 153, "lora_mistr": 154, "lora_mistral_7b": 155, "lora_mistral_classifi": 156, "lora_mistral_reward_7b": 157, "mistral_7b": 159, "mistral_classifi": 160, "mistral_reward_7b": 161, "mistral_token": 162, "qlora_mistral_7b": 163, "qlora_mistral_reward_7b": 164, "lora_phi3": 165, "lora_phi3_mini": 166, "phi3": 167, "phi3_mini": 168, "phi3_mini_token": 169, "qlora_phi3_mini": 170, "lora_qwen2": 171, "lora_qwen2_0_5b": 172, "lora_qwen2_1_5b": 173, "lora_qwen2_7b": 174, "qwen2": [175, 278], "qwen2_0_5b": 176, "qwen2_1_5b": 177, "qwen2_7b": 178, "qwen2_token": 179, "feedforward": 180, "fp32layernorm": 181, "kvcach": 182, "multiheadattent": 183, "rmsnorm": 184, "rotarypositionalembed": 185, "tanhgat": 186, "tiedlinear": 187, "transformercrossattentionlay": 188, "transformerdecod": 189, "transformerselfattentionlay": 190, "visiontransform": 191, "delete_kv_cach": 192, "disable_kv_cach": 193, "local_kv_cach": 194, "reparametrize_as_dtype_state_dict_post_hook": 195, "cewithchunkedoutputloss": 196, "forwardklloss": 197, "forwardklwithchunkedoutputloss": 198, "deepfusionmodel": 199, "fusionembed": 200, "fusionlay": 201, "get_fusion_param": 202, "register_fusion_modul": 203, "adaptermodul": 204, "doralinear": 205, "loralinear": 206, "disable_adapt": 207, "get_adapter_param": 208, "set_trainable_param": 209, "validate_missing_and_unexpected_for_lora": 210, "validate_state_dict_for_lora": 211, "basetoken": 212, "modeltoken": 213, "sentencepiecebasetoken": 214, "tiktokenbasetoken": 215, "parse_hf_tokenizer_json": 216, "tokenize_messages_no_special_token": 217, "visioncrossattentionmask": 219, "estimate_advantag": 220, "get_rewards_ppo": 221, "dpoloss": 222, "ppoloss": 223, "rsoloss": 224, "simpoloss": 225, "truncate_sequence_at_first_stop_token": 226, "fsdppolicytyp": 227, "formattedcheckpointfil": 228, "fullmodelhfcheckpoint": 229, "fullmodelmetacheckpoint": 230, "fullmodeltorchtunecheckpoint": 231, "modeltyp": 232, "optimizerinbackwardwrapp": 233, "apply_selective_activation_checkpoint": 234, "create_optim_in_bwd_wrapp": 235, "get_cosine_schedule_with_warmup": 236, "get_dtyp": 237, "get_full_finetune_fsdp_wrap_polici": 238, "get_lr": 239, "get_memory_stat": 240, "get_quantizer_mod": 241, "get_unmasked_sequence_length": 242, "get_world_size_and_rank": 243, "init_distribut": 244, "is_distribut": 245, "log_memory_stat": 246, "lora_fsdp_wrap_polici": 247, "cometlogg": 248, "disklogg": 249, "stdoutlogg": 250, "tensorboardlogg": 251, "wandblogg": 252, "register_optim_in_bwd_hook": 253, "set_activation_checkpoint": 254, "set_default_dtyp": 255, "set_se": 256, "setup_torch_profil": 257, "update_state_dict_for_classifi": 258, "validate_expected_param_dtyp": 259, "batch_to_devic": 260, "get_devic": 261, "get_logg": 262, "torch_version_g": 263, "comput": [265, 272], "time": [265, 272], "welcom": 266, "document": 266, "get": [266, 273, 277], "start": [266, 273], "tutori": 266, "instal": 267, "pre": 267, "requisit": 267, "pypi": 267, "git": 267, "clone": 267, "nightli": 267, "kei": 268, "concept": 268, "design": 268, "principl": 268, "singl": 269, "devic": [269, 281], "finetun": [269, 271, 275, 279, 281, 282], "quantiz": [270, 275, 277, 280, 281], "awar": 270, "qat": [270, 281], "list": 273, "copi": 273, "fine": [274, 276, 277, 278, 279, 280, 281, 282], "tune": [274, 276, 277, 278, 279, 280, 281, 282], "chang": 274, "when": 274, "should": 274, "i": 274, "end": 275, "workflow": 275, "7b": 275, "evalu": [275, 277, 281], "eleutherai": [275, 277], "s": [275, 277], "eval": [275, 277], "har": [275, 277], "speed": 275, "librari": 275, "upload": 275, "hub": 275, "first": 276, "llm": 276, "select": 276, "modifi": 276, "next": 276, "step": [276, 280], "meta": 277, "8b": [277, 278], "our": 277, "faster": 277, "distil": 278, "1b": 278, "knowledg": 278, "how": [278, 279], "doe": [278, 279], "work": [278, 279], "kd": 278, "ablat": 278, "studi": 278, "teacher": 278, "student": 278, "hyperparamet": 278, "learn": 278, "rate": 278, "ratio": 278, "5b": 278, "0": 278, "appli": [279, 281], "trade": 279, "off": 279, "optim": 280, "activ": 280, "offload": 280, "gradient": 280, "accumul": 280, "lower": [280, 281], "fuse": 280, "backward": 280, "pass": 280, "state": 280, "cpu": 280, "effici": 280, "low": 280, "rank": 280, "adapt": 280, "qlora": [280, 282], "decompos": 280, "dora": 280, "option": 281, "save": 282, "deep": 282, "dive": 282}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 56}}) \ No newline at end of file +Search.setIndex({"docnames": ["api_ref_config", "api_ref_data", "api_ref_datasets", "api_ref_generation", "api_ref_models", "api_ref_modules", "api_ref_rlhf", "api_ref_training", "api_ref_utilities", "basics/chat_datasets", "basics/custom_components", "basics/datasets_overview", "basics/instruct_datasets", "basics/message_transforms", "basics/messages", "basics/model_transforms", "basics/multimodal_datasets", "basics/packing", "basics/preference_datasets", "basics/prompt_templates", "basics/text_completion_datasets", "basics/tokenizers", "deep_dives/checkpointer", "deep_dives/comet_logging", "deep_dives/configs", "deep_dives/recipe_deepdive", "deep_dives/wandb_logging", "generated/torchtune.config.instantiate", "generated/torchtune.config.log_config", "generated/torchtune.config.parse", "generated/torchtune.config.validate", "generated/torchtune.data.AlpacaToMessages", "generated/torchtune.data.ChatMLTemplate", "generated/torchtune.data.ChosenRejectedToMessages", "generated/torchtune.data.GrammarErrorCorrectionTemplate", "generated/torchtune.data.InputOutputToMessages", "generated/torchtune.data.Message", "generated/torchtune.data.OpenAIToMessages", "generated/torchtune.data.PromptTemplate", "generated/torchtune.data.PromptTemplateInterface", "generated/torchtune.data.QuestionAnswerTemplate", "generated/torchtune.data.Role", "generated/torchtune.data.ShareGPTToMessages", "generated/torchtune.data.SummarizeTemplate", "generated/torchtune.data.format_content_with_images", "generated/torchtune.data.left_pad_sequence", "generated/torchtune.data.load_image", "generated/torchtune.data.padded_collate", "generated/torchtune.data.padded_collate_dpo", "generated/torchtune.data.padded_collate_sft", "generated/torchtune.data.padded_collate_tiled_images_and_mask", "generated/torchtune.data.truncate", "generated/torchtune.data.validate_messages", "generated/torchtune.datasets.ConcatDataset", "generated/torchtune.datasets.PackedDataset", "generated/torchtune.datasets.PreferenceDataset", "generated/torchtune.datasets.SFTDataset", "generated/torchtune.datasets.TextCompletionDataset", "generated/torchtune.datasets.alpaca_cleaned_dataset", "generated/torchtune.datasets.alpaca_dataset", "generated/torchtune.datasets.chat_dataset", "generated/torchtune.datasets.cnn_dailymail_articles_dataset", "generated/torchtune.datasets.grammar_dataset", "generated/torchtune.datasets.hh_rlhf_helpful_dataset", "generated/torchtune.datasets.instruct_dataset", "generated/torchtune.datasets.multimodal.llava_instruct_dataset", "generated/torchtune.datasets.multimodal.the_cauldron_dataset", "generated/torchtune.datasets.preference_dataset", "generated/torchtune.datasets.samsum_dataset", "generated/torchtune.datasets.slimorca_dataset", "generated/torchtune.datasets.stack_exchange_paired_dataset", "generated/torchtune.datasets.text_completion_dataset", "generated/torchtune.datasets.wikitext_dataset", "generated/torchtune.generation.generate", "generated/torchtune.generation.generate_next_token", "generated/torchtune.generation.get_causal_mask_from_padding_mask", "generated/torchtune.generation.get_position_ids_from_padding_mask", "generated/torchtune.generation.sample", "generated/torchtune.models.clip.TilePositionalEmbedding", "generated/torchtune.models.clip.TiledTokenPositionalEmbedding", "generated/torchtune.models.clip.TokenPositionalEmbedding", "generated/torchtune.models.clip.clip_vision_encoder", "generated/torchtune.models.code_llama2.code_llama2_13b", "generated/torchtune.models.code_llama2.code_llama2_70b", "generated/torchtune.models.code_llama2.code_llama2_7b", "generated/torchtune.models.code_llama2.lora_code_llama2_13b", "generated/torchtune.models.code_llama2.lora_code_llama2_70b", "generated/torchtune.models.code_llama2.lora_code_llama2_7b", "generated/torchtune.models.code_llama2.qlora_code_llama2_13b", "generated/torchtune.models.code_llama2.qlora_code_llama2_70b", "generated/torchtune.models.code_llama2.qlora_code_llama2_7b", "generated/torchtune.models.gemma.gemma", "generated/torchtune.models.gemma.gemma_2b", "generated/torchtune.models.gemma.gemma_7b", "generated/torchtune.models.gemma.gemma_tokenizer", "generated/torchtune.models.gemma.lora_gemma", "generated/torchtune.models.gemma.lora_gemma_2b", "generated/torchtune.models.gemma.lora_gemma_7b", "generated/torchtune.models.gemma.qlora_gemma_2b", "generated/torchtune.models.gemma.qlora_gemma_7b", "generated/torchtune.models.llama2.Llama2ChatTemplate", "generated/torchtune.models.llama2.llama2", "generated/torchtune.models.llama2.llama2_13b", "generated/torchtune.models.llama2.llama2_70b", "generated/torchtune.models.llama2.llama2_7b", "generated/torchtune.models.llama2.llama2_reward_7b", "generated/torchtune.models.llama2.llama2_tokenizer", "generated/torchtune.models.llama2.lora_llama2", "generated/torchtune.models.llama2.lora_llama2_13b", "generated/torchtune.models.llama2.lora_llama2_70b", "generated/torchtune.models.llama2.lora_llama2_7b", "generated/torchtune.models.llama2.lora_llama2_reward_7b", "generated/torchtune.models.llama2.qlora_llama2_13b", "generated/torchtune.models.llama2.qlora_llama2_70b", "generated/torchtune.models.llama2.qlora_llama2_7b", "generated/torchtune.models.llama2.qlora_llama2_reward_7b", "generated/torchtune.models.llama3.llama3", "generated/torchtune.models.llama3.llama3_70b", "generated/torchtune.models.llama3.llama3_8b", "generated/torchtune.models.llama3.llama3_tokenizer", "generated/torchtune.models.llama3.lora_llama3", "generated/torchtune.models.llama3.lora_llama3_70b", "generated/torchtune.models.llama3.lora_llama3_8b", "generated/torchtune.models.llama3.qlora_llama3_70b", "generated/torchtune.models.llama3.qlora_llama3_8b", "generated/torchtune.models.llama3_1.llama3_1", "generated/torchtune.models.llama3_1.llama3_1_405b", "generated/torchtune.models.llama3_1.llama3_1_70b", "generated/torchtune.models.llama3_1.llama3_1_8b", "generated/torchtune.models.llama3_1.lora_llama3_1", "generated/torchtune.models.llama3_1.lora_llama3_1_405b", "generated/torchtune.models.llama3_1.lora_llama3_1_70b", "generated/torchtune.models.llama3_1.lora_llama3_1_8b", "generated/torchtune.models.llama3_1.qlora_llama3_1_405b", "generated/torchtune.models.llama3_1.qlora_llama3_1_70b", "generated/torchtune.models.llama3_1.qlora_llama3_1_8b", "generated/torchtune.models.llama3_2.llama3_2_1b", "generated/torchtune.models.llama3_2.llama3_2_3b", "generated/torchtune.models.llama3_2.lora_llama3_2_1b", "generated/torchtune.models.llama3_2.lora_llama3_2_3b", "generated/torchtune.models.llama3_2.qlora_llama3_2_1b", "generated/torchtune.models.llama3_2.qlora_llama3_2_3b", "generated/torchtune.models.llama3_2_vision.Llama3VisionEncoder", "generated/torchtune.models.llama3_2_vision.Llama3VisionProjectionHead", "generated/torchtune.models.llama3_2_vision.Llama3VisionTransform", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_11b", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_decoder", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_encoder", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_transform", "generated/torchtune.models.llama3_2_vision.lora_llama3_2_vision_11b", "generated/torchtune.models.llama3_2_vision.lora_llama3_2_vision_decoder", "generated/torchtune.models.llama3_2_vision.lora_llama3_2_vision_encoder", "generated/torchtune.models.llama3_2_vision.qlora_llama3_2_vision_11b", "generated/torchtune.models.mistral.MistralChatTemplate", "generated/torchtune.models.mistral.lora_mistral", "generated/torchtune.models.mistral.lora_mistral_7b", "generated/torchtune.models.mistral.lora_mistral_classifier", "generated/torchtune.models.mistral.lora_mistral_reward_7b", "generated/torchtune.models.mistral.mistral", "generated/torchtune.models.mistral.mistral_7b", "generated/torchtune.models.mistral.mistral_classifier", "generated/torchtune.models.mistral.mistral_reward_7b", "generated/torchtune.models.mistral.mistral_tokenizer", "generated/torchtune.models.mistral.qlora_mistral_7b", "generated/torchtune.models.mistral.qlora_mistral_reward_7b", "generated/torchtune.models.phi3.lora_phi3", "generated/torchtune.models.phi3.lora_phi3_mini", "generated/torchtune.models.phi3.phi3", "generated/torchtune.models.phi3.phi3_mini", "generated/torchtune.models.phi3.phi3_mini_tokenizer", "generated/torchtune.models.phi3.qlora_phi3_mini", "generated/torchtune.models.qwen2.lora_qwen2", "generated/torchtune.models.qwen2.lora_qwen2_0_5b", "generated/torchtune.models.qwen2.lora_qwen2_1_5b", "generated/torchtune.models.qwen2.lora_qwen2_7b", "generated/torchtune.models.qwen2.qwen2", "generated/torchtune.models.qwen2.qwen2_0_5b", "generated/torchtune.models.qwen2.qwen2_1_5b", "generated/torchtune.models.qwen2.qwen2_7b", "generated/torchtune.models.qwen2.qwen2_tokenizer", "generated/torchtune.modules.FeedForward", "generated/torchtune.modules.Fp32LayerNorm", "generated/torchtune.modules.KVCache", "generated/torchtune.modules.MultiHeadAttention", "generated/torchtune.modules.RMSNorm", "generated/torchtune.modules.RotaryPositionalEmbeddings", "generated/torchtune.modules.TanhGate", "generated/torchtune.modules.TiedLinear", "generated/torchtune.modules.TransformerCrossAttentionLayer", "generated/torchtune.modules.TransformerDecoder", "generated/torchtune.modules.TransformerSelfAttentionLayer", "generated/torchtune.modules.VisionTransformer", "generated/torchtune.modules.common_utils.delete_kv_caches", "generated/torchtune.modules.common_utils.disable_kv_cache", "generated/torchtune.modules.common_utils.local_kv_cache", "generated/torchtune.modules.common_utils.reparametrize_as_dtype_state_dict_post_hook", "generated/torchtune.modules.loss.CEWithChunkedOutputLoss", "generated/torchtune.modules.loss.ForwardKLLoss", "generated/torchtune.modules.loss.ForwardKLWithChunkedOutputLoss", "generated/torchtune.modules.model_fusion.DeepFusionModel", "generated/torchtune.modules.model_fusion.FusionEmbedding", "generated/torchtune.modules.model_fusion.FusionLayer", "generated/torchtune.modules.model_fusion.get_fusion_params", "generated/torchtune.modules.model_fusion.register_fusion_module", "generated/torchtune.modules.peft.AdapterModule", "generated/torchtune.modules.peft.DoRALinear", "generated/torchtune.modules.peft.LoRALinear", "generated/torchtune.modules.peft.disable_adapter", "generated/torchtune.modules.peft.get_adapter_params", "generated/torchtune.modules.peft.set_trainable_params", "generated/torchtune.modules.peft.validate_missing_and_unexpected_for_lora", "generated/torchtune.modules.peft.validate_state_dict_for_lora", "generated/torchtune.modules.tokenizers.BaseTokenizer", "generated/torchtune.modules.tokenizers.ModelTokenizer", "generated/torchtune.modules.tokenizers.SentencePieceBaseTokenizer", "generated/torchtune.modules.tokenizers.TikTokenBaseTokenizer", "generated/torchtune.modules.tokenizers.parse_hf_tokenizer_json", "generated/torchtune.modules.tokenizers.tokenize_messages_no_special_tokens", "generated/torchtune.modules.transforms.Transform", "generated/torchtune.modules.transforms.VisionCrossAttentionMask", "generated/torchtune.rlhf.estimate_advantages", "generated/torchtune.rlhf.get_rewards_ppo", "generated/torchtune.rlhf.loss.DPOLoss", "generated/torchtune.rlhf.loss.PPOLoss", "generated/torchtune.rlhf.loss.RSOLoss", "generated/torchtune.rlhf.loss.SimPOLoss", "generated/torchtune.rlhf.truncate_sequence_at_first_stop_token", "generated/torchtune.training.FSDPPolicyType", "generated/torchtune.training.FormattedCheckpointFiles", "generated/torchtune.training.FullModelHFCheckpointer", "generated/torchtune.training.FullModelMetaCheckpointer", "generated/torchtune.training.FullModelTorchTuneCheckpointer", "generated/torchtune.training.ModelType", "generated/torchtune.training.OptimizerInBackwardWrapper", "generated/torchtune.training.apply_selective_activation_checkpointing", "generated/torchtune.training.create_optim_in_bwd_wrapper", "generated/torchtune.training.get_cosine_schedule_with_warmup", "generated/torchtune.training.get_dtype", "generated/torchtune.training.get_full_finetune_fsdp_wrap_policy", "generated/torchtune.training.get_lr", "generated/torchtune.training.get_memory_stats", "generated/torchtune.training.get_quantizer_mode", "generated/torchtune.training.get_unmasked_sequence_lengths", "generated/torchtune.training.get_world_size_and_rank", "generated/torchtune.training.init_distributed", "generated/torchtune.training.is_distributed", "generated/torchtune.training.log_memory_stats", "generated/torchtune.training.lora_fsdp_wrap_policy", "generated/torchtune.training.metric_logging.CometLogger", "generated/torchtune.training.metric_logging.DiskLogger", "generated/torchtune.training.metric_logging.StdoutLogger", "generated/torchtune.training.metric_logging.TensorBoardLogger", "generated/torchtune.training.metric_logging.WandBLogger", "generated/torchtune.training.register_optim_in_bwd_hooks", "generated/torchtune.training.set_activation_checkpointing", "generated/torchtune.training.set_default_dtype", "generated/torchtune.training.set_seed", "generated/torchtune.training.setup_torch_profiler", "generated/torchtune.training.update_state_dict_for_classifier", "generated/torchtune.training.validate_expected_param_dtype", "generated/torchtune.utils.batch_to_device", "generated/torchtune.utils.get_device", "generated/torchtune.utils.get_logger", "generated/torchtune.utils.torch_version_ge", "generated_examples/index", "generated_examples/sg_execution_times", "index", "install", "overview", "recipes/lora_finetune_single_device", "recipes/qat_distributed", "recipes/recipes_overview", "sg_execution_times", "tune_cli", "tutorials/chat", "tutorials/e2e_flow", "tutorials/first_finetune_tutorial", "tutorials/llama3", "tutorials/llama_kd_tutorial", "tutorials/lora_finetune", "tutorials/memory_optimizations", "tutorials/qat_finetune", "tutorials/qlora_finetune"], "filenames": ["api_ref_config.rst", "api_ref_data.rst", "api_ref_datasets.rst", "api_ref_generation.rst", "api_ref_models.rst", "api_ref_modules.rst", "api_ref_rlhf.rst", "api_ref_training.rst", "api_ref_utilities.rst", "basics/chat_datasets.rst", "basics/custom_components.rst", "basics/datasets_overview.rst", "basics/instruct_datasets.rst", "basics/message_transforms.rst", "basics/messages.rst", "basics/model_transforms.rst", "basics/multimodal_datasets.rst", "basics/packing.rst", "basics/preference_datasets.rst", "basics/prompt_templates.rst", "basics/text_completion_datasets.rst", "basics/tokenizers.rst", "deep_dives/checkpointer.rst", "deep_dives/comet_logging.rst", "deep_dives/configs.rst", "deep_dives/recipe_deepdive.rst", "deep_dives/wandb_logging.rst", "generated/torchtune.config.instantiate.rst", "generated/torchtune.config.log_config.rst", "generated/torchtune.config.parse.rst", "generated/torchtune.config.validate.rst", "generated/torchtune.data.AlpacaToMessages.rst", "generated/torchtune.data.ChatMLTemplate.rst", "generated/torchtune.data.ChosenRejectedToMessages.rst", "generated/torchtune.data.GrammarErrorCorrectionTemplate.rst", "generated/torchtune.data.InputOutputToMessages.rst", "generated/torchtune.data.Message.rst", "generated/torchtune.data.OpenAIToMessages.rst", "generated/torchtune.data.PromptTemplate.rst", "generated/torchtune.data.PromptTemplateInterface.rst", "generated/torchtune.data.QuestionAnswerTemplate.rst", "generated/torchtune.data.Role.rst", "generated/torchtune.data.ShareGPTToMessages.rst", "generated/torchtune.data.SummarizeTemplate.rst", "generated/torchtune.data.format_content_with_images.rst", "generated/torchtune.data.left_pad_sequence.rst", "generated/torchtune.data.load_image.rst", "generated/torchtune.data.padded_collate.rst", "generated/torchtune.data.padded_collate_dpo.rst", "generated/torchtune.data.padded_collate_sft.rst", "generated/torchtune.data.padded_collate_tiled_images_and_mask.rst", "generated/torchtune.data.truncate.rst", "generated/torchtune.data.validate_messages.rst", "generated/torchtune.datasets.ConcatDataset.rst", "generated/torchtune.datasets.PackedDataset.rst", "generated/torchtune.datasets.PreferenceDataset.rst", "generated/torchtune.datasets.SFTDataset.rst", "generated/torchtune.datasets.TextCompletionDataset.rst", "generated/torchtune.datasets.alpaca_cleaned_dataset.rst", "generated/torchtune.datasets.alpaca_dataset.rst", "generated/torchtune.datasets.chat_dataset.rst", "generated/torchtune.datasets.cnn_dailymail_articles_dataset.rst", "generated/torchtune.datasets.grammar_dataset.rst", "generated/torchtune.datasets.hh_rlhf_helpful_dataset.rst", "generated/torchtune.datasets.instruct_dataset.rst", "generated/torchtune.datasets.multimodal.llava_instruct_dataset.rst", "generated/torchtune.datasets.multimodal.the_cauldron_dataset.rst", "generated/torchtune.datasets.preference_dataset.rst", "generated/torchtune.datasets.samsum_dataset.rst", "generated/torchtune.datasets.slimorca_dataset.rst", "generated/torchtune.datasets.stack_exchange_paired_dataset.rst", "generated/torchtune.datasets.text_completion_dataset.rst", "generated/torchtune.datasets.wikitext_dataset.rst", "generated/torchtune.generation.generate.rst", "generated/torchtune.generation.generate_next_token.rst", "generated/torchtune.generation.get_causal_mask_from_padding_mask.rst", "generated/torchtune.generation.get_position_ids_from_padding_mask.rst", "generated/torchtune.generation.sample.rst", "generated/torchtune.models.clip.TilePositionalEmbedding.rst", "generated/torchtune.models.clip.TiledTokenPositionalEmbedding.rst", "generated/torchtune.models.clip.TokenPositionalEmbedding.rst", "generated/torchtune.models.clip.clip_vision_encoder.rst", "generated/torchtune.models.code_llama2.code_llama2_13b.rst", "generated/torchtune.models.code_llama2.code_llama2_70b.rst", "generated/torchtune.models.code_llama2.code_llama2_7b.rst", "generated/torchtune.models.code_llama2.lora_code_llama2_13b.rst", "generated/torchtune.models.code_llama2.lora_code_llama2_70b.rst", "generated/torchtune.models.code_llama2.lora_code_llama2_7b.rst", "generated/torchtune.models.code_llama2.qlora_code_llama2_13b.rst", "generated/torchtune.models.code_llama2.qlora_code_llama2_70b.rst", "generated/torchtune.models.code_llama2.qlora_code_llama2_7b.rst", "generated/torchtune.models.gemma.gemma.rst", "generated/torchtune.models.gemma.gemma_2b.rst", "generated/torchtune.models.gemma.gemma_7b.rst", "generated/torchtune.models.gemma.gemma_tokenizer.rst", "generated/torchtune.models.gemma.lora_gemma.rst", "generated/torchtune.models.gemma.lora_gemma_2b.rst", "generated/torchtune.models.gemma.lora_gemma_7b.rst", "generated/torchtune.models.gemma.qlora_gemma_2b.rst", "generated/torchtune.models.gemma.qlora_gemma_7b.rst", "generated/torchtune.models.llama2.Llama2ChatTemplate.rst", "generated/torchtune.models.llama2.llama2.rst", "generated/torchtune.models.llama2.llama2_13b.rst", "generated/torchtune.models.llama2.llama2_70b.rst", "generated/torchtune.models.llama2.llama2_7b.rst", "generated/torchtune.models.llama2.llama2_reward_7b.rst", "generated/torchtune.models.llama2.llama2_tokenizer.rst", "generated/torchtune.models.llama2.lora_llama2.rst", "generated/torchtune.models.llama2.lora_llama2_13b.rst", "generated/torchtune.models.llama2.lora_llama2_70b.rst", "generated/torchtune.models.llama2.lora_llama2_7b.rst", "generated/torchtune.models.llama2.lora_llama2_reward_7b.rst", "generated/torchtune.models.llama2.qlora_llama2_13b.rst", "generated/torchtune.models.llama2.qlora_llama2_70b.rst", "generated/torchtune.models.llama2.qlora_llama2_7b.rst", "generated/torchtune.models.llama2.qlora_llama2_reward_7b.rst", "generated/torchtune.models.llama3.llama3.rst", "generated/torchtune.models.llama3.llama3_70b.rst", "generated/torchtune.models.llama3.llama3_8b.rst", "generated/torchtune.models.llama3.llama3_tokenizer.rst", "generated/torchtune.models.llama3.lora_llama3.rst", "generated/torchtune.models.llama3.lora_llama3_70b.rst", "generated/torchtune.models.llama3.lora_llama3_8b.rst", "generated/torchtune.models.llama3.qlora_llama3_70b.rst", "generated/torchtune.models.llama3.qlora_llama3_8b.rst", "generated/torchtune.models.llama3_1.llama3_1.rst", "generated/torchtune.models.llama3_1.llama3_1_405b.rst", "generated/torchtune.models.llama3_1.llama3_1_70b.rst", "generated/torchtune.models.llama3_1.llama3_1_8b.rst", "generated/torchtune.models.llama3_1.lora_llama3_1.rst", "generated/torchtune.models.llama3_1.lora_llama3_1_405b.rst", "generated/torchtune.models.llama3_1.lora_llama3_1_70b.rst", "generated/torchtune.models.llama3_1.lora_llama3_1_8b.rst", "generated/torchtune.models.llama3_1.qlora_llama3_1_405b.rst", "generated/torchtune.models.llama3_1.qlora_llama3_1_70b.rst", "generated/torchtune.models.llama3_1.qlora_llama3_1_8b.rst", "generated/torchtune.models.llama3_2.llama3_2_1b.rst", "generated/torchtune.models.llama3_2.llama3_2_3b.rst", "generated/torchtune.models.llama3_2.lora_llama3_2_1b.rst", "generated/torchtune.models.llama3_2.lora_llama3_2_3b.rst", "generated/torchtune.models.llama3_2.qlora_llama3_2_1b.rst", "generated/torchtune.models.llama3_2.qlora_llama3_2_3b.rst", "generated/torchtune.models.llama3_2_vision.Llama3VisionEncoder.rst", "generated/torchtune.models.llama3_2_vision.Llama3VisionProjectionHead.rst", "generated/torchtune.models.llama3_2_vision.Llama3VisionTransform.rst", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_11b.rst", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_decoder.rst", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_encoder.rst", "generated/torchtune.models.llama3_2_vision.llama3_2_vision_transform.rst", "generated/torchtune.models.llama3_2_vision.lora_llama3_2_vision_11b.rst", "generated/torchtune.models.llama3_2_vision.lora_llama3_2_vision_decoder.rst", "generated/torchtune.models.llama3_2_vision.lora_llama3_2_vision_encoder.rst", "generated/torchtune.models.llama3_2_vision.qlora_llama3_2_vision_11b.rst", "generated/torchtune.models.mistral.MistralChatTemplate.rst", "generated/torchtune.models.mistral.lora_mistral.rst", "generated/torchtune.models.mistral.lora_mistral_7b.rst", "generated/torchtune.models.mistral.lora_mistral_classifier.rst", "generated/torchtune.models.mistral.lora_mistral_reward_7b.rst", "generated/torchtune.models.mistral.mistral.rst", "generated/torchtune.models.mistral.mistral_7b.rst", "generated/torchtune.models.mistral.mistral_classifier.rst", "generated/torchtune.models.mistral.mistral_reward_7b.rst", "generated/torchtune.models.mistral.mistral_tokenizer.rst", "generated/torchtune.models.mistral.qlora_mistral_7b.rst", "generated/torchtune.models.mistral.qlora_mistral_reward_7b.rst", "generated/torchtune.models.phi3.lora_phi3.rst", "generated/torchtune.models.phi3.lora_phi3_mini.rst", "generated/torchtune.models.phi3.phi3.rst", "generated/torchtune.models.phi3.phi3_mini.rst", "generated/torchtune.models.phi3.phi3_mini_tokenizer.rst", "generated/torchtune.models.phi3.qlora_phi3_mini.rst", "generated/torchtune.models.qwen2.lora_qwen2.rst", "generated/torchtune.models.qwen2.lora_qwen2_0_5b.rst", "generated/torchtune.models.qwen2.lora_qwen2_1_5b.rst", "generated/torchtune.models.qwen2.lora_qwen2_7b.rst", "generated/torchtune.models.qwen2.qwen2.rst", "generated/torchtune.models.qwen2.qwen2_0_5b.rst", "generated/torchtune.models.qwen2.qwen2_1_5b.rst", "generated/torchtune.models.qwen2.qwen2_7b.rst", "generated/torchtune.models.qwen2.qwen2_tokenizer.rst", "generated/torchtune.modules.FeedForward.rst", "generated/torchtune.modules.Fp32LayerNorm.rst", "generated/torchtune.modules.KVCache.rst", "generated/torchtune.modules.MultiHeadAttention.rst", "generated/torchtune.modules.RMSNorm.rst", "generated/torchtune.modules.RotaryPositionalEmbeddings.rst", "generated/torchtune.modules.TanhGate.rst", "generated/torchtune.modules.TiedLinear.rst", "generated/torchtune.modules.TransformerCrossAttentionLayer.rst", "generated/torchtune.modules.TransformerDecoder.rst", "generated/torchtune.modules.TransformerSelfAttentionLayer.rst", "generated/torchtune.modules.VisionTransformer.rst", "generated/torchtune.modules.common_utils.delete_kv_caches.rst", "generated/torchtune.modules.common_utils.disable_kv_cache.rst", "generated/torchtune.modules.common_utils.local_kv_cache.rst", "generated/torchtune.modules.common_utils.reparametrize_as_dtype_state_dict_post_hook.rst", "generated/torchtune.modules.loss.CEWithChunkedOutputLoss.rst", "generated/torchtune.modules.loss.ForwardKLLoss.rst", "generated/torchtune.modules.loss.ForwardKLWithChunkedOutputLoss.rst", "generated/torchtune.modules.model_fusion.DeepFusionModel.rst", "generated/torchtune.modules.model_fusion.FusionEmbedding.rst", "generated/torchtune.modules.model_fusion.FusionLayer.rst", "generated/torchtune.modules.model_fusion.get_fusion_params.rst", "generated/torchtune.modules.model_fusion.register_fusion_module.rst", "generated/torchtune.modules.peft.AdapterModule.rst", "generated/torchtune.modules.peft.DoRALinear.rst", "generated/torchtune.modules.peft.LoRALinear.rst", "generated/torchtune.modules.peft.disable_adapter.rst", "generated/torchtune.modules.peft.get_adapter_params.rst", "generated/torchtune.modules.peft.set_trainable_params.rst", "generated/torchtune.modules.peft.validate_missing_and_unexpected_for_lora.rst", "generated/torchtune.modules.peft.validate_state_dict_for_lora.rst", "generated/torchtune.modules.tokenizers.BaseTokenizer.rst", "generated/torchtune.modules.tokenizers.ModelTokenizer.rst", "generated/torchtune.modules.tokenizers.SentencePieceBaseTokenizer.rst", "generated/torchtune.modules.tokenizers.TikTokenBaseTokenizer.rst", "generated/torchtune.modules.tokenizers.parse_hf_tokenizer_json.rst", "generated/torchtune.modules.tokenizers.tokenize_messages_no_special_tokens.rst", "generated/torchtune.modules.transforms.Transform.rst", "generated/torchtune.modules.transforms.VisionCrossAttentionMask.rst", "generated/torchtune.rlhf.estimate_advantages.rst", "generated/torchtune.rlhf.get_rewards_ppo.rst", "generated/torchtune.rlhf.loss.DPOLoss.rst", "generated/torchtune.rlhf.loss.PPOLoss.rst", "generated/torchtune.rlhf.loss.RSOLoss.rst", "generated/torchtune.rlhf.loss.SimPOLoss.rst", "generated/torchtune.rlhf.truncate_sequence_at_first_stop_token.rst", "generated/torchtune.training.FSDPPolicyType.rst", "generated/torchtune.training.FormattedCheckpointFiles.rst", "generated/torchtune.training.FullModelHFCheckpointer.rst", "generated/torchtune.training.FullModelMetaCheckpointer.rst", "generated/torchtune.training.FullModelTorchTuneCheckpointer.rst", "generated/torchtune.training.ModelType.rst", "generated/torchtune.training.OptimizerInBackwardWrapper.rst", "generated/torchtune.training.apply_selective_activation_checkpointing.rst", "generated/torchtune.training.create_optim_in_bwd_wrapper.rst", "generated/torchtune.training.get_cosine_schedule_with_warmup.rst", "generated/torchtune.training.get_dtype.rst", "generated/torchtune.training.get_full_finetune_fsdp_wrap_policy.rst", "generated/torchtune.training.get_lr.rst", "generated/torchtune.training.get_memory_stats.rst", "generated/torchtune.training.get_quantizer_mode.rst", "generated/torchtune.training.get_unmasked_sequence_lengths.rst", "generated/torchtune.training.get_world_size_and_rank.rst", "generated/torchtune.training.init_distributed.rst", "generated/torchtune.training.is_distributed.rst", "generated/torchtune.training.log_memory_stats.rst", "generated/torchtune.training.lora_fsdp_wrap_policy.rst", "generated/torchtune.training.metric_logging.CometLogger.rst", "generated/torchtune.training.metric_logging.DiskLogger.rst", "generated/torchtune.training.metric_logging.StdoutLogger.rst", "generated/torchtune.training.metric_logging.TensorBoardLogger.rst", "generated/torchtune.training.metric_logging.WandBLogger.rst", "generated/torchtune.training.register_optim_in_bwd_hooks.rst", "generated/torchtune.training.set_activation_checkpointing.rst", "generated/torchtune.training.set_default_dtype.rst", "generated/torchtune.training.set_seed.rst", "generated/torchtune.training.setup_torch_profiler.rst", "generated/torchtune.training.update_state_dict_for_classifier.rst", "generated/torchtune.training.validate_expected_param_dtype.rst", "generated/torchtune.utils.batch_to_device.rst", "generated/torchtune.utils.get_device.rst", "generated/torchtune.utils.get_logger.rst", "generated/torchtune.utils.torch_version_ge.rst", "generated_examples/index.rst", "generated_examples/sg_execution_times.rst", "index.rst", "install.rst", "overview.rst", "recipes/lora_finetune_single_device.rst", "recipes/qat_distributed.rst", "recipes/recipes_overview.rst", "sg_execution_times.rst", "tune_cli.rst", "tutorials/chat.rst", "tutorials/e2e_flow.rst", "tutorials/first_finetune_tutorial.rst", "tutorials/llama3.rst", "tutorials/llama_kd_tutorial.rst", "tutorials/lora_finetune.rst", "tutorials/memory_optimizations.rst", "tutorials/qat_finetune.rst", "tutorials/qlora_finetune.rst"], "titles": ["torchtune.config", "torchtune.data", "torchtune.datasets", "torchtune.generation", "torchtune.models", "torchtune.modules", "torchtune.rlhf", "torchtune.training", "torchtune.utils", "Chat Datasets", "Custom Components and Recipes", "Datasets Overview", "Instruct Datasets", "Message Transforms", "Messages", "Multimodal Transforms", "Multimodal Datasets", "Sample packing", "Preference Datasets", "Prompt Templates", "Text-completion Datasets", "Tokenizers", "Checkpointing in torchtune", "Logging to Comet", "All About Configs", "What Are Recipes?", "Logging to Weights & Biases", "instantiate", "log_config", "parse", "validate", "AlpacaToMessages", "ChatMLTemplate", "ChosenRejectedToMessages", "torchtune.data.GrammarErrorCorrectionTemplate", "InputOutputToMessages", "Message", "OpenAIToMessages", "PromptTemplate", "PromptTemplateInterface", "torchtune.data.QuestionAnswerTemplate", "torchtune.data.Role", "ShareGPTToMessages", "torchtune.data.SummarizeTemplate", "format_content_with_images", "left_pad_sequence", "load_image", "padded_collate", "padded_collate_dpo", "padded_collate_sft", "padded_collate_tiled_images_and_mask", "truncate", "validate_messages", "ConcatDataset", "PackedDataset", "PreferenceDataset", "SFTDataset", "TextCompletionDataset", "alpaca_cleaned_dataset", "alpaca_dataset", "chat_dataset", "cnn_dailymail_articles_dataset", "grammar_dataset", "hh_rlhf_helpful_dataset", "instruct_dataset", "llava_instruct_dataset", "the_cauldron_dataset", "preference_dataset", "samsum_dataset", "slimorca_dataset", "stack_exchange_paired_dataset", "text_completion_dataset", "wikitext_dataset", "generate", "generate_next_token", "get_causal_mask_from_padding_mask", "get_position_ids_from_padding_mask", "sample", "TilePositionalEmbedding", "TiledTokenPositionalEmbedding", "TokenPositionalEmbedding", "clip_vision_encoder", "code_llama2_13b", "code_llama2_70b", "code_llama2_7b", "lora_code_llama2_13b", "lora_code_llama2_70b", "lora_code_llama2_7b", "qlora_code_llama2_13b", "qlora_code_llama2_70b", "qlora_code_llama2_7b", "gemma", "gemma_2b", "gemma_7b", "gemma_tokenizer", "lora_gemma", "lora_gemma_2b", "lora_gemma_7b", "qlora_gemma_2b", "qlora_gemma_7b", "Llama2ChatTemplate", "llama2", "llama2_13b", "llama2_70b", "llama2_7b", "llama2_reward_7b", "llama2_tokenizer", "lora_llama2", "lora_llama2_13b", "lora_llama2_70b", "lora_llama2_7b", "lora_llama2_reward_7b", "qlora_llama2_13b", "qlora_llama2_70b", "qlora_llama2_7b", "qlora_llama2_reward_7b", "llama3", "llama3_70b", "llama3_8b", "llama3_tokenizer", "lora_llama3", "lora_llama3_70b", "lora_llama3_8b", "qlora_llama3_70b", "qlora_llama3_8b", "llama3_1", "llama3_1_405b", "llama3_1_70b", "llama3_1_8b", "lora_llama3_1", "lora_llama3_1_405b", "lora_llama3_1_70b", "lora_llama3_1_8b", "qlora_llama3_1_405b", "qlora_llama3_1_70b", "qlora_llama3_1_8b", "llama3_2_1b", "llama3_2_3b", "lora_llama3_2_1b", "lora_llama3_2_3b", "qlora_llama3_2_1b", "qlora_llama3_2_3b", "Llama3VisionEncoder", "Llama3VisionProjectionHead", "Llama3VisionTransform", "llama3_2_vision_11b", "llama3_2_vision_decoder", "llama3_2_vision_encoder", "llama3_2_vision_transform", "lora_llama3_2_vision_11b", "lora_llama3_2_vision_decoder", "lora_llama3_2_vision_encoder", "qlora_llama3_2_vision_11b", "MistralChatTemplate", "lora_mistral", "lora_mistral_7b", "lora_mistral_classifier", "lora_mistral_reward_7b", "mistral", "mistral_7b", "mistral_classifier", "mistral_reward_7b", "mistral_tokenizer", "qlora_mistral_7b", "qlora_mistral_reward_7b", "lora_phi3", "lora_phi3_mini", "phi3", "phi3_mini", "phi3_mini_tokenizer", "qlora_phi3_mini", "lora_qwen2", "lora_qwen2_0_5b", "lora_qwen2_1_5b", "lora_qwen2_7b", "qwen2", "qwen2_0_5b", "qwen2_1_5b", "qwen2_7b", "qwen2_tokenizer", "FeedForward", "Fp32LayerNorm", "KVCache", "MultiHeadAttention", "RMSNorm", "RotaryPositionalEmbeddings", "TanhGate", "TiedLinear", "TransformerCrossAttentionLayer", "TransformerDecoder", "TransformerSelfAttentionLayer", "VisionTransformer", "delete_kv_caches", "disable_kv_cache", "local_kv_cache", "reparametrize_as_dtype_state_dict_post_hook", "CEWithChunkedOutputLoss", "ForwardKLLoss", "ForwardKLWithChunkedOutputLoss", "DeepFusionModel", "FusionEmbedding", "FusionLayer", "get_fusion_params", "register_fusion_module", "AdapterModule", "DoRALinear", "LoRALinear", "disable_adapter", "get_adapter_params", "set_trainable_params", "validate_missing_and_unexpected_for_lora", "validate_state_dict_for_lora", "BaseTokenizer", "ModelTokenizer", "SentencePieceBaseTokenizer", "TikTokenBaseTokenizer", "parse_hf_tokenizer_json", "tokenize_messages_no_special_tokens", "Transform", "VisionCrossAttentionMask", "estimate_advantages", "get_rewards_ppo", "DPOLoss", "PPOLoss", "RSOLoss", "SimPOLoss", "truncate_sequence_at_first_stop_token", "torchtune.training.FSDPPolicyType", "FormattedCheckpointFiles", "FullModelHFCheckpointer", "FullModelMetaCheckpointer", "FullModelTorchTuneCheckpointer", "ModelType", "OptimizerInBackwardWrapper", "apply_selective_activation_checkpointing", "create_optim_in_bwd_wrapper", "get_cosine_schedule_with_warmup", "get_dtype", "get_full_finetune_fsdp_wrap_policy", "get_lr", "get_memory_stats", "get_quantizer_mode", "get_unmasked_sequence_lengths", "get_world_size_and_rank", "init_distributed", "is_distributed", "log_memory_stats", "lora_fsdp_wrap_policy", "CometLogger", "DiskLogger", "StdoutLogger", "TensorBoardLogger", "WandBLogger", "register_optim_in_bwd_hooks", "set_activation_checkpointing", "set_default_dtype", "set_seed", "setup_torch_profiler", "update_state_dict_for_classifier", "validate_expected_param_dtype", "batch_to_device", "get_device", "get_logger", "torch_version_ge", "<no title>", "Computation times", "Welcome to the torchtune Documentation", "Install Instructions", "torchtune Overview", "LoRA Single Device Finetuning", "Distributed Quantization-Aware Training (QAT)", "Recipes Overview", "Computation times", "torchtune CLI", "Fine-Tuning Llama3 with Chat Data", "End-to-End Workflow with torchtune", "Fine-Tune Your First LLM", "Meta Llama3 in torchtune", "Distilling Llama3.1 8B into Llama3.2 1B using Knowledge Distillation", "Fine-Tuning Llama2 with LoRA", "Memory Optimization Overview", "Fine-Tuning Llama3 with QAT", "Fine-Tuning Llama2 with QLoRA"], "terms": {"instruct": [1, 2, 4, 9, 10, 11, 13, 15, 16, 17, 18, 19, 21, 31, 32, 33, 35, 37, 42, 54, 56, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 71, 145, 148, 149, 153, 161, 167, 168, 169, 176, 177, 178, 266, 269, 270, 273, 274, 276, 278, 279, 281, 282], "prompt": [1, 9, 10, 11, 12, 13, 18, 31, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 55, 56, 59, 60, 62, 63, 64, 67, 68, 69, 70, 73, 74, 94, 100, 106, 119, 144, 148, 153, 162, 169, 179, 189, 199, 217, 275, 277], "chat": [1, 2, 11, 13, 16, 18, 32, 37, 42, 56, 60, 100, 169, 269], "includ": [1, 9, 11, 12, 16, 18, 19, 21, 22, 24, 25, 38, 39, 56, 77, 81, 91, 101, 116, 125, 146, 147, 148, 150, 151, 158, 169, 175, 189, 205, 206, 212, 229, 230, 268, 271, 273, 274, 275, 276, 277, 278, 279, 282], "some": [1, 17, 18, 20, 21, 22, 24, 32, 156, 200, 202, 208, 209, 266, 268, 269, 270, 273, 274, 275, 276, 278, 279, 280, 281, 282], "specif": [1, 5, 11, 12, 15, 19, 21, 24, 25, 27, 55, 56, 65, 66, 144, 213, 238, 270, 274, 275, 280, 281, 282], "format": [1, 2, 7, 11, 19, 21, 36, 45, 46, 55, 56, 59, 60, 63, 64, 67, 100, 144, 153, 213, 228, 229, 230, 231, 232, 273, 274, 275, 276, 277, 279, 280], "differ": [1, 9, 10, 17, 18, 19, 21, 24, 26, 48, 53, 60, 64, 78, 79, 80, 144, 191, 205, 214, 222, 232, 259, 268, 269, 270, 273, 274, 275, 277, 278, 279, 280, 281, 282], "dataset": [1, 10, 13, 14, 15, 17, 19, 24, 31, 33, 35, 36, 37, 42, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 222, 268, 276, 277, 278, 281], "model": [1, 2, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 25, 27, 31, 32, 33, 35, 36, 37, 42, 53, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 182, 183, 184, 185, 187, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 216, 217, 218, 220, 221, 222, 223, 224, 225, 229, 230, 231, 232, 234, 235, 238, 240, 247, 248, 253, 254, 258, 266, 268, 269, 270, 274, 282], "convert": [1, 9, 11, 14, 21, 22, 33, 35, 37, 42, 49, 55, 56, 60, 65, 66, 67, 75, 142, 229, 275, 281, 282], "from": [1, 2, 4, 10, 11, 13, 14, 15, 17, 19, 22, 23, 24, 25, 26, 27, 31, 33, 36, 37, 42, 45, 46, 47, 50, 53, 54, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 76, 77, 78, 79, 80, 81, 82, 83, 84, 92, 93, 100, 102, 103, 104, 105, 119, 143, 144, 148, 159, 161, 169, 176, 177, 178, 179, 180, 183, 188, 189, 190, 191, 192, 193, 194, 196, 197, 198, 201, 202, 203, 204, 208, 211, 214, 216, 219, 222, 224, 225, 228, 229, 230, 231, 233, 235, 236, 248, 251, 252, 253, 258, 265, 267, 270, 272, 273, 275, 276, 277, 278, 279, 280, 281], "common": [1, 2, 5, 9, 14, 15, 24, 217, 273, 274, 277, 279, 280, 281], "schema": [1, 9, 11, 12, 16], "convers": [1, 13, 16, 18, 19, 21, 22, 33, 42, 52, 55, 56, 60, 65, 67, 69, 229, 231, 232, 268, 274, 275, 279, 280, 282], "json": [1, 9, 12, 13, 16, 18, 21, 22, 37, 42, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 119, 148, 169, 179, 216, 229, 273, 274, 275, 281], "list": [1, 9, 11, 14, 15, 18, 19, 21, 22, 24, 33, 36, 38, 44, 45, 47, 48, 49, 50, 51, 52, 53, 55, 56, 60, 61, 65, 66, 67, 72, 73, 81, 85, 86, 87, 88, 89, 90, 94, 95, 96, 97, 98, 99, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 119, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 143, 144, 147, 148, 149, 150, 151, 152, 154, 155, 156, 157, 162, 163, 164, 165, 166, 169, 170, 171, 172, 173, 174, 189, 191, 196, 198, 199, 200, 201, 204, 205, 206, 210, 211, 212, 213, 214, 215, 217, 219, 228, 229, 230, 231, 248, 262, 271, 274, 275, 276, 277, 280, 281], "us": [1, 2, 4, 5, 9, 10, 11, 12, 13, 14, 16, 17, 18, 20, 21, 22, 23, 26, 27, 29, 32, 35, 36, 38, 44, 47, 50, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 79, 80, 81, 100, 101, 107, 116, 119, 120, 125, 129, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 165, 169, 171, 175, 179, 180, 182, 183, 184, 185, 187, 189, 190, 191, 192, 193, 194, 195, 196, 199, 200, 203, 207, 210, 214, 215, 219, 220, 221, 222, 223, 225, 227, 229, 230, 232, 233, 237, 238, 240, 247, 248, 249, 250, 251, 252, 256, 258, 260, 261, 266, 267, 268, 269, 270, 271, 273, 276, 277, 279, 280, 281], "collect": [1, 24, 276], "sampl": [1, 9, 11, 12, 13, 14, 15, 16, 19, 20, 21, 23, 26, 33, 35, 36, 37, 42, 44, 50, 54, 55, 56, 57, 62, 63, 65, 66, 67, 68, 69, 71, 73, 74, 183, 185, 189, 190, 191, 199, 218, 219, 224, 274, 275, 280], "batch": [1, 11, 17, 25, 47, 48, 49, 50, 54, 59, 62, 65, 66, 68, 79, 142, 143, 182, 183, 185, 188, 189, 190, 191, 194, 199, 201, 220, 221, 222, 224, 225, 242, 257, 260, 268, 276, 277, 279, 280], "handl": [1, 13, 16, 17, 24, 29, 31, 53, 56, 144, 214, 215, 274, 275, 279, 280, 282], "ani": [1, 5, 10, 11, 13, 14, 15, 16, 17, 21, 22, 24, 25, 27, 29, 30, 33, 36, 37, 38, 42, 44, 47, 50, 51, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 76, 80, 181, 189, 195, 199, 201, 202, 208, 209, 210, 211, 212, 213, 214, 217, 229, 230, 231, 233, 244, 247, 248, 256, 259, 273, 274, 276, 279, 280, 281], "pad": [1, 45, 47, 48, 49, 50, 54, 73, 75, 76, 189, 191, 221, 223, 226, 242], "miscellan": 1, "modifi": [1, 10, 21, 24, 25, 26, 193, 195, 205, 233, 268, 275, 277, 278, 279, 280, 281, 282], "For": [2, 7, 9, 10, 12, 14, 16, 18, 19, 20, 21, 22, 24, 25, 33, 35, 36, 37, 38, 42, 50, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 78, 79, 80, 81, 91, 95, 101, 107, 116, 120, 125, 129, 143, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 183, 189, 191, 196, 199, 200, 203, 207, 218, 229, 235, 241, 248, 252, 254, 256, 267, 269, 270, 271, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "detail": [2, 9, 10, 12, 13, 16, 21, 22, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 78, 79, 80, 81, 160, 182, 191, 196, 223, 227, 238, 247, 256, 269, 270, 273, 275, 276, 277, 278, 279, 280, 281, 282], "usag": [2, 21, 195, 196, 198, 228, 232, 233, 257, 267, 273, 275, 276, 277, 280, 281, 282], "guid": [2, 23, 24, 26, 33, 35, 37, 42, 60, 62, 63, 64, 65, 66, 67, 68, 69, 225, 248, 268, 274, 276, 278, 279], "pleas": [2, 7, 34, 40, 43, 78, 79, 80, 81, 88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170, 191, 196, 227, 238, 247, 254, 267, 270, 271, 275, 277, 282], "see": [2, 7, 9, 10, 11, 12, 13, 14, 16, 18, 19, 21, 22, 23, 26, 34, 40, 43, 47, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 74, 88, 89, 90, 98, 99, 100, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 153, 160, 163, 164, 170, 182, 188, 190, 191, 201, 204, 212, 213, 218, 227, 232, 238, 247, 248, 252, 254, 256, 262, 267, 268, 269, 270, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "overview": [2, 7, 24, 26, 199, 266, 269, 270, 276, 278, 279, 282], "support": [2, 4, 10, 11, 15, 16, 17, 18, 21, 22, 23, 25, 26, 27, 36, 37, 54, 55, 56, 59, 60, 61, 62, 65, 66, 67, 68, 69, 72, 77, 95, 107, 120, 129, 142, 149, 150, 151, 153, 154, 156, 165, 168, 169, 171, 181, 183, 191, 200, 201, 206, 224, 230, 231, 233, 237, 240, 241, 268, 269, 270, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "sever": [2, 280], "wide": [2, 9, 183, 278], "onli": [2, 4, 10, 16, 18, 22, 23, 26, 35, 36, 42, 54, 55, 56, 61, 67, 73, 77, 81, 95, 107, 120, 129, 144, 149, 150, 151, 153, 154, 156, 165, 171, 183, 187, 189, 191, 196, 198, 202, 206, 208, 210, 214, 229, 230, 231, 233, 237, 238, 240, 241, 247, 273, 275, 276, 278, 279, 280, 281, 282], "help": [2, 11, 18, 19, 22, 63, 100, 189, 191, 199, 229, 248, 266, 267, 268, 273, 274, 275, 276, 278, 280, 281, 282], "quickli": [2, 11, 24, 38, 57, 269, 274, 280], "bootstrap": [2, 11], "your": [2, 7, 9, 11, 12, 13, 14, 16, 17, 18, 21, 23, 26, 27, 38, 57, 60, 64, 67, 79, 80, 81, 147, 151, 191, 200, 248, 251, 252, 258, 266, 267, 268, 269, 270, 273, 274, 277, 278, 279, 280, 281, 282], "fine": [2, 9, 10, 11, 12, 16, 18, 19, 20, 22, 23, 25, 26, 36, 54, 55, 56, 71, 205, 258, 266, 268, 269, 270, 271, 275], "tune": [2, 4, 9, 10, 11, 12, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 29, 36, 54, 55, 56, 71, 205, 258, 266, 267, 268, 269, 270, 271, 273, 275], "also": [2, 9, 10, 12, 14, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 53, 60, 64, 67, 71, 73, 74, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 169, 171, 175, 183, 189, 192, 205, 206, 225, 238, 240, 247, 248, 252, 258, 261, 267, 270, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "like": [2, 6, 12, 22, 23, 24, 25, 26, 169, 191, 196, 198, 200, 231, 267, 273, 274, 275, 276, 278, 279, 280, 281], "These": [2, 5, 10, 13, 15, 18, 19, 21, 22, 24, 25, 27, 54, 55, 67, 191, 219, 269, 271, 274, 275, 276, 277, 279, 280, 281, 282], "ar": [2, 5, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 26, 27, 31, 35, 38, 39, 42, 45, 47, 48, 52, 54, 55, 56, 59, 60, 64, 65, 66, 67, 73, 75, 76, 79, 85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 100, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 144, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 182, 188, 189, 190, 191, 193, 199, 200, 201, 205, 206, 207, 210, 211, 219, 221, 227, 229, 230, 232, 233, 235, 237, 239, 240, 245, 247, 257, 258, 267, 268, 269, 271, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "especi": [2, 268, 273, 275, 280], "specifi": [2, 10, 12, 16, 18, 20, 22, 24, 25, 27, 31, 33, 35, 37, 42, 44, 60, 62, 63, 64, 65, 66, 67, 68, 69, 73, 75, 77, 94, 101, 106, 107, 116, 119, 120, 125, 129, 146, 148, 150, 162, 169, 171, 175, 179, 183, 189, 190, 197, 198, 199, 227, 238, 241, 247, 252, 254, 257, 270, 271, 273, 274, 275, 276, 277, 280, 281, 282], "yaml": [2, 10, 17, 18, 20, 24, 25, 27, 28, 29, 53, 60, 64, 67, 71, 252, 268, 271, 273, 274, 275, 276, 277, 279, 281, 282], "config": [2, 9, 12, 13, 16, 17, 18, 19, 20, 21, 22, 23, 26, 27, 28, 29, 30, 53, 60, 64, 67, 71, 183, 210, 229, 233, 248, 252, 257, 268, 269, 270, 271, 274, 275, 277, 278, 279, 280, 281, 282], "represent": [2, 228, 278, 279, 281, 282], "abov": [2, 4, 9, 16, 17, 18, 20, 22, 55, 195, 245, 267, 270, 275, 277, 279, 280, 281, 282], "text": [4, 5, 9, 11, 12, 15, 18, 19, 21, 35, 36, 37, 38, 39, 42, 44, 50, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 144, 200, 201, 212, 214, 215, 217, 219, 274, 275, 281], "version": [4, 58, 73, 95, 107, 120, 129, 149, 154, 156, 165, 171, 183, 263, 267, 277, 280, 281, 282], "famili": [4, 22, 25, 59, 61, 65, 66, 69, 70, 72, 232, 268, 273, 277, 278], "import": [4, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 27, 60, 64, 65, 66, 67, 71, 77, 191, 192, 193, 194, 222, 248, 251, 252, 274, 275, 276, 277, 278, 279, 280, 281, 282], "you": [4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 36, 38, 55, 56, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 100, 182, 189, 191, 194, 196, 198, 201, 203, 232, 248, 251, 252, 258, 266, 267, 268, 269, 270, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "need": [4, 9, 10, 12, 14, 16, 18, 19, 20, 22, 23, 24, 25, 26, 38, 54, 56, 183, 189, 191, 199, 200, 225, 247, 248, 251, 252, 253, 267, 269, 270, 271, 273, 274, 275, 276, 277, 279, 280, 282], "request": [4, 237, 275], "access": [4, 10, 22, 24, 25, 53, 229, 235, 269, 270, 273, 275, 276], "hug": [4, 11, 22, 32, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 119, 148, 169, 179, 216, 236, 268, 273, 276, 277], "face": [4, 11, 22, 32, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 119, 148, 169, 179, 216, 236, 268, 273, 276, 277], "befor": [4, 19, 22, 38, 52, 54, 65, 78, 79, 81, 91, 95, 147, 151, 183, 188, 189, 190, 191, 196, 198, 199, 201, 206, 215, 229, 248, 270, 273, 275, 280, 281], "download": [4, 10, 11, 16, 22, 65, 264, 267, 269, 270, 274, 277, 278, 279, 281, 282], "To": [4, 9, 12, 13, 14, 16, 17, 18, 19, 20, 22, 24, 25, 26, 54, 65, 189, 191, 201, 229, 258, 267, 268, 270, 271, 273, 275, 276, 277, 278, 279, 280, 281, 282], "1b": [4, 10, 17, 136, 138, 140, 266], "meta": [4, 10, 15, 16, 20, 21, 22, 100, 185, 229, 230, 269, 270, 273, 274, 275, 276, 278], "output": [4, 10, 12, 13, 14, 20, 21, 22, 31, 35, 45, 53, 55, 56, 59, 62, 64, 68, 69, 73, 81, 85, 86, 87, 91, 95, 101, 105, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 142, 143, 146, 147, 149, 150, 151, 154, 155, 156, 157, 158, 161, 165, 166, 171, 174, 175, 180, 181, 183, 185, 186, 188, 189, 190, 191, 196, 198, 199, 200, 201, 205, 206, 209, 210, 211, 219, 231, 238, 250, 257, 258, 267, 269, 270, 273, 275, 276, 277, 278, 279, 280, 282], "dir": [4, 10, 21, 22, 252, 267, 269, 270, 273, 275, 276, 277, 278, 281], "tmp": [4, 9, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 24, 233, 269, 270, 274, 276, 278], "ignor": [4, 9, 10, 12, 22, 42, 71, 187, 188, 190, 197, 198, 234, 258, 269, 270, 273, 278], "pattern": [4, 10, 19, 215, 269, 270, 273, 278], "origin": [4, 10, 15, 16, 17, 20, 21, 22, 58, 59, 63, 195, 200, 201, 205, 206, 269, 270, 274, 275, 277, 278, 279, 280, 281, 282], "consolid": [4, 10, 22, 269, 270, 278], "00": [4, 10, 16, 22, 60, 64, 265, 269, 270, 272, 276, 278], "pth": [4, 10, 22, 228, 269, 270, 275, 278], "hf": [4, 9, 18, 20, 21, 22, 222, 224, 229, 273, 274, 275, 276, 277], "token": [4, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 22, 24, 25, 36, 42, 47, 49, 50, 51, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 91, 94, 95, 101, 106, 107, 116, 119, 120, 125, 129, 144, 146, 147, 148, 150, 151, 154, 156, 158, 160, 162, 165, 167, 169, 171, 175, 179, 183, 185, 188, 189, 190, 191, 196, 198, 199, 200, 201, 212, 213, 214, 215, 216, 217, 219, 221, 223, 226, 238, 242, 269, 273, 275, 276, 277, 278, 279, 280, 281, 282], "hf_token": [4, 21, 270, 278], "3b": [4, 137, 139, 141], "The": [4, 9, 11, 12, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 29, 30, 32, 36, 46, 47, 52, 53, 54, 55, 56, 60, 63, 64, 65, 66, 67, 70, 78, 79, 80, 81, 85, 86, 87, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 142, 144, 147, 149, 150, 151, 154, 156, 165, 166, 171, 172, 173, 174, 181, 184, 185, 186, 187, 191, 195, 196, 197, 198, 199, 200, 201, 205, 207, 212, 213, 214, 215, 216, 217, 219, 220, 222, 223, 224, 225, 227, 229, 231, 233, 236, 237, 239, 241, 248, 252, 255, 257, 261, 262, 263, 267, 268, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "reus": [4, 268], "llama3_token": [4, 15, 17, 20, 21, 65, 66, 73, 274, 277], "class": [4, 10, 13, 14, 15, 21, 24, 26, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 53, 54, 55, 56, 57, 65, 66, 78, 79, 80, 81, 94, 100, 105, 106, 119, 142, 143, 144, 148, 153, 156, 160, 161, 162, 169, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 196, 197, 198, 199, 200, 201, 202, 204, 205, 206, 208, 209, 212, 213, 214, 215, 218, 219, 222, 223, 224, 225, 228, 229, 230, 231, 232, 233, 248, 249, 250, 251, 252, 271, 274, 276, 278, 279, 280, 282], "languag": [4, 10, 16, 32, 73, 150, 200, 201, 205, 206, 222, 258, 279, 280], "11b": [4, 145, 152], "8b": [4, 15, 16, 20, 21, 118, 122, 124, 128, 130, 132, 135, 166, 266, 269, 270, 273, 274, 281], "70b": [4, 83, 86, 89, 103, 109, 113, 117, 121, 123, 127, 131, 134, 277], "405b": [4, 126, 130, 133], "weight": [4, 21, 22, 25, 50, 85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 145, 148, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 187, 195, 204, 205, 206, 210, 214, 222, 229, 230, 231, 232, 241, 252, 258, 266, 269, 270, 273, 274, 275, 276, 277, 278, 279, 281, 282], "can": [4, 5, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 30, 33, 35, 36, 37, 38, 39, 42, 50, 53, 55, 56, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 78, 79, 81, 144, 147, 151, 184, 185, 187, 188, 189, 191, 196, 198, 199, 201, 203, 207, 214, 215, 227, 229, 232, 234, 238, 247, 248, 251, 252, 254, 257, 266, 267, 268, 269, 270, 271, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "instead": [4, 9, 12, 14, 16, 22, 25, 31, 45, 54, 55, 71, 81, 129, 150, 151, 182, 187, 191, 206, 225, 273, 277, 279, 280, 281], "builder": [4, 9, 10, 11, 12, 13, 15, 16, 17, 22, 58, 60, 61, 64, 67, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 93, 96, 97, 98, 99, 102, 103, 104, 105, 108, 109, 110, 111, 112, 113, 114, 115, 117, 118, 121, 122, 123, 124, 126, 127, 128, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 152, 155, 157, 159, 161, 163, 164, 166, 168, 170, 172, 173, 174, 176, 177, 178, 274, 280, 282], "all": [4, 5, 10, 11, 14, 15, 19, 21, 25, 30, 35, 36, 38, 42, 45, 47, 50, 53, 54, 55, 56, 81, 119, 142, 148, 169, 179, 183, 187, 189, 191, 192, 193, 194, 195, 199, 200, 201, 203, 207, 218, 229, 233, 235, 239, 245, 253, 259, 260, 264, 266, 268, 269, 270, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281], "7b": [4, 9, 12, 14, 18, 19, 20, 21, 22, 61, 72, 84, 87, 90, 93, 97, 104, 105, 110, 111, 114, 115, 155, 157, 159, 161, 164, 174, 178, 229, 230, 274, 276, 277, 279, 282], "13b": [4, 22, 82, 85, 88, 102, 108, 112], "codellama": 4, "size": [4, 14, 15, 16, 22, 25, 27, 45, 50, 59, 62, 65, 66, 68, 79, 80, 81, 142, 143, 144, 145, 147, 148, 149, 151, 182, 183, 184, 185, 188, 189, 190, 191, 194, 196, 198, 199, 200, 201, 219, 220, 221, 242, 243, 245, 268, 270, 273, 275, 276, 277, 279, 280, 281], "0": [4, 9, 10, 12, 14, 15, 16, 18, 20, 22, 25, 45, 47, 48, 49, 50, 54, 60, 64, 67, 73, 74, 76, 77, 81, 85, 86, 87, 88, 89, 90, 91, 95, 96, 97, 98, 99, 101, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 143, 144, 146, 147, 149, 150, 151, 152, 154, 155, 156, 157, 158, 160, 163, 164, 165, 166, 167, 170, 171, 172, 173, 174, 175, 176, 177, 182, 183, 189, 191, 192, 193, 194, 200, 205, 206, 217, 222, 223, 224, 225, 226, 236, 242, 248, 251, 252, 256, 261, 263, 265, 270, 272, 274, 275, 276, 277, 279, 280, 281, 282], "5b": [4, 172, 173, 176, 177, 280], "qwen2": [4, 10, 171, 172, 173, 174, 176, 177, 178, 179, 232, 280], "exampl": [4, 10, 19, 21, 22, 23, 24, 25, 26, 27, 29, 33, 35, 37, 38, 42, 44, 45, 46, 47, 48, 49, 50, 53, 54, 56, 59, 60, 61, 62, 64, 65, 66, 67, 68, 69, 71, 72, 73, 75, 76, 77, 81, 143, 144, 147, 151, 182, 183, 191, 192, 193, 194, 196, 198, 199, 200, 201, 203, 204, 207, 212, 213, 214, 215, 217, 218, 222, 224, 225, 226, 227, 228, 229, 230, 232, 233, 241, 242, 248, 251, 252, 255, 258, 261, 262, 263, 264, 265, 267, 269, 270, 272, 273, 274, 275, 277, 278, 279, 280, 281, 282], "none": [4, 9, 16, 25, 26, 28, 30, 31, 33, 35, 37, 42, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 81, 94, 101, 106, 107, 116, 119, 120, 125, 129, 142, 143, 144, 146, 148, 150, 162, 169, 179, 180, 182, 183, 185, 188, 189, 190, 191, 192, 193, 194, 199, 201, 207, 209, 210, 211, 214, 217, 220, 221, 223, 229, 230, 231, 232, 233, 234, 237, 241, 246, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 259, 260, 261, 262, 273, 275, 281], "mini": [4, 21, 166, 167, 168, 169, 170], "4k": [4, 21, 167, 168, 169], "microsoft": [4, 168, 169], "ai": [4, 10, 12, 14, 19, 55, 56, 159, 252, 274, 277], "thi": [4, 9, 10, 12, 13, 14, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 31, 33, 34, 35, 36, 37, 42, 43, 44, 45, 47, 48, 50, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 75, 79, 80, 81, 91, 95, 101, 107, 116, 120, 125, 129, 142, 144, 146, 147, 150, 151, 153, 154, 156, 158, 160, 165, 167, 168, 169, 171, 175, 180, 182, 183, 185, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 199, 200, 201, 203, 204, 207, 210, 211, 212, 213, 214, 215, 217, 218, 219, 221, 222, 223, 225, 227, 228, 229, 230, 231, 233, 236, 237, 240, 242, 245, 247, 248, 249, 251, 252, 253, 254, 256, 258, 260, 261, 266, 267, 268, 269, 270, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "v0": [4, 9, 14, 18, 19, 21, 153], "mistralai": [4, 21, 273], "2b": [4, 92, 96], "gemma2": 4, "googl": [4, 92, 93], "gguf": 4, "compon": [4, 6, 14, 21, 22, 25, 30, 48, 55, 56, 65, 66, 205, 268, 271, 276, 278, 279, 282], "multimod": [4, 11, 14, 36, 42, 56, 65, 66, 199, 267], "encod": [4, 5, 15, 21, 50, 56, 73, 74, 81, 142, 143, 145, 146, 147, 149, 150, 151, 183, 188, 189, 190, 194, 199, 200, 201, 203, 212, 214, 215, 217, 219, 222, 225, 274], "perform": [5, 12, 13, 17, 19, 20, 21, 22, 54, 73, 191, 196, 207, 218, 225, 268, 269, 270, 274, 275, 277, 278, 280, 281, 282], "direct": [5, 18, 25, 48, 85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 154, 155, 156, 157, 165, 166, 205, 222, 267, 271, 280], "id": [5, 14, 17, 21, 22, 47, 48, 49, 50, 54, 61, 65, 66, 72, 73, 74, 76, 77, 144, 183, 185, 189, 190, 199, 212, 213, 214, 215, 216, 217, 219, 229, 231, 248, 274, 275], "decod": [5, 9, 12, 14, 15, 16, 18, 20, 21, 60, 64, 67, 73, 91, 95, 101, 107, 116, 120, 125, 129, 143, 144, 145, 146, 147, 149, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 183, 188, 189, 190, 194, 199, 201, 203, 212, 214, 215, 274], "typic": [5, 9, 12, 20, 24, 33, 37, 42, 50, 54, 55, 56, 57, 71, 169, 203, 222, 225, 280, 281, 282], "byte": [5, 21, 215, 280, 282], "pair": [5, 10, 18, 21, 24, 48, 49, 63, 67, 70, 215], "underli": [5, 13, 18, 21, 214, 280, 282], "helper": 5, "method": [5, 13, 14, 15, 19, 21, 22, 24, 25, 26, 29, 46, 55, 57, 59, 60, 61, 62, 63, 64, 67, 68, 69, 70, 71, 72, 144, 189, 195, 196, 199, 202, 203, 204, 208, 210, 212, 213, 233, 241, 267, 268, 279, 282], "two": [5, 15, 18, 19, 22, 24, 35, 50, 52, 65, 66, 73, 74, 79, 191, 200, 203, 205, 219, 226, 228, 268, 270, 275, 276, 277, 279, 280, 281, 282], "pre": [5, 9, 11, 12, 17, 18, 19, 20, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 100, 145, 148, 149, 191, 199, 201, 203, 205, 270, 274, 280], "train": [5, 9, 10, 11, 12, 13, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 31, 33, 35, 50, 53, 54, 55, 56, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 100, 144, 145, 148, 149, 181, 183, 185, 189, 190, 195, 196, 198, 199, 200, 201, 203, 205, 222, 225, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 266, 268, 269, 271, 273, 274, 275, 277, 278, 279, 280, 281, 282], "function": [5, 10, 22, 24, 25, 27, 29, 45, 46, 47, 48, 60, 64, 67, 73, 79, 80, 81, 147, 151, 180, 183, 191, 192, 195, 207, 210, 211, 222, 223, 227, 229, 243, 256, 258, 260, 261, 268, 278, 282], "preprocess": [5, 54, 191], "imag": [5, 11, 15, 35, 36, 37, 42, 44, 46, 50, 56, 65, 66, 78, 79, 80, 81, 142, 143, 144, 145, 147, 148, 149, 151, 191, 200, 219, 279], "loss": [6, 9, 12, 14, 24, 25, 36, 38, 55, 56, 59, 60, 62, 64, 67, 68, 69, 196, 197, 198, 222, 223, 224, 225, 276, 278, 279, 282], "algorithm": [6, 21, 220, 225, 256], "ppo": [6, 220, 221, 222, 223, 271], "dpo": [6, 18, 48, 55, 207, 222, 224, 225, 271], "offer": 7, "allow": [7, 10, 53, 201, 205, 210, 251, 270, 273, 280, 281, 282], "seamless": 7, "transit": 7, "between": [7, 9, 18, 19, 21, 22, 55, 60, 67, 146, 150, 188, 189, 193, 199, 221, 223, 225, 229, 232, 248, 275, 277, 278, 279, 280, 281, 282], "interoper": [7, 22, 25, 268, 275, 282], "rest": [7, 274, 280, 282], "ecosystem": [7, 22, 25, 268, 275, 277, 282], "comprehens": [7, 280], "deep": [7, 22, 23, 24, 25, 26, 201, 203, 268, 271, 276, 277, 280], "dive": [7, 22, 23, 24, 25, 26, 268, 270, 271, 276, 277, 280], "util": [7, 14, 16, 22, 24, 25, 27, 45, 47, 50, 142, 234, 251, 253, 254, 260, 261, 262, 263, 268, 275, 276, 280, 282], "work": [7, 22, 25, 35, 42, 187, 200, 201, 268, 270, 273, 275, 277, 280, 282], "set": [7, 9, 12, 17, 18, 20, 22, 23, 24, 25, 26, 33, 36, 37, 42, 50, 54, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 71, 72, 101, 107, 116, 120, 125, 129, 144, 146, 149, 150, 154, 156, 158, 160, 165, 167, 171, 175, 183, 185, 188, 189, 192, 193, 194, 199, 207, 209, 227, 233, 238, 245, 247, 248, 254, 255, 256, 257, 260, 261, 268, 271, 273, 274, 275, 276, 277, 278, 279, 280, 281], "enabl": [7, 10, 11, 17, 21, 23, 24, 25, 26, 53, 85, 86, 87, 88, 89, 90, 96, 97, 98, 99, 108, 109, 110, 111, 112, 113, 114, 115, 121, 122, 123, 124, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 152, 155, 157, 163, 164, 166, 170, 172, 173, 174, 176, 177, 183, 188, 189, 190, 192, 193, 194, 199, 201, 205, 206, 256, 257, 270, 277, 279, 280, 282], "consumpt": [7, 53, 75, 269, 280], "dure": [7, 10, 11, 22, 54, 59, 60, 62, 64, 67, 68, 69, 182, 183, 185, 189, 190, 191, 195, 199, 200, 225, 240, 269, 270, 274, 275, 277, 279, 280, 281, 282], "control": [7, 13, 18, 21, 25, 36, 59, 60, 62, 64, 67, 68, 69, 193, 194, 201, 207, 248, 256, 270, 275, 280], "lr": [7, 24, 233, 236, 239, 278, 280], "process": [7, 11, 14, 15, 17, 25, 26, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 81, 147, 151, 191, 195, 243, 244, 256, 276, 281, 282], "variou": 7, "provid": [7, 10, 11, 12, 14, 22, 24, 25, 27, 32, 33, 35, 37, 42, 46, 47, 51, 53, 54, 73, 75, 81, 183, 187, 189, 191, 199, 207, 217, 222, 231, 238, 248, 252, 257, 261, 268, 269, 270, 273, 274, 275, 276, 277, 280], "debug": [7, 22, 24, 25, 248, 273], "finetun": [7, 10, 22, 24, 25, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174, 199, 266, 268, 270, 276, 277, 280], "job": [7, 10, 26, 256, 276], "involv": [9, 12, 17, 20, 56, 281], "multi": [9, 18, 25, 183, 277], "turn": [9, 18, 25, 33, 36, 37, 42, 52, 55, 67, 274, 280], "multipl": [9, 16, 17, 18, 22, 24, 25, 33, 36, 37, 42, 48, 53, 56, 67, 142, 143, 183, 189, 190, 191, 199, 206, 248, 249, 250, 251, 252, 257, 276, 277, 278, 280], "back": [9, 21, 22, 52, 207, 229, 279, 280, 282], "forth": [9, 52], "user": [9, 12, 13, 14, 15, 16, 18, 19, 21, 25, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 52, 55, 56, 60, 64, 67, 94, 101, 106, 107, 116, 119, 120, 125, 129, 146, 148, 150, 154, 156, 158, 160, 162, 165, 167, 169, 171, 175, 179, 183, 217, 271, 274, 276, 281], "assist": [9, 12, 13, 14, 15, 16, 18, 19, 21, 31, 32, 33, 35, 36, 37, 38, 39, 41, 42, 44, 52, 55, 56, 60, 67, 73, 94, 100, 106, 119, 148, 162, 169, 179, 217, 274], "role": [9, 13, 14, 15, 16, 18, 19, 21, 33, 36, 37, 38, 39, 42, 44, 55, 56, 60, 67, 94, 106, 119, 144, 148, 162, 169, 179, 217, 274], "content": [9, 13, 15, 16, 18, 19, 21, 22, 33, 36, 37, 38, 39, 42, 44, 55, 56, 60, 67, 217, 274], "what": [9, 14, 15, 16, 18, 22, 23, 24, 26, 36, 37, 55, 56, 60, 64, 67, 100, 153, 191, 266, 271, 274, 275, 276, 277, 280], "answer": [9, 15, 16, 19, 40, 64, 275, 277], "ultim": [9, 281], "question": [9, 15, 16, 19, 40, 64, 275, 277], "life": 9, "42": [9, 73, 191], "That": [9, 274], "s": [9, 10, 12, 13, 14, 16, 17, 18, 19, 20, 22, 24, 25, 26, 27, 29, 32, 37, 42, 52, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 85, 86, 87, 100, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 142, 143, 144, 149, 150, 151, 153, 154, 155, 156, 157, 165, 166, 169, 171, 174, 175, 182, 183, 185, 189, 190, 191, 195, 199, 202, 203, 204, 205, 208, 210, 211, 215, 222, 224, 225, 226, 227, 229, 230, 233, 238, 240, 242, 247, 248, 251, 254, 255, 258, 260, 261, 268, 273, 274, 276, 278, 279, 280, 281, 282], "ridicul": 9, "oh": 9, "i": [9, 12, 14, 18, 19, 20, 25, 36, 67, 73, 100, 142, 143, 153, 183, 188, 189, 190, 191, 195, 199, 209, 228, 233, 275, 277, 280, 281, 282], "know": [9, 274, 275, 278, 279], "more": [9, 10, 11, 12, 13, 14, 16, 18, 19, 21, 22, 24, 25, 38, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 182, 191, 196, 203, 210, 227, 228, 231, 248, 252, 254, 256, 260, 268, 269, 270, 271, 273, 275, 276, 277, 278, 279, 280, 281, 282], "structur": [9, 12, 13, 14, 19, 25, 37, 39, 42, 60, 119, 144, 148, 169, 179, 219, 274, 275, 281], "than": [9, 10, 12, 16, 18, 24, 50, 52, 73, 75, 182, 183, 191, 222, 227, 231, 232, 259, 260, 263, 274, 275, 276, 277, 278, 279, 280, 282], "freeform": [9, 12, 57, 71], "associ": [9, 10, 11, 12, 22, 24, 25, 73, 74, 81, 91, 101, 116, 125, 146, 150, 158, 175, 248, 275, 279], "where": [9, 10, 12, 14, 16, 18, 19, 20, 36, 38, 45, 48, 59, 73, 75, 76, 79, 105, 142, 143, 161, 180, 183, 189, 191, 193, 196, 198, 199, 206, 214, 219, 220, 222, 223, 226, 238, 242, 247, 278, 280], "thei": [9, 11, 12, 19, 21, 24, 25, 53, 65, 66, 81, 142, 147, 151, 189, 191, 201, 211, 238, 273, 274, 279, 280, 281], "learn": [9, 12, 25, 53, 200, 201, 203, 233, 236, 239, 268, 269, 270, 271, 274, 276, 277, 279, 280, 281, 282], "simpli": [9, 12, 13, 14, 16, 20, 22, 24, 54, 56, 222, 273, 274, 275, 277, 278, 280, 282], "predict": [9, 12, 73, 74, 77, 220, 221, 223, 269], "next": [9, 12, 22, 54, 71, 73, 74, 81, 191, 219, 269, 277, 282], "respond": 9, "accur": 9, "primari": [9, 12, 16, 18, 20, 22, 24, 25, 55, 56, 271, 276], "entri": [9, 12, 16, 18, 20, 24, 25, 47, 50, 271, 276, 280], "point": [9, 10, 12, 16, 18, 20, 21, 24, 25, 46, 60, 217, 271, 275, 276, 277, 279, 281, 282], "torchtun": [9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 267, 269, 270, 271, 274, 276, 280], "chat_dataset": [9, 12, 13, 18, 274], "let": [9, 10, 11, 12, 16, 18, 22, 24, 26, 273, 274, 275, 276, 277, 278, 279, 280, 282], "follow": [9, 10, 11, 12, 15, 16, 19, 22, 25, 36, 37, 38, 42, 50, 54, 55, 56, 64, 67, 144, 183, 188, 219, 223, 231, 232, 233, 236, 245, 252, 257, 266, 267, 270, 271, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "data": [9, 10, 12, 13, 14, 15, 16, 19, 21, 23, 31, 32, 33, 35, 36, 37, 38, 39, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 100, 148, 179, 191, 218, 222, 224, 240, 248, 249, 250, 251, 252, 260, 269, 270, 275, 280, 281, 282], "directli": [9, 10, 12, 13, 14, 16, 22, 24, 25, 27, 31, 55, 56, 60, 64, 65, 67, 71, 222, 227, 229, 273, 275, 276, 277, 279, 280, 281, 282], "llm": [9, 10, 11, 12, 21, 25, 199, 201, 266, 267, 268, 269, 271, 275, 277, 278, 279], "my_data": [9, 12, 13, 16, 274], "human": [9, 16, 18, 36, 42, 60, 100, 222, 223, 224, 274], "valu": [9, 16, 22, 24, 33, 35, 37, 42, 45, 47, 48, 50, 59, 60, 62, 63, 64, 67, 68, 69, 70, 73, 74, 76, 77, 82, 83, 84, 91, 92, 93, 95, 101, 102, 103, 104, 105, 107, 116, 117, 118, 120, 125, 126, 127, 128, 129, 136, 137, 144, 146, 150, 154, 156, 158, 159, 160, 161, 165, 167, 171, 175, 176, 177, 178, 182, 183, 184, 188, 189, 190, 197, 198, 199, 201, 210, 220, 221, 223, 226, 229, 232, 233, 236, 242, 248, 249, 250, 251, 252, 256, 270, 273, 274, 276, 277, 279, 280, 281], "gpt": [9, 16, 42, 60, 74, 274, 275], "mistral": [9, 14, 18, 19, 21, 144, 153, 154, 155, 156, 157, 159, 160, 161, 162, 163, 164, 232, 273, 274, 275, 276], "mistral_token": [9, 14, 18, 19, 21], "m_token": [9, 14, 18, 19, 20, 21], "path": [9, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 35, 42, 46, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 94, 106, 119, 144, 148, 162, 169, 179, 214, 215, 216, 229, 230, 231, 257, 273, 274, 275, 277, 279], "1": [9, 14, 16, 18, 19, 20, 21, 22, 25, 35, 42, 45, 47, 48, 49, 50, 54, 69, 73, 74, 76, 77, 78, 79, 101, 107, 116, 120, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 143, 144, 146, 150, 153, 154, 156, 158, 160, 165, 167, 171, 172, 173, 175, 176, 177, 182, 183, 189, 191, 192, 193, 194, 196, 197, 198, 214, 215, 217, 222, 223, 224, 225, 230, 232, 236, 242, 245, 248, 251, 252, 255, 256, 268, 269, 273, 274, 275, 276, 279, 280, 281, 282], "prompt_templ": [9, 12, 14, 16, 18, 19, 94, 106, 119, 144, 148, 162, 169, 179], "mistralchattempl": [9, 14, 18, 19, 162, 274], "max_seq_len": [9, 10, 12, 14, 16, 17, 18, 20, 21, 24, 27, 47, 50, 51, 54, 59, 60, 61, 62, 64, 65, 66, 68, 69, 71, 72, 91, 94, 95, 101, 106, 107, 116, 119, 120, 125, 129, 144, 146, 148, 150, 154, 156, 158, 160, 162, 165, 167, 169, 171, 175, 179, 182, 183, 185, 189, 194, 281], "8192": [9, 12, 14, 16, 17, 18, 20, 21, 148, 279, 281], "ds": [9, 10, 12, 15, 16, 18, 20, 54, 69, 274], "sourc": [9, 10, 12, 13, 16, 18, 20, 22, 24, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 91, 92, 93, 94, 95, 96, 97, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 116, 117, 118, 119, 120, 121, 122, 125, 126, 127, 128, 129, 130, 131, 132, 136, 137, 138, 139, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 165, 166, 167, 168, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 274, 275, 281], "data_fil": [9, 12, 13, 16, 18, 20, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 274], "split": [9, 10, 12, 13, 14, 16, 18, 20, 22, 44, 53, 54, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 215, 274, 275, 281], "conversation_column": [9, 60, 274], "conversation_styl": [9, 60, 274], "By": [9, 12, 22, 205, 270, 273, 278, 279, 280, 281, 282], "default": [9, 10, 12, 16, 22, 24, 31, 32, 33, 35, 36, 37, 42, 45, 48, 49, 50, 51, 54, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 81, 82, 83, 84, 85, 86, 87, 91, 92, 93, 94, 95, 96, 97, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 116, 117, 118, 119, 120, 121, 122, 125, 126, 127, 128, 129, 130, 131, 132, 136, 137, 138, 139, 144, 145, 148, 149, 150, 151, 154, 155, 156, 157, 158, 159, 160, 161, 162, 165, 166, 167, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 183, 184, 185, 188, 189, 190, 195, 197, 198, 199, 205, 206, 210, 214, 215, 217, 220, 221, 222, 225, 229, 230, 231, 233, 236, 237, 243, 247, 248, 249, 252, 255, 256, 257, 267, 270, 273, 274, 275, 277, 278, 279, 280, 281, 282], "true": [9, 10, 12, 13, 14, 15, 16, 17, 22, 24, 31, 36, 45, 53, 54, 57, 58, 59, 60, 62, 64, 65, 66, 67, 68, 69, 71, 72, 75, 76, 81, 88, 89, 90, 91, 95, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 144, 145, 152, 163, 164, 170, 183, 188, 189, 190, 192, 193, 194, 195, 196, 197, 199, 201, 207, 214, 215, 217, 219, 220, 223, 226, 227, 229, 230, 231, 238, 239, 240, 242, 244, 245, 248, 251, 257, 263, 269, 273, 274, 275, 277, 279, 280, 281, 282], "train_on_input": [9, 12, 13, 18, 24, 31, 33, 35, 37, 42, 53, 58, 59, 60, 62, 63, 64, 67, 68, 69, 70], "new_system_prompt": [9, 12, 13, 33, 35, 37, 42, 60, 62, 63, 64, 65, 66, 67, 68, 69], "tokenized_dict": [9, 12, 15, 16, 18, 20], "label": [9, 12, 20, 25, 47, 48, 49, 50, 54, 61, 69, 72, 196, 197, 198, 222, 225, 278], "print": [9, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 26, 44, 50, 53, 59, 62, 65, 66, 68, 69, 73, 144, 191, 192, 193, 194, 214, 215, 217, 263, 274, 276, 279, 281, 282], "inst": [9, 14, 19, 21, 100, 144, 153, 274], "733": [9, 14, 21], "16289": [9, 14, 21], "28793": [9, 14, 21], "1824": 9, "349": 9, "272": 9, "4372": 9, "In": [9, 10, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 24, 25, 55, 79, 80, 81, 147, 151, 185, 189, 191, 206, 227, 247, 251, 252, 270, 274, 275, 277, 278, 279, 280, 281, 282], "_component_": [9, 10, 12, 13, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 53, 60, 64, 67, 71, 257, 270, 274, 275, 277, 278, 279, 280, 281], "null": [9, 22, 24, 281], "have": [9, 10, 13, 14, 18, 21, 22, 24, 27, 35, 36, 55, 60, 67, 75, 79, 80, 81, 142, 147, 151, 181, 182, 183, 184, 187, 189, 191, 192, 193, 194, 196, 198, 199, 204, 211, 219, 225, 228, 231, 233, 238, 239, 251, 259, 267, 274, 275, 276, 277, 278, 279, 280, 281, 282], "singl": [9, 10, 16, 17, 18, 19, 22, 24, 27, 33, 35, 37, 42, 47, 53, 54, 55, 56, 57, 60, 67, 71, 79, 80, 81, 94, 105, 106, 119, 142, 143, 144, 147, 148, 151, 161, 162, 169, 183, 189, 191, 199, 229, 230, 231, 232, 233, 235, 271, 273, 274, 275, 276, 277, 278, 279, 280, 282], "name": [9, 12, 13, 14, 16, 18, 20, 22, 23, 24, 26, 28, 31, 33, 35, 37, 42, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 204, 209, 211, 215, 229, 230, 231, 232, 233, 235, 248, 249, 250, 251, 252, 258, 259, 261, 273, 274, 275, 277, 280, 281], "messag": [9, 11, 12, 15, 16, 18, 19, 21, 31, 32, 33, 35, 37, 38, 39, 42, 44, 52, 55, 56, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 94, 106, 119, 144, 148, 162, 169, 213, 217, 267, 273, 274], "contain": [9, 11, 13, 14, 15, 16, 18, 20, 22, 33, 35, 36, 42, 47, 48, 49, 50, 54, 55, 56, 57, 60, 65, 71, 119, 144, 148, 169, 179, 182, 183, 185, 189, 190, 199, 202, 204, 208, 209, 210, 215, 217, 220, 226, 229, 230, 231, 233, 235, 240, 246, 251, 257, 258, 260, 274, 275, 277, 279], "topic": [9, 266], "per": [9, 16, 47, 88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 143, 144, 152, 163, 164, 170, 182, 191, 195, 219, 221, 222, 273, 280, 281, 282], "could": [9, 18, 19, 239, 278, 279], "system": [9, 12, 13, 18, 19, 32, 33, 35, 36, 37, 38, 39, 41, 42, 44, 52, 55, 56, 60, 62, 63, 64, 65, 66, 67, 68, 69, 94, 100, 106, 119, 148, 153, 162, 169, 179, 217, 274], "tool": [9, 18, 19, 22, 36, 38, 56, 153, 248, 275, 276], "call": [9, 14, 18, 21, 22, 27, 36, 38, 56, 65, 66, 153, 183, 189, 191, 192, 193, 195, 199, 210, 248, 249, 250, 251, 252, 253, 257, 258, 274, 279, 282], "return": [9, 10, 13, 15, 18, 19, 21, 27, 29, 36, 38, 44, 45, 46, 47, 48, 49, 50, 51, 54, 55, 56, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 91, 92, 93, 94, 95, 96, 97, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 116, 117, 118, 119, 120, 121, 122, 125, 126, 127, 128, 129, 130, 131, 132, 136, 137, 138, 139, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 154, 155, 156, 157, 158, 159, 160, 161, 162, 165, 166, 167, 168, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 193, 194, 196, 197, 198, 199, 200, 201, 202, 204, 205, 206, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 231, 233, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 255, 256, 257, 261, 262, 263, 278, 279, 282], "dai": [9, 20], "todai": 9, "It": [9, 10, 14, 16, 32, 36, 38, 55, 56, 60, 62, 64, 65, 66, 68, 70, 144, 147, 151, 153, 187, 189, 191, 199, 222, 225, 248, 273, 274, 278, 282], "tuesdai": 9, "about": [9, 10, 13, 14, 18, 22, 25, 65, 66, 191, 222, 225, 248, 252, 268, 269, 270, 271, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "tomorrow": 9, "wednesdai": 9, "As": [9, 12, 16, 22, 24, 25, 26, 206, 268, 275, 280, 282], "an": [9, 10, 12, 14, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 46, 50, 52, 53, 57, 60, 62, 64, 65, 66, 67, 68, 71, 72, 78, 79, 80, 107, 120, 129, 144, 147, 149, 151, 154, 156, 160, 165, 171, 172, 173, 176, 177, 183, 187, 189, 191, 199, 200, 201, 203, 204, 207, 208, 209, 213, 218, 219, 222, 227, 228, 229, 230, 231, 233, 234, 238, 239, 248, 252, 257, 261, 268, 269, 270, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "slimorca": [9, 69], "pass": [9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 24, 27, 36, 38, 53, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 91, 95, 101, 107, 116, 120, 125, 129, 149, 154, 156, 158, 160, 165, 167, 171, 175, 183, 189, 193, 194, 195, 199, 207, 211, 215, 223, 227, 231, 237, 238, 240, 244, 247, 248, 251, 252, 254, 257, 273, 274, 279, 281, 282], "repo": [9, 10, 12, 16, 18, 20, 22, 65, 229, 230, 232, 273, 275], "select": [9, 234], "one": [9, 10, 11, 12, 13, 16, 18, 22, 25, 33, 35, 37, 42, 47, 50, 52, 60, 66, 67, 191, 196, 198, 217, 231, 248, 275, 276, 277, 280, 282], "most": [9, 12, 13, 16, 18, 20, 22, 24, 36, 38, 274, 276, 279, 280, 282], "gemma": [9, 12, 18, 20, 92, 93, 94, 95, 96, 97, 98, 99, 187, 232, 280], "gemma_token": [9, 12, 18, 20], "g_token": [9, 12, 18, 20], "open": [9, 20, 46, 69, 92, 93, 275], "orca": [9, 69], "dedup": [9, 69], "recip": [9, 11, 12, 16, 18, 20, 22, 23, 24, 26, 27, 28, 29, 144, 189, 199, 229, 230, 231, 268, 269, 270, 274, 275, 277, 280, 282], "via": [9, 12, 14, 16, 17, 18, 20, 23, 24, 26, 55, 60, 64, 67, 71, 183, 189, 190, 205, 206, 229, 279, 282], "http": [9, 12, 16, 27, 46, 57, 61, 63, 65, 71, 72, 74, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 93, 95, 96, 97, 98, 99, 102, 103, 104, 105, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 152, 154, 155, 156, 157, 159, 161, 163, 164, 165, 166, 168, 169, 170, 172, 173, 174, 176, 177, 178, 183, 184, 185, 191, 196, 197, 219, 220, 222, 223, 224, 225, 227, 229, 230, 236, 245, 248, 251, 252, 254, 256, 262, 267, 275, 277, 278], "ha": [9, 18, 22, 64, 73, 143, 186, 188, 189, 191, 194, 196, 198, 199, 202, 204, 207, 208, 211, 226, 231, 233, 258, 259, 274, 275, 276, 277, 278, 279, 280, 282], "addition": [9, 22, 214, 215, 225, 256, 274, 279, 280], "argument": [9, 10, 12, 16, 22, 24, 27, 34, 40, 43, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170, 227, 238, 244, 248, 249, 251, 252, 254, 273, 274, 279, 280, 281], "load_dataset": [9, 12, 16, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 274], "document": [9, 12, 16, 17, 78, 79, 80, 81, 183, 189, 190, 227, 238, 247, 269, 271, 273, 280], "file": [9, 10, 11, 12, 16, 22, 23, 24, 25, 26, 27, 28, 29, 46, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 119, 144, 148, 169, 179, 214, 215, 216, 229, 230, 231, 249, 252, 257, 265, 268, 270, 272, 273, 274, 275, 276, 277, 279, 280, 281, 282], "raw": [9, 11, 13, 14, 16, 21, 44], "vari": [9, 50, 54, 189], "field": [9, 10, 14, 15, 27, 31, 35, 36, 42, 44, 54, 55, 56, 59, 65, 66, 246], "indic": [9, 14, 16, 18, 19, 50, 53, 54, 75, 76, 81, 147, 151, 183, 185, 189, 190, 191, 199, 200, 219, 220, 223, 226, 227, 242, 245, 274], "There": [9, 24, 52, 79, 274, 276, 277, 278, 279, 280], "few": [9, 10, 201, 277, 279, 282], "standard": [9, 12, 14, 15, 17, 19, 22, 34, 55, 56, 60, 63, 101, 107, 116, 120, 125, 129, 144, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 183, 205, 250, 268, 274, 275, 277, 278], "across": [9, 22, 25, 50, 53, 205, 229, 251, 256, 275, 277, 278, 281], "mani": [9, 14, 16, 19, 24, 54, 269, 270, 275, 278], "we": [9, 10, 11, 12, 18, 19, 20, 21, 22, 23, 24, 25, 26, 47, 50, 54, 55, 56, 60, 61, 67, 72, 73, 77, 182, 183, 185, 189, 190, 191, 193, 196, 198, 199, 206, 222, 225, 229, 230, 231, 237, 241, 247, 253, 258, 268, 269, 270, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "ipython": [9, 14, 19, 36, 38, 41, 55, 56, 94, 106, 119, 148, 162, 169, 179], "transform": [9, 10, 11, 16, 22, 25, 31, 33, 35, 55, 56, 59, 60, 62, 63, 65, 66, 67, 68, 69, 81, 85, 86, 87, 91, 95, 96, 97, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 143, 144, 146, 147, 148, 149, 150, 151, 154, 155, 156, 157, 158, 160, 165, 166, 167, 171, 172, 173, 174, 175, 188, 189, 190, 191, 201, 219, 236, 254, 279, 280, 281], "sharegpttomessag": [9, 13, 60, 69], "expect": [9, 12, 13, 15, 16, 18, 19, 20, 22, 24, 27, 31, 33, 35, 36, 37, 42, 46, 50, 55, 56, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 143, 144, 185, 199, 211, 233, 248, 252, 259, 274, 279, 280, 281], "code": [9, 10, 12, 13, 16, 19, 21, 22, 25, 82, 83, 84, 85, 86, 87, 88, 89, 90, 189, 248, 264, 268, 276, 280], "openaitomessag": [9, 13, 60, 67], "If": [9, 10, 13, 14, 16, 17, 19, 21, 22, 24, 30, 33, 35, 36, 37, 42, 44, 46, 47, 50, 51, 52, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 71, 73, 75, 77, 81, 94, 101, 106, 107, 116, 119, 120, 125, 129, 142, 144, 146, 148, 150, 162, 169, 171, 175, 179, 182, 183, 185, 187, 189, 190, 191, 193, 194, 195, 196, 198, 199, 205, 206, 211, 217, 229, 230, 231, 232, 233, 234, 237, 238, 239, 240, 241, 244, 248, 251, 252, 256, 257, 259, 261, 267, 273, 274, 275, 276, 277, 278, 279, 280, 281], "doe": [9, 17, 22, 44, 50, 54, 67, 71, 91, 153, 158, 168, 183, 187, 189, 190, 192, 193, 194, 197, 198, 199, 204, 217, 229, 231, 233, 258, 273, 275, 281], "fit": [9, 25, 54, 61, 71, 72, 191, 222, 274], "creat": [9, 10, 13, 16, 19, 22, 24, 27, 38, 54, 56, 60, 67, 75, 82, 83, 84, 85, 86, 87, 88, 89, 90, 92, 93, 96, 97, 98, 99, 102, 103, 104, 105, 108, 109, 110, 111, 112, 113, 114, 115, 117, 118, 121, 122, 123, 124, 126, 127, 128, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 152, 155, 157, 159, 161, 163, 164, 166, 168, 170, 172, 173, 174, 176, 177, 178, 182, 183, 189, 190, 191, 199, 227, 229, 230, 231, 235, 236, 248, 249, 251, 273, 275, 282], "custom": [9, 15, 16, 21, 24, 25, 31, 38, 55, 56, 60, 64, 65, 66, 67, 71, 94, 106, 119, 148, 162, 169, 179, 254, 268, 269, 270, 273, 276, 277, 279, 280], "dialogu": [9, 16, 43, 68, 274], "defin": [9, 10, 17, 22, 24, 25, 38, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 183, 188, 189, 199, 202, 204, 206, 208, 221, 276, 279], "same": [9, 10, 11, 15, 18, 22, 24, 38, 45, 78, 79, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 143, 166, 172, 173, 174, 181, 182, 184, 186, 187, 188, 190, 191, 194, 199, 201, 217, 223, 225, 226, 233, 238, 239, 252, 258, 260, 270, 273, 274, 275, 277, 278, 279, 280, 281, 282], "wai": [9, 14, 19, 22, 24, 55, 56, 210, 228, 273, 275, 276, 277, 278], "instruct_dataset": [9, 12, 13, 53], "info": [9, 262, 276], "slimorca_dataset": [9, 24], "command": [10, 12, 17, 21, 23, 25, 26, 267, 270, 271, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "line": [10, 17, 22, 23, 25, 271, 273, 276, 277, 280], "both": [10, 14, 15, 21, 22, 37, 50, 53, 63, 67, 180, 199, 201, 203, 211, 273, 275, 278, 279, 280, 281, 282], "built": [10, 11, 13, 23, 24, 26, 63, 67, 70, 267, 274, 276, 282], "done": [10, 17, 54, 189, 210, 237, 247, 258, 279, 281, 282], "run": [10, 17, 22, 23, 24, 26, 29, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 182, 183, 189, 195, 196, 229, 230, 231, 233, 234, 235, 245, 248, 251, 252, 253, 267, 268, 269, 270, 271, 274, 276, 277, 278, 279, 280, 281, 282], "cli": [10, 24, 26, 28, 29, 267, 269, 275, 276, 280], "which": [10, 11, 12, 14, 16, 17, 18, 19, 20, 21, 22, 24, 25, 46, 47, 53, 54, 57, 59, 60, 62, 64, 67, 68, 69, 71, 76, 77, 85, 86, 87, 94, 95, 96, 97, 106, 107, 108, 109, 110, 111, 119, 120, 121, 122, 129, 130, 131, 132, 138, 139, 144, 147, 148, 149, 150, 151, 153, 154, 155, 156, 157, 162, 165, 166, 169, 171, 172, 173, 174, 182, 183, 185, 189, 190, 191, 192, 193, 194, 199, 201, 210, 211, 214, 229, 230, 231, 233, 236, 237, 249, 252, 254, 258, 268, 269, 270, 271, 273, 274, 275, 276, 278, 279, 280, 281, 282], "folder": [10, 22], "first": [10, 17, 22, 24, 27, 42, 52, 54, 65, 76, 81, 147, 151, 189, 191, 192, 193, 199, 226, 229, 266, 268, 269, 274, 275, 277, 278, 279, 281, 282], "ensur": [10, 19, 21, 22, 24, 30, 52, 55, 56, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 183, 192, 229, 231, 237, 268, 276], "instal": [10, 23, 24, 26, 245, 248, 251, 252, 266, 273, 275, 276, 277, 278, 279, 280, 281, 282], "environ": [10, 25, 245, 248, 261, 267, 271, 273, 275, 276, 281], "so": [10, 13, 18, 19, 20, 22, 24, 54, 65, 187, 191, 229, 267, 268, 274, 275, 276, 277, 279, 280, 281, 282], "directori": [10, 22, 24, 35, 42, 65, 229, 230, 231, 249, 251, 252, 257, 273, 275, 276, 277], "new": [10, 14, 15, 16, 19, 21, 25, 37, 42, 59, 61, 62, 63, 65, 67, 68, 69, 159, 182, 200, 201, 232, 248, 249, 251, 274, 275, 276, 277, 278, 279, 282], "librari": [10, 222, 224, 237, 256, 262, 266, 267, 268, 273, 280, 282], "mkdir": 10, "my_project": [10, 248, 252], "cd": [10, 21, 267, 275], "llama": [10, 15, 16, 17, 20, 21, 22, 100, 142, 144, 145, 146, 147, 148, 150, 151, 184, 185, 229, 230, 269, 270, 273, 274, 275, 276, 277, 278, 279], "3": [10, 15, 16, 17, 20, 21, 22, 45, 47, 48, 49, 50, 54, 76, 77, 81, 142, 144, 145, 146, 147, 148, 150, 151, 153, 166, 168, 169, 191, 232, 242, 255, 262, 269, 270, 273, 274, 275, 276, 277, 278, 281, 282], "2": [10, 14, 15, 17, 21, 22, 26, 45, 47, 48, 49, 50, 52, 54, 69, 76, 77, 78, 79, 136, 137, 138, 139, 140, 141, 142, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 182, 183, 191, 214, 215, 217, 223, 225, 226, 229, 230, 232, 242, 255, 256, 257, 263, 270, 274, 275, 276, 277, 279, 280, 281], "lora": [10, 24, 85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 205, 206, 207, 210, 211, 229, 247, 266, 268, 271, 274, 276, 277, 278], "devic": [10, 17, 24, 25, 194, 233, 237, 240, 260, 261, 271, 273, 274, 275, 276, 277, 279, 280, 282], "lora_finetune_single_devic": [10, 24, 269, 273, 274, 275, 276, 277, 278, 279, 280, 282], "llama3_2": [10, 17, 136, 137, 138, 139, 140, 141, 192, 193, 194, 232, 278], "1b_lora_single_devic": 10, "often": [10, 279, 280], "ll": [10, 18, 20, 22, 24, 25, 73, 241, 268, 270, 274, 275, 276, 277, 278, 280, 281, 282], "want": [10, 12, 19, 22, 24, 25, 26, 27, 50, 55, 56, 73, 203, 267, 273, 274, 275, 276, 277, 278, 279, 280], "start": [10, 23, 25, 26, 46, 76, 217, 232, 248, 267, 268, 274, 275, 276, 278, 280, 281], "our": [10, 12, 13, 20, 22, 25, 268, 269, 270, 271, 274, 275, 276, 278, 279, 280, 281, 282], "particular": [10, 11, 13, 19, 21, 24, 53, 144, 227, 279, 282], "adjust": [10, 205, 269, 270, 278, 280, 281], "hyperparamet": [10, 23, 225, 233, 268, 276, 279, 282], "cp": [10, 24, 267, 273, 274, 275, 276, 277, 281], "copi": [10, 274, 275, 276, 277, 280, 281, 282], "make": [10, 17, 19, 22, 23, 24, 25, 26, 145, 191, 268, 273, 275, 276, 277, 278, 279, 280, 281, 282], "modif": [10, 281], "show": [10, 144, 219, 267, 269, 270, 273, 274, 278, 279], "each": [10, 12, 15, 18, 19, 20, 22, 25, 38, 39, 42, 47, 48, 50, 53, 54, 55, 56, 78, 79, 80, 81, 85, 86, 87, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 143, 144, 147, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 183, 185, 189, 190, 191, 196, 198, 199, 201, 205, 210, 211, 217, 219, 220, 221, 222, 224, 225, 242, 256, 257, 268, 270, 271, 273, 275, 276, 279, 280, 281], "ls": [10, 21, 267, 271, 273, 275, 276, 277], "full": [10, 11, 13, 16, 22, 24, 25, 34, 40, 43, 55, 72, 88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 149, 152, 163, 164, 170, 199, 210, 211, 217, 234, 267, 268, 271, 273, 275, 277, 279, 280, 281], "5b_full_single_devic": 10, "qwen_config": 10, "now": [10, 19, 22, 182, 193, 233, 235, 270, 274, 275, 276, 277, 278, 279, 281, 282], "sure": [10, 17, 22, 24, 275, 276, 277, 278, 279, 280, 281, 282], "correct": [10, 12, 14, 19, 25, 34, 62, 184, 185, 189, 261, 268, 274], "ve": [10, 18, 21, 24, 182, 270, 273, 274, 275, 277, 278, 279, 280], "even": [10, 191, 258, 267, 273, 274, 277, 278, 279, 280, 282], "didn": 10, "t": [10, 13, 14, 18, 19, 20, 22, 24, 25, 45, 142, 143, 196, 201, 237, 252, 256, 273, 274, 275, 276, 278, 280, 282], "complet": [10, 11, 12, 18, 22, 25, 37, 54, 61, 71, 169, 274, 275, 276, 277, 280], "note": [10, 16, 21, 22, 24, 95, 199, 204, 233, 253, 256, 258, 270, 274, 275, 278, 279, 280, 281, 282], "must": [10, 13, 17, 27, 38, 53, 65, 66, 183, 193, 204, 228, 248, 282], "extens": [10, 25, 231, 268], "full_finetune_single_devic": [10, 17, 239, 273, 275, 276], "Or": [10, 199, 267], "rel": [10, 16, 17, 54, 183, 185, 189, 190, 199, 222, 240, 278, 279], "discuss": [10, 14, 19, 21, 24, 275, 276, 277, 279], "workflow": [10, 11, 266, 276, 279], "write": [10, 16, 22, 25, 229, 230, 231, 249, 276], "own": [10, 13, 18, 21, 22, 38, 247, 256, 273, 274, 275, 277, 278, 279], "loop": 10, "logic": [10, 15, 25, 31, 56, 213, 232, 268, 271, 276, 279], "case": [10, 14, 16, 22, 25, 26, 36, 38, 55, 79, 80, 81, 147, 151, 191, 193, 229, 233, 237, 241, 247, 249, 254, 268, 273, 274, 275, 277, 279, 280, 282], "similar": [10, 13, 16, 60, 61, 63, 65, 66, 67, 70, 71, 72, 210, 222, 275, 277, 278, 279, 280, 282], "scratch": 10, "local": [10, 11, 14, 46, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 119, 148, 169, 179, 248, 252, 256, 267, 273, 274, 275, 276], "single_devic": 10, "py": [10, 13, 24, 27, 74, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174, 182, 184, 185, 197, 222, 223, 224, 225, 236, 273, 275, 277], "recommend": [10, 60, 61, 62, 67, 68, 70, 72, 153, 189, 196, 248, 251, 274, 275, 280, 282], "python": [10, 24, 248, 252, 256, 262, 264, 273, 275, 281], "convent": [10, 188], "main": [10, 27, 29, 169, 184, 185, 267, 270, 275, 277], "script": [10, 22, 26, 271, 273, 275, 276, 277], "decor": [10, 25, 29], "pars": [10, 24, 27, 28, 216, 271, 276], "omegaconf": [10, 27], "dictconfig": [10, 24, 25, 27, 28, 29, 30, 248, 252, 257], "def": [10, 13, 15, 19, 21, 24, 25, 26, 29, 65, 66, 227, 232, 278, 279, 282], "cfg": [10, 24, 25, 28, 29, 30], "add": [10, 12, 13, 14, 16, 19, 21, 23, 24, 26, 50, 54, 57, 71, 144, 153, 191, 203, 215, 217, 231, 232, 274, 275, 277, 279, 280, 282], "here": [10, 12, 14, 15, 16, 18, 20, 21, 22, 23, 24, 26, 32, 62, 65, 66, 184, 185, 239, 269, 270, 271, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282], "attribut": [10, 187, 207, 217, 225, 235], "__name__": 10, "__main__": 10, "don": [10, 13, 14, 18, 19, 20, 22, 24, 25, 252, 256, 273, 274, 275, 276, 278, 280, 282], "experiment": [10, 21, 24], "optim": [10, 18, 19, 22, 24, 25, 48, 53, 55, 91, 158, 168, 222, 223, 224, 225, 231, 233, 235, 236, 239, 240, 253, 257, 269, 270, 271, 274, 275, 276, 277, 278, 279, 282], "them": [10, 12, 15, 18, 19, 22, 24, 53, 67, 191, 195, 201, 217, 260, 270, 273, 274, 275, 279, 280, 281, 282], "when": [10, 16, 17, 18, 20, 21, 22, 24, 25, 29, 53, 54, 55, 56, 57, 67, 71, 73, 75, 182, 183, 185, 189, 190, 191, 193, 194, 195, 196, 198, 199, 200, 207, 210, 221, 236, 238, 251, 253, 258, 269, 273, 275, 277, 278, 279, 280, 281, 282], "mean": [10, 24, 144, 183, 184, 188, 189, 190, 199, 220, 247, 273, 274, 276, 279, 281], "high": [10, 53, 55, 56, 268, 278, 279, 280], "level": [10, 25, 55, 56, 196, 198, 218, 235, 247, 262, 268, 278, 282], "paramet": [10, 13, 14, 15, 16, 25, 27, 28, 29, 30, 31, 33, 35, 36, 37, 38, 42, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 91, 92, 93, 94, 95, 96, 97, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 116, 117, 118, 119, 120, 121, 122, 125, 126, 127, 128, 129, 130, 131, 132, 136, 137, 138, 139, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 154, 155, 156, 157, 158, 159, 160, 161, 162, 165, 166, 167, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 244, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 266, 268, 269, 270, 271, 273, 274, 275, 276, 277, 278, 279, 281, 282], "easili": [10, 16, 22, 24, 268, 278, 279, 281, 282], "custom_decod": 10, "customtransformerdecod": 10, "nn": [10, 27, 45, 47, 50, 81, 142, 143, 180, 182, 183, 187, 188, 189, 190, 191, 192, 193, 194, 195, 199, 200, 201, 202, 203, 204, 207, 208, 209, 227, 234, 235, 247, 253, 254, 258, 259, 278, 279, 282], "modul": [10, 13, 15, 21, 24, 27, 65, 66, 78, 79, 80, 81, 142, 143, 144, 147, 151, 156, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 222, 223, 224, 227, 232, 234, 235, 238, 247, 253, 254, 256, 276, 278, 279, 280, 282], "A": [10, 13, 15, 19, 25, 26, 33, 34, 37, 40, 42, 43, 47, 48, 49, 50, 53, 54, 67, 81, 179, 183, 187, 188, 189, 190, 191, 195, 199, 206, 210, 214, 215, 217, 219, 220, 221, 222, 223, 224, 225, 226, 227, 232, 233, 239, 240, 241, 246, 247, 265, 266, 272, 273, 274, 279, 280, 281, 282], "architectur": [10, 25, 100, 153, 189, 191, 199, 201, 232, 273], "present": [10, 33, 37, 42, 62, 63, 65, 66, 67, 68, 69, 215, 231, 258], "custom_model": 10, "num_lay": [10, 27, 81, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 189, 191, 199, 201], "int": [10, 15, 21, 24, 26, 47, 48, 49, 50, 51, 54, 61, 65, 66, 72, 73, 74, 75, 77, 78, 79, 80, 81, 85, 86, 87, 88, 89, 90, 91, 94, 95, 96, 97, 98, 99, 101, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 119, 120, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 154, 155, 156, 157, 158, 160, 162, 163, 164, 165, 166, 167, 169, 170, 171, 172, 173, 174, 175, 179, 182, 183, 184, 185, 188, 189, 190, 191, 194, 196, 197, 198, 199, 200, 201, 205, 206, 212, 213, 214, 215, 216, 217, 219, 226, 227, 229, 230, 231, 233, 234, 236, 238, 243, 247, 248, 249, 250, 251, 252, 254, 256, 257, 273, 278, 279, 280, 282], "classification_head": 10, "bool": [10, 15, 19, 21, 24, 31, 33, 35, 36, 37, 42, 45, 54, 57, 58, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 81, 85, 86, 87, 88, 89, 90, 91, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 144, 145, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 175, 183, 188, 189, 190, 195, 196, 197, 199, 201, 205, 206, 210, 211, 213, 214, 215, 217, 220, 226, 227, 229, 230, 231, 238, 240, 244, 245, 247, 248, 251, 254, 257, 258, 263, 280, 282], "fals": [10, 13, 14, 15, 16, 18, 19, 22, 24, 33, 35, 36, 37, 42, 45, 53, 54, 58, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 75, 76, 81, 85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 145, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 175, 183, 189, 190, 192, 193, 194, 199, 200, 201, 205, 206, 207, 210, 214, 226, 229, 230, 231, 242, 245, 257, 258, 273, 274, 275, 277, 279, 281, 282], "setup": [10, 22, 24, 25, 75, 182, 183, 188, 189, 190, 192, 193, 194, 199, 201, 234, 257, 273, 275, 279, 282], "expos": [10, 13, 24, 25, 231, 271, 276], "friendli": [10, 60, 64, 67, 71, 73, 274], "manner": [10, 20], "rather": [10, 222, 280], "everi": [10, 12, 22, 25, 62, 63, 67, 68, 69, 78, 79, 80, 146, 150, 191, 193, 251, 257, 267, 273, 280, 282], "construct": [10, 36, 63, 219, 271, 279], "care": [10, 22, 229, 231, 275, 277, 279], "how": [10, 13, 14, 18, 22, 23, 24, 25, 26, 191, 227, 248, 254, 266, 269, 270, 273, 274, 275, 276, 277, 280, 281, 282], "implement": [10, 19, 21, 22, 25, 55, 57, 59, 60, 61, 62, 63, 64, 67, 68, 69, 70, 71, 72, 180, 184, 185, 186, 191, 197, 204, 206, 212, 213, 218, 222, 223, 224, 225, 229, 236, 241, 251, 268, 270, 278, 279, 280, 281, 282], "llama3_2_vision_11b": 10, "custom_dataset": [10, 13], "sftdataset": [10, 13, 24, 55, 58, 59, 60, 62, 64, 65, 66, 68, 69], "packeddataset": [10, 17, 53, 58, 59, 60, 62, 64, 68, 69, 71, 72], "inputoutputtomessag": [10, 13, 14, 62, 68], "modeltoken": [10, 15, 21, 24, 36, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 67, 68, 69, 70, 71, 72, 217], "build": [10, 25, 71, 81, 91, 101, 116, 125, 146, 147, 150, 151, 158, 160, 175, 228, 268, 277, 279, 280], "block": [10, 25, 54, 85, 86, 87, 91, 95, 96, 97, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 146, 149, 150, 151, 154, 155, 156, 157, 158, 165, 166, 171, 172, 173, 174, 175, 183, 189, 190, 210, 211, 268], "tiny_cod": 10, "pack": [10, 54, 58, 59, 60, 62, 64, 65, 66, 68, 69, 71, 72, 183, 185, 189, 190, 199, 281], "subset": [10, 15, 16, 47, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 95, 107, 120, 129, 149, 154, 156, 165, 171, 202, 208], "nampdn": 10, "tini": 10, "respons": [10, 12, 13, 18, 19, 21, 32, 33, 35, 36, 37, 42, 55, 56, 60, 62, 63, 64, 65, 66, 67, 68, 69, 217, 220, 221, 222, 224, 225, 275, 276, 277], "model_transform": [10, 13, 15, 16, 55, 56, 62, 65, 66, 68, 69, 144], "message_transform": [10, 13, 55, 56], "column_map": [10, 12, 13, 16, 18, 31, 33, 35, 37, 42, 53, 58, 59, 62, 63, 64, 65, 66, 67, 68, 69, 70], "input": [10, 11, 12, 13, 14, 15, 20, 21, 22, 31, 35, 47, 48, 49, 50, 54, 55, 56, 59, 61, 62, 64, 65, 66, 68, 69, 72, 78, 79, 80, 81, 94, 106, 119, 142, 143, 144, 147, 148, 151, 162, 169, 171, 175, 180, 181, 183, 184, 185, 186, 187, 188, 189, 190, 191, 197, 198, 199, 200, 201, 205, 206, 214, 215, 219, 229, 231, 239, 256, 259, 274, 279, 282], "filter_fn": [10, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72], "lambda": [10, 220], "x": [10, 22, 45, 73, 74, 75, 78, 79, 80, 142, 143, 180, 181, 183, 184, 185, 186, 188, 189, 190, 191, 199, 200, 201, 205, 206, 242, 255, 278, 279, 281, 282], "split_across_pack": [10, 54, 71], "els": [10, 11, 12, 19, 25, 252, 268, 282], "posit": [10, 17, 24, 27, 54, 74, 76, 78, 79, 80, 81, 91, 95, 125, 129, 142, 147, 151, 154, 156, 158, 160, 165, 167, 182, 183, 185, 188, 189, 190, 191, 199, 200, 277], "automat": [10, 12, 16, 17, 19, 21, 23, 24, 26, 27, 59, 60, 273, 275, 282], "instanti": [10, 30, 38, 82, 83, 84, 85, 86, 87, 91, 92, 93, 94, 95, 96, 97, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 116, 117, 118, 119, 120, 121, 122, 125, 126, 127, 128, 129, 130, 131, 132, 136, 137, 138, 139, 145, 146, 147, 148, 149, 150, 151, 154, 155, 156, 157, 158, 159, 160, 161, 162, 165, 166, 167, 168, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 233], "separ": [10, 55, 201, 217, 229, 274, 276, 277, 279, 282], "under": [10, 24, 257, 280, 282], "best": [10, 16, 18, 25, 270, 274, 278, 280], "root": [10, 184, 251, 252], "custom_finetun": 10, "32": [10, 27, 182, 191, 199, 201, 248, 277, 279, 280, 281, 282], "option": [10, 12, 18, 21, 22, 24, 25, 31, 33, 35, 37, 42, 50, 51, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 77, 80, 81, 85, 86, 87, 94, 95, 96, 97, 101, 106, 107, 108, 109, 110, 111, 116, 119, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 142, 143, 144, 146, 147, 148, 149, 150, 151, 154, 155, 156, 157, 162, 165, 166, 169, 171, 172, 173, 174, 175, 179, 180, 183, 185, 188, 189, 190, 191, 194, 195, 199, 210, 211, 212, 214, 217, 220, 221, 223, 229, 230, 231, 233, 234, 237, 241, 248, 249, 252, 256, 257, 261, 262, 267, 268, 273, 274, 275, 280], "param": [10, 22, 25, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 145, 149, 166, 172, 173, 174, 202, 203, 205, 206, 208, 209, 211, 229, 279, 281, 282], "omit": [10, 278, 279, 280], "being": [10, 19, 22, 56, 193, 229, 230, 231, 235, 261, 280, 281, 282], "found": [10, 11, 22, 23, 24, 26, 184, 185, 229, 230, 231, 270, 273, 278, 279, 282], "correctli": [10, 21, 22, 25, 30, 210, 229, 267, 271, 274, 276, 282], "try": [10, 22, 24, 274, 275, 276, 277, 282], "after": [10, 19, 20, 23, 25, 38, 56, 65, 66, 94, 106, 119, 144, 148, 162, 169, 182, 183, 186, 187, 189, 190, 199, 201, 226, 247, 248, 249, 250, 251, 252, 270, 274, 275, 277, 281, 282], "pythonpath": 10, "pwd": 10, "vlm": [11, 16], "hub": [11, 22, 55, 56, 273, 276], "remot": [11, 14, 35, 42, 46, 55, 56], "url": [11, 16, 35, 37, 42, 46, 267], "project": [11, 23, 26, 81, 85, 86, 87, 91, 95, 101, 105, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 142, 143, 146, 147, 149, 150, 151, 154, 155, 156, 157, 158, 161, 165, 166, 171, 174, 175, 180, 183, 189, 191, 199, 203, 210, 211, 232, 238, 248, 252, 266, 279, 280, 282], "prefer": [11, 13, 25, 48, 55, 63, 67, 70, 222, 223, 224, 225, 268, 271, 273, 280], "align": [11, 65, 66, 222, 274, 278], "continu": [11, 20, 54, 191, 248], "pretrain": [11, 142, 143, 144, 199, 201, 203, 214, 215, 273, 274, 276, 279, 282], "beyond": [11, 275, 280, 282], "those": [11, 22, 232, 275, 277, 279], "customiz": 11, "task": [11, 12, 16, 18, 19, 34, 40, 43, 53, 61, 144, 269, 274, 275, 277, 278, 279, 280, 281, 282], "supervis": [11, 20, 56], "rlhf": [11, 55, 63, 220, 221, 222, 223, 224, 225, 226], "queri": [11, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 182, 183, 189, 190, 199, 277, 280], "time": [11, 16, 17, 22, 60, 64, 91, 158, 193, 196, 198, 217, 220, 249, 251, 257, 270, 273, 274, 275, 277, 282], "take": [11, 12, 13, 16, 18, 22, 24, 25, 27, 48, 55, 56, 65, 66, 67, 142, 182, 191, 195, 201, 229, 231, 260, 261, 270, 274, 275, 276, 277, 278, 279, 280, 282], "object": [11, 13, 14, 15, 19, 21, 24, 27, 28, 81, 183, 222, 225, 227, 241], "appli": [11, 12, 15, 19, 22, 25, 47, 55, 56, 59, 65, 66, 85, 86, 87, 88, 89, 90, 91, 95, 96, 97, 98, 99, 101, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 146, 149, 150, 151, 152, 154, 155, 156, 157, 158, 163, 164, 165, 166, 170, 171, 172, 173, 174, 175, 183, 187, 188, 189, 190, 199, 205, 210, 211, 254, 268, 269, 278, 280, 282], "templat": [11, 31, 32, 34, 38, 39, 40, 43, 55, 56, 59, 62, 68, 94, 100, 106, 119, 144, 148, 153, 162, 169, 179], "anyth": [11, 61, 260], "requir": [11, 15, 17, 19, 21, 22, 24, 47, 48, 53, 55, 56, 57, 65, 66, 67, 71, 144, 187, 189, 200, 229, 231, 233, 244, 245, 247, 248, 251, 252, 256, 257, 267, 270, 273, 274, 276, 280, 281, 282], "collat": [11, 47, 49, 50, 54], "packag": [11, 23, 26, 248, 251, 252, 267], "togeth": [11, 25, 54, 196, 252, 271, 276, 279, 280, 281], "form": [12, 18, 22, 24, 25, 31, 44, 52, 55, 56, 273], "along": [12, 22, 279], "describ": [12, 254], "hand": [12, 36], "grammar": [12, 19, 34, 62], "head": [12, 81, 91, 95, 101, 107, 116, 120, 125, 129, 142, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 182, 183, 185, 189, 199, 203, 232, 277], "csv": [12, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72], "incorrect": [12, 19], "cat": [12, 16, 19, 219], "grammarerrorcorrectiontempl": [12, 19, 62], "prepend": [12, 14, 16, 19, 33, 35, 37, 38, 39, 42, 60, 62, 63, 64, 65, 66, 67, 68, 69, 94, 106, 119, 144, 148, 162, 169, 179, 214], "english": [12, 19, 34], "ncorrect": [12, 34], "mask": [12, 13, 14, 15, 17, 19, 21, 36, 38, 50, 54, 56, 59, 60, 62, 64, 65, 66, 67, 68, 69, 74, 75, 76, 144, 183, 188, 189, 190, 199, 213, 217, 219, 220, 223, 242, 274, 278], "out": [12, 15, 18, 20, 22, 24, 25, 59, 60, 62, 64, 67, 68, 69, 75, 76, 219, 229, 230, 242, 266, 268, 269, 270, 271, 273, 274, 275, 276, 277, 279, 280, 282], "100": [12, 18, 25, 48, 49, 50, 59, 60, 62, 64, 67, 68, 69, 73, 196, 197, 198, 200, 278, 279, 282], "27957": 12, "736": 12, "577": 12, "anoth": [12, 13, 16, 24, 56, 187, 248, 275, 280], "c4": [12, 71, 281], "200m": 12, "liweili": [12, 62], "c4_200m": [12, 62], "chang": [12, 13, 16, 21, 22, 23, 24, 26, 31, 33, 35, 64, 66, 70, 231, 267, 273, 275, 276, 277, 278, 279, 280, 281, 282], "remap": 12, "someth": [12, 22, 25, 26, 274, 275, 281], "hello": [12, 13, 14, 19, 21, 44, 214, 215, 262, 274, 275, 277], "world": [12, 13, 14, 19, 21, 44, 214, 215, 243, 245, 262, 275], "bye": [12, 13], "robot": [12, 15], "am": [12, 14, 16, 60, 64, 100, 153, 274, 275, 277], "prompttempl": [12, 31, 34, 40, 43, 144], "relev": [12, 14, 25, 188, 189, 190, 199, 273, 275, 279, 280], "inform": [12, 14, 22, 248, 252, 254, 268, 273, 275, 276], "mai": [12, 16, 17, 24, 26, 60, 73, 191, 194, 200, 238, 258, 269, 270, 274, 276, 278, 279, 280], "alpaca_dataset": [12, 17, 24, 58], "grammar_dataset": 12, "samsum_dataset": 12, "dictionari": [13, 14, 15, 36, 38, 44, 47, 48, 49, 54, 55, 56, 94, 106, 119, 148, 162, 169, 179, 240, 246, 248, 249, 250, 251, 252, 260, 275], "onc": [13, 21, 24, 38, 189, 199, 275, 276, 277, 279, 282], "repres": [13, 36, 48, 78, 79, 191, 228, 234, 274, 280, 281], "prepar": [13, 15, 274, 281], "ad": [13, 16, 19, 21, 25, 38, 50, 78, 79, 80, 146, 150, 160, 191, 199, 200, 203, 214, 217, 231, 232, 274, 279, 280, 281, 282], "column": [13, 16, 18, 20, 31, 33, 35, 37, 42, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 183, 189, 190, 199, 281], "worri": [13, 22, 274, 276], "itself": [13, 24], "do": [13, 15, 18, 21, 22, 23, 25, 36, 47, 65, 67, 193, 210, 217, 248, 252, 258, 273, 275, 276, 277, 279, 280, 281], "well": [13, 18, 22, 24, 25, 268, 273, 275, 277, 278, 280, 282], "flexibl": [13, 24, 53, 280], "inherit": [13, 14, 19, 25, 268], "__call__": [13, 15, 19, 65, 66, 144], "simpl": [13, 22, 25, 191, 225, 266, 276, 279, 281, 282], "contriv": [13, 19], "would": [13, 15, 19, 22, 24, 26, 38, 54, 189, 191, 199, 267, 274, 275, 279, 280, 282], "inde": [13, 237, 275], "quit": [13, 280, 282], "type": [13, 14, 15, 16, 21, 26, 27, 29, 36, 37, 44, 45, 46, 47, 48, 49, 50, 51, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 91, 92, 93, 94, 95, 96, 97, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 116, 117, 118, 119, 120, 121, 122, 125, 126, 127, 128, 129, 130, 131, 132, 136, 137, 138, 139, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 154, 155, 156, 157, 158, 159, 160, 161, 162, 165, 166, 167, 168, 169, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 194, 195, 196, 197, 198, 199, 200, 201, 202, 205, 206, 208, 212, 213, 214, 215, 216, 217, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 235, 237, 238, 239, 240, 241, 242, 243, 244, 245, 247, 254, 255, 256, 257, 259, 261, 262, 263, 270, 275, 279, 280, 281, 282], "map": [13, 15, 19, 21, 22, 31, 33, 35, 37, 38, 42, 47, 53, 54, 59, 62, 63, 64, 65, 66, 67, 68, 69, 70, 94, 106, 119, 144, 148, 162, 169, 179, 209, 215, 216, 229, 233, 235, 248, 249, 250, 251, 252, 253, 257, 275, 279], "messagetransform": 13, "self": [13, 15, 18, 19, 20, 21, 25, 26, 54, 65, 66, 85, 86, 87, 91, 95, 96, 97, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 146, 149, 150, 151, 154, 155, 156, 157, 158, 160, 165, 166, 167, 171, 172, 173, 174, 175, 183, 188, 189, 190, 196, 198, 199, 201, 204, 210, 211, 229, 232, 233, 278, 279, 282], "str": [13, 15, 21, 24, 27, 28, 31, 33, 35, 36, 37, 38, 42, 44, 46, 47, 48, 49, 50, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 94, 106, 119, 144, 148, 149, 152, 162, 169, 179, 195, 200, 201, 202, 204, 205, 206, 208, 209, 210, 211, 212, 213, 214, 215, 216, 228, 229, 230, 231, 232, 233, 234, 237, 240, 241, 244, 246, 248, 249, 250, 251, 252, 256, 257, 258, 259, 261, 262, 263, 280], "eot": [13, 14, 19, 36, 144], "_messag": 13, "0x7fb0a10094e0": 13, "0x7fb0a100a290": 13, "msg": [13, 14, 16, 19, 21, 274], "text_cont": [13, 14, 16, 19, 36, 274], "manipul": 13, "load_dataset_kwarg": [13, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72], "mymessagetransform": 13, "chosenrejectedtomessag": [13, 63, 67], "core": [14, 25, 55, 56, 268, 271, 276, 282], "govern": [14, 274], "serv": [14, 19, 24, 33, 35, 37, 42, 60, 62, 63, 64, 65, 66, 67, 68, 69, 217, 227, 279], "interfac": [14, 25, 38, 39, 53, 204, 218], "api": [14, 25, 26, 34, 40, 43, 55, 56, 57, 59, 65, 66, 88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170, 210, 248, 267, 271, 273, 274, 276, 277, 282], "oper": [14, 25, 191, 207, 218, 256, 281], "send": 14, "other": [14, 15, 18, 20, 22, 25, 27, 35, 38, 53, 231, 238, 257, 260, 269, 270, 274, 276, 277, 278, 279, 280, 281], "special": [14, 16, 19, 36, 42, 119, 144, 146, 148, 150, 169, 179, 191, 200, 212, 213, 215, 216, 217, 219, 233], "individu": [14, 36, 54, 199, 240, 252, 254, 274], "ref": [14, 55, 56, 57, 59, 65, 66, 168, 169, 252], "constructor": [14, 21], "ident": [14, 18, 20, 45, 47, 54, 65, 67, 153, 189, 205, 275, 280, 281], "from_dict": [14, 36, 274], "becaus": [14, 21, 55, 56, 95, 182, 189, 191, 199, 231, 273, 274, 281], "correspond": [14, 18, 21, 36, 48, 74, 75, 76, 202, 204, 208, 220, 223, 237, 270, 276, 277, 280, 281], "begin": [14, 22, 54, 71, 191, 215, 217, 274, 277, 282], "pil": [14, 15, 16, 36, 37, 44, 46], "img_msg": 14, "place": [14, 16, 20, 258, 274, 280], "mode": [14, 15, 16, 194, 234, 241, 248, 275], "rgb": [14, 15, 16, 142], "4": [14, 15, 16, 22, 24, 45, 47, 48, 49, 50, 76, 81, 144, 147, 151, 182, 183, 191, 242, 263, 268, 270, 273, 275, 277, 278, 279, 280, 281, 282], "appropri": [14, 36, 53, 76, 100, 200, 229, 236, 282], "load_imag": [14, 16], "image_path": [14, 16], "jpg": [14, 16, 35, 42, 46], "tag": [14, 16, 19, 21, 38, 42, 44, 94, 100, 106, 119, 144, 148, 153, 162, 169, 179, 248, 249, 250, 251, 252, 274], "placehold": [14, 16, 42, 228], "should": [14, 15, 16, 18, 20, 22, 24, 25, 33, 35, 36, 37, 38, 42, 47, 54, 59, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 75, 76, 85, 86, 87, 95, 96, 97, 100, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 142, 146, 149, 150, 151, 153, 154, 155, 156, 157, 158, 160, 165, 166, 167, 171, 172, 173, 174, 175, 182, 183, 189, 191, 194, 199, 204, 210, 211, 220, 223, 227, 228, 246, 248, 249, 250, 251, 252, 267, 268, 275, 276, 277, 278, 279, 280, 281, 282], "insert": [14, 201, 281], "format_content_with_imag": [14, 16], "image_tag": [14, 16, 42, 44], "conveni": [14, 24, 25, 46, 273], "prompttemplateinterfac": [14, 19, 94, 106, 119, 148, 162, 169, 179], "templated_msg": [14, 19], "contains_media": [14, 16, 36], "get_media": [14, 15, 16, 36], "4x4": 14, "0x7f8d27e72740": 14, "tokenize_messsag": 14, "hi": [14, 20, 73, 274], "tokenize_messag": [14, 15, 21, 36, 55, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 144, 213, 217, 274], "22557": 14, "1526": [14, 21], "28808": 14, "28705": [14, 21], "28748": [14, 21], "15359": 14, "28725": 14, "315": [14, 20], "837": 14, "396": 14, "16107": 14, "13892": 14, "28723": 14, "modal": [15, 16, 56, 144, 201], "current": [15, 16, 18, 22, 35, 42, 54, 67, 75, 91, 95, 107, 120, 129, 149, 150, 151, 154, 156, 158, 165, 168, 171, 182, 183, 185, 189, 190, 199, 223, 230, 231, 233, 238, 241, 243, 249, 251, 253, 256, 270, 271, 276, 277, 278, 280, 281], "intend": [15, 260, 274], "drop": [15, 144, 200, 278, 281], "replac": [15, 16, 42, 51, 59, 60, 62, 64, 67, 68, 69, 144, 195, 200, 258, 279], "llama3_2_vis": [15, 16, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152], "llama3visiontransform": [15, 16, 148], "__init__": [15, 24, 25, 65, 66, 278, 279, 282], "transform_imag": 15, "clipimagetransform": [15, 65, 66, 144, 191], "xattn_mask": 15, "visioncrossattentionmask": [15, 144, 218], "224": [15, 16, 144], "tile_s": [15, 79, 80, 81, 144, 147, 151, 191, 219], "patch_siz": [15, 79, 80, 81, 144, 147, 151, 191, 219], "14": [15, 48, 144, 191, 281, 282], "skip_special_token": [15, 16, 67, 144], "begin_of_text": [15, 16, 21, 274], "start_header_id": [15, 16, 274], "end_header_id": [15, 16, 274], "n": [15, 16, 18, 19, 21, 34, 38, 40, 43, 183, 191, 217, 265, 272, 273, 274, 281], "eot_id": [15, 16, 21, 274], "na": [15, 274], "encoder_input": [15, 16, 50, 188, 189, 199], "shape": [15, 16, 22, 47, 50, 73, 74, 75, 76, 78, 79, 80, 81, 142, 143, 144, 147, 151, 180, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 196, 197, 198, 199, 200, 201, 205, 206, 219, 220, 221, 222, 223, 224, 225, 226, 242, 257, 258, 278], "num_til": [15, 16, 142, 143, 191], "num_channel": [15, 16, 191], "tile_height": [15, 16], "tile_width": [15, 16], "torch": [15, 16, 22, 24, 45, 47, 48, 49, 50, 73, 74, 75, 76, 77, 78, 79, 80, 81, 142, 143, 144, 180, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 205, 206, 220, 221, 222, 223, 224, 225, 226, 231, 233, 235, 236, 237, 239, 240, 242, 244, 245, 251, 253, 254, 255, 256, 257, 258, 259, 260, 261, 263, 267, 270, 275, 276, 277, 278, 279, 280, 282], "just": [15, 19, 22, 268, 270, 273, 274, 276, 277, 279, 280, 281], "the_cauldron_dataset": [15, 16], "ai2d": [15, 66], "respir": 15, "combust": 15, "give": [15, 21, 24, 228, 278, 279, 280], "choic": [15, 18], "oxygen": 15, "b": [15, 25, 45, 47, 142, 143, 182, 183, 185, 189, 190, 199, 206, 220, 221, 225, 242, 252, 279, 282], "carbon": 15, "dioxid": 15, "c": [15, 45, 47, 50, 65, 142, 274], "nitrogen": 15, "d": [15, 24, 36, 65, 142, 143, 182, 183, 189, 199, 273, 274, 278, 279, 281], "heat": 15, "letter": 15, "mymultimodaltransform": 15, "my_tokenizer_build": 15, "myimagetransform": 15, "add_eo": [15, 57, 71, 144, 214, 215, 274], "tupl": [15, 19, 21, 24, 27, 38, 48, 73, 74, 80, 94, 106, 119, 144, 148, 162, 169, 179, 182, 191, 195, 213, 217, 220, 221, 222, 223, 224, 225, 226, 227, 243, 257, 258, 259], "infer": [15, 19, 22, 50, 56, 91, 100, 158, 182, 183, 185, 189, 190, 199, 261, 266, 270, 271, 274, 275, 276, 277, 281, 282], "vision": [15, 16, 56, 81, 142, 144, 145, 146, 147, 148, 149, 150, 151, 152, 200, 232], "aspect_ratio": [15, 50, 78, 79, 142, 191], "append": [15, 19, 38, 39, 94, 106, 119, 144, 148, 162, 169, 179, 189, 199, 214, 248, 267], "addit": [15, 21, 22, 24, 25, 27, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 144, 146, 147, 150, 151, 153, 193, 200, 201, 210, 222, 227, 229, 230, 231, 237, 238, 244, 247, 248, 249, 251, 252, 254, 268, 274, 276, 279, 280], "kei": [15, 21, 22, 24, 26, 33, 35, 37, 42, 47, 48, 55, 56, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 182, 183, 188, 189, 190, 199, 201, 209, 210, 211, 225, 229, 231, 233, 248, 257, 273, 275, 276, 279, 280, 282], "e": [16, 18, 19, 36, 46, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 78, 79, 80, 81, 142, 143, 144, 147, 151, 183, 191, 195, 199, 204, 209, 219, 228, 229, 233, 240, 257, 261, 267, 270, 275, 277, 279, 280, 281, 282], "g": [16, 18, 46, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 78, 79, 80, 81, 142, 143, 144, 147, 151, 183, 191, 199, 204, 219, 228, 229, 240, 257, 261, 270, 277, 279, 280, 281, 282], "base": [16, 18, 20, 22, 27, 36, 38, 85, 86, 87, 88, 89, 90, 91, 95, 96, 97, 98, 99, 101, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 144, 145, 148, 149, 150, 151, 152, 154, 155, 156, 157, 158, 160, 163, 164, 165, 166, 167, 170, 171, 172, 173, 174, 175, 185, 205, 206, 207, 209, 210, 211, 221, 222, 224, 225, 229, 236, 238, 239, 247, 249, 258, 261, 266, 274, 275, 276, 277, 278, 279, 280, 282], "multimodal_chat_dataset": 16, "visual": [16, 201], "get": [16, 22, 23, 24, 25, 26, 50, 144, 233, 237, 240, 243, 248, 262, 267, 268, 269, 270, 274, 275, 276, 278, 279, 280, 281], "below": [16, 23, 26, 47, 227, 277, 278, 279, 282], "clock": 16, "10": [16, 45, 47, 48, 49, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 191, 200, 275, 277, 280, 281, 282], "llama3_2_vision_transform": 16, "questionanswertempl": [16, 19, 70], "image_s": [16, 145, 148, 149, 152, 191], "560": [16, 145, 148, 149, 152], "image_dir": [16, 35, 42, 65], "home": [16, 24, 35, 42, 46], "nquestion": 16, "nit": 16, "00am": 16, "sharegpt": [16, 42, 60, 274], "q1": [16, 33, 55, 60, 67], "a1": [16, 33, 55, 60], "sharegpt4v": 16, "lin": 16, "chen": 16, "renam": 16, "themselv": [16, 282], "pathlib": 16, "pil_imag": 16, "Then": [16, 20, 26, 207, 276, 278, 280], "relat": [16, 188, 189, 199, 279], "user_messag": [16, 34, 40, 43, 144, 274], "locat": [16, 21, 24, 35, 42, 273, 277, 279, 281, 282], "long": [16, 54, 215, 274, 279], "image_dog": 16, "image_cat": 16, "image_bird": 16, "dog": [16, 219], "bird": [16, 46], "pet": 16, "three": [16, 22, 25, 50, 144, 222, 224, 225, 271, 276], "referenc": 16, "huggingfac": [16, 57, 61, 63, 71, 72, 161, 168, 169, 176, 177, 178, 222, 224, 225, 229, 230, 236, 273, 275], "co": [16, 57, 61, 63, 71, 72, 161, 168, 169, 176, 177, 178, 229, 230, 275], "img": 16, "llava_instruct_dataset": 16, "concaten": [17, 21, 48, 53, 147, 151, 213, 217], "sequenc": [17, 45, 47, 48, 49, 50, 54, 57, 61, 65, 66, 71, 72, 75, 76, 91, 94, 95, 101, 106, 107, 116, 119, 120, 125, 129, 142, 143, 144, 146, 148, 150, 154, 156, 158, 160, 162, 165, 167, 169, 171, 175, 179, 182, 183, 185, 188, 189, 190, 191, 194, 199, 201, 215, 217, 219, 221, 225, 226, 242, 274], "upto": [17, 185], "maximum": [17, 24, 47, 50, 51, 54, 61, 72, 75, 78, 79, 81, 91, 94, 95, 101, 106, 107, 116, 119, 120, 125, 129, 144, 146, 147, 148, 150, 151, 154, 156, 158, 160, 162, 165, 167, 169, 171, 175, 182, 183, 185, 188, 189, 190, 194, 199, 201, 228, 273], "length": [17, 45, 47, 49, 50, 51, 52, 53, 54, 61, 72, 75, 91, 94, 95, 101, 106, 107, 116, 119, 120, 125, 129, 142, 143, 144, 146, 148, 150, 154, 156, 158, 160, 162, 165, 167, 168, 169, 171, 175, 179, 182, 183, 185, 188, 189, 190, 194, 196, 198, 199, 201, 215, 219, 220, 221, 230, 242, 248, 280], "slow": [17, 280, 282], "down": [17, 191, 231, 279, 280, 282], "introduc": [17, 85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 154, 155, 156, 157, 165, 166, 183, 184, 201, 205, 206, 225, 270, 274, 278, 279, 280, 281, 282], "signific": [17, 280, 281], "speedup": [17, 275, 277], "depend": [17, 25, 26, 229, 257, 273, 275, 278, 279, 280, 282], "iter": [17, 257, 258, 259, 282], "through": [17, 18, 22, 23, 24, 25, 26, 55, 81, 147, 151, 180, 182, 191, 201, 207, 268, 269, 270, 271, 273, 274, 275, 276, 278, 280, 281, 282], "greedi": [17, 54], "upon": [17, 25, 53, 189, 193, 199, 277], "initi": [17, 22, 25, 29, 53, 54, 82, 83, 84, 92, 93, 102, 103, 104, 105, 117, 118, 126, 127, 128, 136, 137, 159, 161, 176, 177, 178, 205, 222, 233, 244, 245, 258, 270, 276, 279, 282], "max": [17, 50, 54, 179, 189, 191, 199, 215, 228, 236, 273, 279], "llama3": [17, 20, 21, 24, 65, 66, 73, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 144, 146, 148, 149, 150, 152, 165, 196, 198, 232, 238, 266, 268, 269, 270, 273, 275, 280], "load": [17, 22, 25, 35, 42, 46, 53, 54, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 199, 210, 229, 230, 231, 233, 251, 258, 275, 277, 279], "isinst": [17, 227], "1b_full_single_devic": 17, "prevent": [17, 22, 54, 222, 273, 280], "irrelev": 17, "cross": [17, 50, 54, 146, 150, 188, 196, 198, 199, 201, 219, 278], "attend": [17, 54, 183, 188, 189, 190, 199, 219], "pytorch": [17, 24, 25, 74, 189, 195, 196, 227, 245, 251, 254, 256, 257, 266, 267, 268, 270, 275, 277, 279, 280, 281, 282], "flex": 17, "attent": [17, 50, 54, 74, 75, 76, 81, 85, 86, 87, 91, 95, 96, 97, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 146, 147, 149, 150, 151, 154, 155, 156, 157, 158, 160, 165, 166, 167, 168, 171, 172, 173, 174, 175, 182, 183, 185, 188, 189, 190, 192, 199, 201, 210, 211, 219, 277, 279, 280, 282], "flash": 17, "non": [17, 197, 198, 211, 221, 278], "causal": [17, 54, 75, 183, 189, 190, 199], "hardwar": [17, 237, 268, 275, 279, 280], "cuda": [17, 24, 237, 240, 257, 261, 267, 275, 280, 282], "ture": 17, "sdpa": 17, "memori": [17, 21, 25, 53, 54, 57, 61, 71, 72, 187, 189, 195, 196, 198, 199, 210, 238, 240, 246, 247, 257, 266, 268, 269, 270, 275, 276, 277, 278, 281], "effici": [17, 210, 238, 266, 268, 269, 275, 276, 279, 281], "fallback": 17, "while": [17, 24, 25, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174, 200, 205, 268, 270, 275, 280, 281, 282], "retain": [17, 222, 280, 282], "reward": [18, 105, 111, 115, 157, 161, 164, 220, 221, 222, 224, 225, 232], "downstream": 18, "captur": 18, "ground": [18, 196, 197, 198, 280], "truth": [18, 24, 196, 197, 198, 275, 277], "usual": [18, 21, 22, 185, 189, 226, 229, 242, 252, 273, 275, 279, 280], "outcom": 18, "binari": 18, "comparison": [18, 25, 279, 282], "annot": 18, "accord": [18, 19, 65, 66, 76, 153, 274], "criterion": 18, "style": [18, 31, 54, 58, 59, 60, 69, 201, 282], "interact": [18, 25, 55, 67, 266, 271, 276], "free": [18, 225, 271, 279], "preference_dataset": 18, "my_preference_dataset": [18, 67], "chosen_convers": [18, 67], "hole": [18, 67], "my": [18, 19, 23, 67, 73, 273, 274, 275, 277], "trouser": [18, 67], "fix": [18, 20, 67, 281], "rejected_convers": [18, 67], "off": [18, 25, 38, 67, 269, 270, 275, 281], "chosen": [18, 33, 55, 63, 67, 70, 222, 224, 225, 257], "reject": [18, 33, 55, 63, 67, 70, 222, 224, 225], "rejected_input_id": [18, 48, 67], "nwhat": 18, "ntake": 18, "rejected_label": [18, 48], "128006": 18, "78191": 18, "128007": 18, "271": 18, "18293": 18, "1124": 18, "1022": 18, "13": [18, 20, 21, 48, 191, 217, 226, 282], "128009": [18, 274], "accomplish": [18, 20, 53, 60, 64, 67, 71], "shown": [18, 275, 280, 281], "di": 18, "look": [18, 19, 22, 24, 25, 235, 251, 267, 274, 275, 276, 277, 278, 279, 281], "anthrop": [18, 63], "harmless": [18, 63], "granni": 18, "her": [18, 20], "mobil": [18, 275], "phone": [18, 275], "issu": [18, 271, 281], "grandmoth": 18, "manag": [18, 22, 53, 193, 194, 207, 248, 255, 274], "behavior": [18, 22, 247, 274], "thing": [18, 280, 282], "grandma": 18, "feel": [18, 271, 279], "box": [18, 268, 270, 282], "hh_rlhf_helpful_dataset": 18, "hendrydong": 18, "preference_700k": 18, "stack_exchange_paired_dataset": 18, "purpos": [19, 65, 66, 276, 277], "whenev": [19, 144, 196, 279], "llama2": [19, 22, 24, 25, 27, 61, 72, 82, 83, 84, 85, 86, 87, 88, 89, 90, 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 144, 180, 189, 190, 232, 266, 269, 273, 276, 277, 280, 281], "were": [19, 20, 21, 53, 142, 191, 207, 223, 276, 281], "gear": [19, 144], "summar": [19, 43, 68, 274, 280], "summarizetempl": [19, 68, 274], "commun": [19, 144, 275, 280], "chatmltempl": [19, 144, 179], "gec_templ": 19, "extend": [19, 21, 22, 25, 268, 280], "customprompttempl": 19, "achiev": [19, 38, 253, 270, 275, 277, 278, 279, 281, 282], "prepend_tag": [19, 38], "append_tag": [19, 38], "thu": [19, 31, 38, 55, 56, 189, 280, 281], "empti": [19, 47, 50, 52, 77, 273], "standalon": [19, 182], "my_custom_templ": 19, "Is": 19, "overhyp": 19, "advanc": [19, 79, 80, 81, 147, 151, 191], "configur": [19, 21, 25, 55, 56, 59, 60, 61, 62, 64, 65, 66, 67, 68, 69, 70, 71, 72, 95, 107, 120, 129, 149, 154, 165, 171, 248, 268, 270, 271, 274, 276, 277, 278, 279, 280, 281, 282], "doesn": [19, 275], "neatli": 19, "fall": 19, "protocol": [19, 21, 204, 212, 213, 218], "arg": [19, 21, 24, 27, 39, 80, 181, 189, 195, 201, 204, 212, 213, 218, 250, 257, 270, 281], "whether": [19, 31, 33, 35, 36, 37, 42, 47, 50, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 85, 86, 87, 91, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 144, 145, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 175, 195, 197, 199, 205, 206, 210, 211, 214, 215, 227, 237, 239, 240, 248, 258, 274, 278], "sai": [19, 273, 274, 276], "eureka": 19, "eurekatempl": 19, "formatted_dialogu": 19, "llama2chattempl": [19, 106, 153, 179, 274], "paradigm": [20, 25, 269, 280], "unstructur": [20, 57, 71, 72], "unlabel": 20, "text_complet": 20, "odyssei": 20, "clear": [20, 280], "river": 20, "oceanu": 20, "had": 20, "got": [20, 50], "sea": 20, "went": 20, "till": 20, "reach": 20, "aeaean": 20, "island": 20, "dawn": 20, "sunris": 20, "drew": 20, "ship": 20, "sand": 20, "shore": 20, "sleep": 20, "wait": [20, 257], "break": [20, 144, 215], "child": 20, "morn": 20, "rosi": 20, "finger": 20, "appear": [20, 280], "sent": [20, 252], "men": 20, "circ": 20, "hous": 20, "fetch": [20, 279], "bodi": 20, "elpenor": 20, "cut": 20, "firewood": 20, "wood": 20, "headland": 20, "jut": 20, "wept": 20, "over": [20, 21, 25, 36, 56, 197, 198, 222, 236, 268, 270, 273, 275, 278, 279, 280, 282], "him": 20, "lament": 20, "funer": 20, "rite": 20, "armour": 20, "been": [20, 73, 75, 182, 189, 199, 226, 233, 238, 274, 280, 281], "burn": 20, "ash": 20, "rais": [20, 22, 27, 30, 33, 35, 37, 42, 44, 46, 47, 50, 52, 53, 59, 60, 62, 64, 65, 66, 68, 69, 71, 75, 81, 171, 182, 183, 187, 188, 189, 191, 192, 193, 194, 210, 211, 217, 229, 230, 231, 233, 237, 239, 240, 244, 248, 252, 256, 258, 259, 260], "cairn": 20, "stone": 20, "top": [20, 74, 77, 147, 151, 235, 280, 282], "oar": 20, "he": 20, "row": [20, 55, 56, 183, 189, 190, 199], "text_completion_dataset": [20, 281], "128000": [20, 274, 281], "6153": 20, "584": 20, "1051": 20, "2867": 20, "279": 20, "15140": 20, "22302": 20, "355": 20, "11": [20, 22, 45, 47, 48, 191, 275, 281, 282], "323": 20, "1047": 20, "2751": 20, "704": 20, "1139": 20, "1825": 20, "9581": 20, "4024": 20, "389": 20, "12222": 20, "8813": 20, "362": 20, "12791": 20, "5420": 20, "13218": 20, "1405": 20, "1070": 20, "374": 20, "39493": 20, "64919": 20, "439": 20, "304": 20, "1023": 20, "7634": 20, "1226": 20, "1243": 20, "24465": 20, "1057": 20, "8448": 20, "311": 20, "70163": 20, "1077": 20, "31284": 20, "6212": 20, "30315": 20, "1938": 20, "1288": 20, "1464": 20, "128001": [20, 281], "similarli": [20, 119, 148, 169, 179, 281], "wikimedia": 20, "wikipedia": [20, 46, 72], "cnn_dailymail_articles_dataset": 20, "index": [21, 48, 49, 50, 53, 54, 183, 185, 190, 197, 199, 221, 236, 261, 267, 274, 275], "embed": [21, 22, 78, 79, 80, 81, 91, 95, 101, 107, 116, 120, 125, 129, 142, 143, 144, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 182, 183, 184, 185, 188, 189, 191, 199, 200, 201, 203, 238, 274, 277, 280, 281], "vector": [21, 205, 224, 274, 280], "understood": 21, "plai": [21, 275, 280], "necessari": [21, 22, 55, 56, 248, 249, 250, 251, 252, 274, 279], "phi3": [21, 22, 165, 166, 168, 169, 170, 232, 273], "phi3_mini_token": 21, "p_token": 21, "phi": [21, 168, 169, 232], "32010": 21, "29871": 21, "1792": [21, 217], "9508": [21, 217], "32007": 21, "32001": 21, "4299": 21, "2933": [21, 217], "nuser": 21, "nmodel": 21, "sentencepiec": [21, 214, 277], "tiktoken": [21, 144, 215, 277], "host": [21, 267, 273, 276, 280], "distribut": [21, 77, 233, 244, 245, 254, 256, 261, 268, 271, 273, 276, 277, 278, 280], "alongsid": [21, 238, 280], "alreadi": [21, 24, 33, 37, 42, 62, 63, 65, 66, 67, 68, 69, 182, 183, 193, 194, 199, 232, 244, 267, 273, 275, 278, 279], "_token": [21, 25], "mistraltoken": [21, 162, 274], "adher": [21, 37, 42], "arbitrarili": 21, "small": [21, 184, 275, 280], "seq": [21, 189, 199], "len": [21, 22, 50, 53, 59, 62, 65, 66, 68, 189, 191, 199], "demonstr": [21, 280, 281], "7": [21, 22, 45, 47, 48, 49, 50, 182, 191, 219, 223], "6312": 21, "28709": 21, "assign": [21, 24, 55, 56], "uniqu": [21, 55, 56, 232], "abil": 21, "NOT": [21, 22, 91, 144, 158], "presenc": [21, 31], "certain": [21, 22, 24, 257, 274], "proper": [21, 267, 276], "end_of_text": 21, "special_token": [21, 144, 215, 274], "added_token": 21, "128257": 21, "128258": 21, "remain": [21, 37, 42, 236, 278, 279, 280], "special_tokens_path": [21, 119, 148, 169, 179], "basetoken": 21, "actual": [21, 23, 24, 26, 31, 33, 35, 55, 56, 59, 62, 63, 64, 66, 67, 68, 70, 144, 270, 274, 281], "string": [21, 22, 35, 36, 38, 44, 60, 61, 94, 106, 119, 144, 148, 162, 169, 179, 204, 212, 214, 215, 217, 228, 234, 237, 241, 248, 261, 273, 280], "kwarg": [21, 24, 27, 39, 179, 181, 188, 190, 195, 201, 204, 212, 213, 218, 244, 248, 249, 250, 251, 252, 254, 257], "dict": [21, 22, 24, 25, 26, 27, 31, 33, 35, 36, 37, 38, 42, 44, 47, 48, 49, 50, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 94, 106, 119, 144, 148, 162, 169, 179, 188, 190, 195, 199, 201, 202, 208, 209, 210, 211, 212, 213, 215, 216, 218, 229, 230, 231, 233, 235, 240, 244, 246, 248, 253, 258, 260], "given": [21, 25, 27, 44, 47, 52, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 76, 144, 193, 194, 206, 207, 212, 213, 221, 237, 241, 247, 253, 261, 263, 268, 279], "token_id": [21, 144, 212, 215], "its": [21, 54, 100, 153, 156, 183, 185, 189, 190, 199, 201, 205, 233, 253, 256, 273, 274, 275, 277, 279, 280], "sentencepiecebasetoken": [21, 212], "bpe": 21, "sp_token": 21, "reason": [21, 25, 73, 275, 280, 281], "walk": [22, 25, 251, 268, 274, 275, 276, 281, 282], "design": [22, 25, 225], "cover": [22, 23, 24, 25, 26, 274, 275, 282], "scenario": [22, 53, 144], "compos": [22, 191], "plug": [22, 280], "evalu": [22, 25, 266, 268, 270, 271, 276, 278, 279, 282], "gener": [22, 25, 47, 54, 61, 71, 74, 75, 76, 77, 144, 193, 194, 207, 220, 239, 248, 255, 256, 257, 264, 266, 270, 274, 278, 279, 280, 281, 282], "easi": [22, 25, 268, 279, 280], "understand": [22, 24, 25, 201, 266, 268, 269, 274, 279, 280, 282], "concept": [22, 271, 275, 276, 280], "talk": 22, "close": [22, 25, 248, 249, 250, 251, 252, 279], "veri": [22, 53, 189, 199, 273, 275, 280], "dictat": 22, "state_dict": [22, 195, 200, 201, 210, 229, 230, 231, 232, 233, 258, 279, 282], "store": [22, 55, 56, 248, 249, 252, 279, 280, 282], "disk": [22, 57, 249], "identifi": [22, 248], "state": [22, 25, 143, 189, 191, 193, 195, 199, 202, 208, 209, 210, 211, 220, 222, 229, 230, 231, 233, 235, 258, 275, 277, 279, 282], "match": [22, 44, 211, 248, 258, 267, 273, 275, 277, 279], "up": [22, 23, 25, 26, 50, 54, 61, 72, 144, 189, 193, 194, 199, 215, 219, 235, 248, 257, 269, 270, 271, 273, 274, 276, 277, 279, 280, 282], "exactli": [22, 211, 228, 281], "definit": [22, 279], "either": [22, 47, 55, 56, 73, 183, 189, 190, 211, 229, 248, 254, 267, 273, 279, 280, 281, 282], "explicit": 22, "error": [22, 24, 34, 52, 229, 256, 273], "except": [22, 36, 153, 217, 278], "wors": [22, 280], "silent": 22, "succe": 22, "popular": [22, 199, 268, 275], "offici": [22, 100, 274, 276, 277], "websit": 22, "inspect": [22, 275, 279, 282], "mmap": [22, 275], "weights_onli": [22, 231], "map_loc": [22, 275], "cpu": [22, 25, 194, 195, 237, 257, 261, 267, 273, 275, 282], "tensor": [22, 45, 47, 48, 49, 50, 73, 74, 75, 76, 77, 78, 79, 80, 81, 142, 143, 180, 181, 182, 183, 184, 185, 186, 188, 189, 190, 191, 195, 196, 197, 198, 199, 200, 201, 205, 206, 220, 221, 222, 223, 224, 225, 226, 229, 242, 248, 249, 250, 251, 252, 255, 258, 260, 278, 279, 280, 282], "item": 22, "f": [22, 26, 59, 62, 65, 66, 68, 228, 274, 275, 278, 279, 282], "tok_embed": [22, 189, 199, 200], "32000": [22, 27, 279], "4096": [22, 27, 61, 72, 183, 185, 279, 281], "292": 22, "tabl": [22, 200, 274, 275, 277, 278, 280, 282], "layer": [22, 25, 81, 85, 86, 87, 88, 89, 90, 91, 95, 96, 97, 98, 99, 101, 105, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 143, 146, 147, 149, 150, 151, 152, 154, 155, 156, 157, 158, 160, 161, 163, 164, 165, 166, 167, 170, 171, 172, 173, 174, 175, 183, 186, 187, 188, 189, 190, 191, 192, 193, 194, 199, 201, 203, 205, 206, 210, 211, 227, 234, 238, 268, 269, 277, 279, 280, 281, 282], "dim": [22, 50, 142, 143, 180, 183, 184, 185, 189, 196, 198, 199, 278], "within": [22, 24, 27, 54, 73, 77, 78, 95, 107, 120, 129, 149, 150, 151, 154, 156, 165, 171, 191, 193, 194, 251, 256, 257, 273, 279, 282], "big": 22, "bin": [22, 273, 275], "piec": 22, "pytorch_model": [22, 275], "00001": [22, 273, 278], "00002": [22, 273, 278], "embed_token": 22, "241": 22, "Not": 22, "fewer": [22, 183], "sinc": [22, 24, 27, 55, 56, 198, 229, 231, 274, 275, 277, 280, 281], "mismatch": 22, "caus": [22, 214], "re": [22, 24, 193, 201, 225, 231, 268, 269, 270, 274, 275, 276, 279, 280], "end": [22, 25, 36, 57, 71, 144, 215, 217, 266, 268, 274, 277, 279, 281], "number": [22, 25, 44, 50, 54, 61, 72, 73, 78, 79, 81, 91, 95, 101, 107, 116, 120, 125, 129, 142, 143, 144, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 182, 183, 189, 191, 197, 198, 219, 229, 230, 231, 233, 234, 236, 243, 256, 257, 273, 276, 278, 279, 280], "save": [22, 25, 26, 189, 195, 196, 198, 199, 229, 230, 231, 233, 238, 247, 252, 266, 270, 273, 274, 275, 277, 279, 280, 281], "less": [22, 50, 73, 275, 276, 277, 280, 282], "prone": 22, "invari": 22, "accept": [22, 24, 227, 276, 280, 282], "explicitli": [22, 204, 268, 279], "produc": [22, 233, 270, 281, 282], "One": [22, 50, 281], "advantag": [22, 220, 223, 270, 279], "abl": [22, 25, 275, 276, 281], "post": [22, 191, 253, 257, 270, 275, 277, 281, 282], "quantiz": [22, 85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 205, 206, 231, 241, 266, 267, 269, 271, 276, 282], "eval": [22, 266, 268, 281], "without": [22, 24, 26, 183, 187, 189, 193, 199, 210, 267, 268, 270, 274, 275, 279, 280, 281], "OR": 22, "surround": [22, 25, 268], "load_checkpoint": [22, 25, 229, 230, 231, 232], "save_checkpoint": [22, 25, 26, 229, 230, 231], "permut": 22, "behav": 22, "further": [22, 191, 225, 273, 278, 279, 280, 281, 282], "illustr": [22, 65, 66, 277], "whilst": [22, 269, 280], "read": [22, 229, 230, 231, 268, 280], "compat": [22, 229, 231, 280, 281], "framework": [22, 25, 268], "mention": [22, 275, 280, 282], "assum": [22, 35, 42, 45, 47, 65, 94, 106, 119, 148, 162, 169, 179, 182, 183, 185, 190, 199, 200, 202, 208, 215, 233, 235, 236, 237, 239, 274, 275, 279], "checkpoint_dir": [22, 24, 229, 230, 231, 275, 277, 278, 281], "easiest": [22, 275, 276], "everyth": [22, 25, 268, 271, 276], "flow": [22, 54, 281, 282], "safetensor": [22, 228, 229, 273, 278], "output_dir": [22, 24, 229, 230, 231, 257, 275, 277, 279, 281, 282], "snippet": 22, "explain": [22, 280], "fullmodelhfcheckpoint": [22, 275, 278], "sort": [22, 229, 231], "order": [22, 23, 25, 229, 231, 251, 252, 276, 280], "matter": [22, 229, 231, 273, 279], "checkpoint_fil": [22, 24, 26, 229, 230, 231, 275, 277, 278, 279, 281, 282], "restart": [22, 273], "previou": [22, 54, 229, 230, 231, 278], "section": [22, 25, 240, 266, 275, 277, 280, 282], "recipe_checkpoint": [22, 229, 230, 231, 281], "model_typ": [22, 229, 230, 231, 275, 277, 281], "resume_from_checkpoint": [22, 229, 230, 231], "discrep": [22, 229], "github": [22, 27, 74, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174, 184, 185, 196, 197, 222, 223, 224, 225, 236, 267, 275, 277, 278], "repositori": [22, 55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 100, 269, 270, 275, 276], "fullmodelmetacheckpoint": [22, 277, 281], "test": [22, 24, 25, 73, 268, 270, 274, 280], "written": [22, 24, 25, 229, 230, 248, 249, 250, 251, 252, 268], "partit": [22, 229, 282], "key_1": [22, 231], "weight_1": 22, "key_2": 22, "weight_2": 22, "mid": 22, "chekpoint": 22, "middl": [22, 201, 275, 280], "subsequ": [22, 25, 182, 189, 191, 219], "recipe_st": [22, 229, 230, 231], "pt": [22, 26, 229, 230, 231, 275, 277, 278, 281], "epoch": [22, 25, 26, 229, 230, 231, 233, 236, 273, 274, 275, 276, 277, 281], "etc": [22, 25, 143, 229, 240, 276], "flood": 22, "overwritten": 22, "updat": [22, 24, 25, 38, 182, 183, 189, 199, 205, 218, 222, 223, 229, 233, 257, 260, 267, 274, 275, 276, 277, 279, 280, 281, 282], "hf_model_0001_0": [22, 275, 278], "hf_model_0002_0": [22, 275], "adapt": [22, 85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 143, 154, 155, 156, 157, 165, 166, 199, 201, 202, 204, 205, 206, 207, 208, 209, 229, 230, 231, 247, 269, 274, 275, 279, 282], "merg": [22, 27, 28, 179, 229, 275, 277, 282], "tutori": [22, 254, 268, 269, 270, 274, 275, 276, 277, 278, 279, 280, 281, 282], "save_adapter_weights_onli": 22, "choos": [22, 60, 279], "resum": [22, 25, 229, 230, 231, 236, 282], "frozen": [22, 143, 149, 152, 200, 222, 279, 280, 282], "learnt": [22, 274, 275], "refer": [22, 24, 25, 184, 185, 187, 191, 196, 207, 221, 222, 223, 224, 225, 248, 268, 279, 280, 281], "adapter_checkpoint": [22, 229, 230, 231], "adapter_0": [22, 275], "knowledg": [22, 266], "forward": [22, 25, 78, 79, 80, 142, 143, 180, 181, 183, 184, 185, 186, 188, 189, 190, 191, 193, 194, 196, 197, 198, 199, 200, 201, 205, 206, 222, 223, 224, 225, 240, 257, 277, 278, 279, 280, 282], "modeltyp": [22, 229, 230, 231], "llama2_13b": [22, 108], "right": [22, 47, 50, 76, 189, 229, 275, 277, 279], "pytorch_fil": 22, "00003": [22, 228, 278], "torchtune_sd": 22, "load_state_dict": [22, 199, 200, 201, 210, 233, 258, 279], "successfulli": [22, 273, 276], "vocab": [22, 27, 179, 189, 199, 200, 277], "70": [22, 117], "randint": 22, "no_grad": 22, "6": [22, 45, 47, 48, 49, 50, 54, 91, 95, 184, 191, 242, 270, 281, 282], "3989": 22, "9": [22, 45, 47, 48, 50, 182, 191, 242, 275, 281, 282], "0531": 22, "2375": 22, "5": [22, 24, 45, 47, 48, 49, 50, 75, 191, 222, 225, 226, 236, 275, 276, 277, 278, 280], "2822": 22, "4872": 22, "7469": 22, "8": [22, 45, 47, 48, 50, 59, 62, 65, 66, 68, 85, 86, 87, 88, 89, 90, 96, 97, 98, 99, 108, 109, 110, 111, 112, 113, 114, 115, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 155, 157, 163, 164, 166, 170, 172, 173, 174, 182, 191, 196, 198, 275, 278, 279, 280, 281, 282], "6737": 22, "0023": 22, "8235": 22, "6819": 22, "2424": 22, "0109": 22, "6915": 22, "3618": 22, "1628": 22, "8594": 22, "5857": 22, "1151": 22, "7808": 22, "2322": 22, "8850": 22, "9604": 22, "7624": 22, "6040": 22, "3159": 22, "5849": 22, "8039": 22, "9322": 22, "2010": [22, 191], "6824": 22, "8929": 22, "8465": 22, "3794": 22, "3500": 22, "6145": 22, "5931": 22, "find": [22, 23, 25, 26, 222, 273, 275, 276, 278, 279, 280], "hope": 22, "deeper": [22, 269, 270, 276, 280], "insight": [22, 275], "happi": [22, 275], "cometlogg": 23, "checkpoint": [23, 24, 25, 195, 199, 201, 215, 228, 229, 230, 231, 232, 233, 234, 252, 254, 258, 268, 270, 273, 277, 278, 279, 281, 282], "workspac": [23, 26, 248], "seen": [23, 26, 279, 282], "screenshot": [23, 26], "comet_ml": [23, 248], "featur": [23, 25, 26, 267, 268, 269, 270, 275, 276, 280], "pip": [23, 26, 248, 251, 252, 267, 275, 277, 280], "login": [23, 26, 248, 252, 273, 275], "metric_logg": [23, 24, 25, 26], "metric_log": [23, 24, 26, 248, 249, 250, 251, 252], "experiment_nam": [23, 248], "experi": [23, 24, 248, 252, 266, 268, 277, 278, 279], "grab": [23, 26, 277], "tab": [23, 26], "asset": 23, "artifact": [23, 26, 257], "click": [23, 26], "effect": [24, 225, 278, 280, 281], "prerequisit": [24, 274, 275, 276, 277, 278, 279, 281, 282], "Be": [24, 274, 275, 276, 277, 278, 279, 280, 281, 282], "familiar": [24, 274, 275, 276, 277, 278, 279, 281, 282], "fundament": [24, 281], "reproduc": [24, 248], "overridden": [24, 257], "quick": 24, "seed": [24, 25, 26, 256, 276, 281], "shuffl": [24, 54, 281], "dtype": [24, 25, 77, 182, 183, 188, 189, 190, 192, 193, 194, 195, 199, 201, 237, 255, 259, 275, 278, 280, 281, 282], "fp32": [24, 189, 196, 198, 280, 281, 282], "enable_fsdp": 24, "keyword": [24, 27, 55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 195, 274], "subfield": 24, "dotpath": [24, 94, 106, 119, 148, 162, 169, 179], "wish": [24, 182, 193, 258], "exact": [24, 27, 275], "normal": [24, 54, 144, 181, 183, 184, 188, 189, 190, 196, 197, 198, 214, 274, 279, 281, 282], "instanc": [24, 27, 53, 107, 120, 129, 149, 154, 156, 165, 171, 172, 173, 176, 177, 195, 202, 208, 209, 279], "preced": [24, 27, 273, 277, 279], "throw": 24, "notic": [24, 78, 79, 80, 191, 274, 279], "miss": [24, 210, 211, 257, 279], "llama2_token": [24, 274, 275], "llama2token": [24, 106], "512": [24, 81, 282], "overwrit": [24, 231, 258, 267, 273], "duplic": [24, 25, 268, 273], "sometim": 24, "resolv": [24, 28, 276], "alpaca": [24, 31, 53, 58, 59, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174, 278], "disklogg": 24, "log_dir": [24, 249, 251, 252], "verifi": [24, 237, 238, 261, 274, 276, 279], "properli": [24, 210, 245, 273], "wa": [24, 35, 42, 50, 75, 79, 80, 81, 147, 151, 191, 210, 274, 279, 281, 282], "7b_lora_single_devic": [24, 275, 276, 279, 282], "my_config": [24, 273], "guidelin": 24, "tempt": 24, "put": [24, 25, 271, 276, 279, 281], "much": [24, 200, 225, 275, 277, 279, 280, 281, 282], "switch": 24, "encourag": [24, 225, 279, 280], "clariti": 24, "significantli": [24, 222, 269, 270, 280], "easier": [24, 275, 276], "dont": 24, "privat": 24, "parent": [24, 273], "guarante": 24, "stabil": [24, 196, 198, 268, 270, 280, 281, 282], "underscor": 24, "_alpaca": 24, "k1": [24, 25], "v1": [24, 25, 72], "k2": [24, 25], "v2": [24, 25, 248], "my_model_checkpoint": 24, "file_1": 24, "file_2": 24, "my_tokenizer_path": 24, "nest": [24, 260], "dot": 24, "notat": [24, 50, 142, 143, 183, 185, 189, 199, 220, 221, 242], "flag": [24, 25, 36, 59, 60, 62, 64, 67, 68, 69, 227, 231, 238, 273, 280, 282], "bitsandbyt": [24, 280], "pagedadamw8bit": [24, 280], "delet": [24, 189, 192, 193, 194, 199], "foreach": 24, "8b_full": [24, 273], "adamw": [24, 279, 280], "2e": [24, 280], "fuse": [24, 146, 150, 199, 200, 201, 202, 253, 281], "nproc_per_nod": [24, 270, 277, 279, 281], "full_finetune_distribut": [24, 239, 273, 275, 276], "thought": [25, 268, 271, 276, 282], "target": [25, 75, 197, 198, 225, 268, 278], "pipelin": [25, 268, 270], "eg": [25, 189, 199, 229, 268], "meaning": [25, 268, 275], "fsdp": [25, 187, 227, 233, 238, 247, 276, 277, 280], "activ": [25, 81, 180, 234, 240, 246, 254, 257, 268, 270, 281, 282], "gradient": [25, 197, 198, 247, 253, 257, 268, 270, 275, 277, 279, 282], "accumul": [25, 253, 257, 268, 270], "mix": [25, 181, 273, 275, 280], "precis": [25, 181, 195, 237, 268, 270, 276, 282], "complex": 25, "becom": [25, 191, 267], "harder": 25, "anticip": 25, "methodolog": 25, "possibl": [25, 54, 228, 273, 280], "trade": [25, 280], "vs": [25, 276], "qualiti": [25, 275, 279, 281], "believ": 25, "suit": [25, 276, 280], "solut": 25, "result": [25, 65, 81, 147, 151, 191, 198, 217, 219, 257, 270, 275, 277, 278, 279, 280, 281, 282], "meant": [25, 195, 233], "expertis": 25, "routin": 25, "yourself": [25, 273, 277, 279], "exist": [25, 194, 201, 233, 248, 267, 273, 275, 276, 277, 282], "ones": [25, 50, 182], "modular": [25, 268], "wandb": [25, 26, 252, 276], "log": [25, 28, 222, 223, 224, 225, 240, 246, 248, 249, 250, 251, 252, 262, 275, 276, 277, 278, 279, 280, 282], "fulli": [25, 53, 149], "nativ": [25, 266, 268, 279, 281, 282], "numer": [25, 66, 268, 270, 281], "pariti": [25, 268], "verif": [25, 184], "benchmark": [25, 256, 268, 275, 277, 279, 281], "limit": [25, 233, 278, 280, 281], "hidden": [25, 81, 143, 147, 151, 180, 189, 191], "behind": 25, "unnecessari": 25, "abstract": [25, 212, 213, 268, 276, 282], "No": [25, 231, 268], "go": [25, 81, 100, 147, 151, 153, 191, 217, 268, 275, 276, 278, 280, 282], "figur": [25, 279, 282], "spectrum": 25, "decid": 25, "avail": [25, 35, 42, 72, 199, 201, 237, 245, 261, 268, 273, 275, 277, 279, 280], "consist": [25, 33, 37, 42, 65, 66, 72, 271, 276], "overrid": [25, 28, 29, 33, 37, 42, 62, 63, 65, 66, 67, 68, 69, 258, 271, 273, 275, 276, 277, 278, 282], "valid": [25, 52, 76, 197, 210, 211, 221, 239, 258, 259, 267, 271, 275, 276], "closer": [25, 278, 279], "monolith": [25, 268], "trainer": [25, 222, 224, 225], "wrapper": [25, 181, 214, 215, 233, 235, 273, 279], "around": [25, 144, 181, 214, 215, 240, 273, 274, 275, 279, 280, 281, 282], "extern": 25, "primarili": [25, 53, 279], "eleutherai": [25, 72, 268, 278, 279, 281], "har": [25, 268, 278, 279, 281], "stage": [25, 191], "distil": [25, 266], "dataload": [25, 54, 59, 62, 65, 66, 68], "applic": [25, 229, 230, 252], "clean": [25, 26, 58, 278], "group": [25, 183, 243, 244, 248, 249, 250, 251, 252, 273, 277, 281], "init_process_group": [25, 244], "backend": [25, 273, 281], "gloo": 25, "nccl": 25, "fullfinetunerecipedistribut": 25, "cleanup": 25, "stuff": 25, "carri": [25, 56], "metric": [25, 276, 278, 280, 281], "logger": [25, 246, 248, 249, 250, 251, 252, 262, 276], "_devic": 25, "get_devic": 25, "_dtype": 25, "get_dtyp": 25, "ckpt_dict": 25, "wrap": [25, 201, 227, 234, 238, 247, 254, 274, 280], "_model": [25, 233], "_setup_model": 25, "_setup_token": 25, "_optim": 25, "_setup_optim": 25, "_loss_fn": 25, "_setup_loss": 25, "_sampler": 25, "_dataload": 25, "_setup_data": 25, "backward": [25, 233, 235, 253, 257, 282], "zero_grad": 25, "curr_epoch": 25, "rang": [25, 200, 222, 223, 225, 256, 273, 277, 281], "epochs_run": [25, 26], "total_epoch": [25, 26], "idx": [25, 54], "enumer": 25, "_autocast": 25, "logit": [25, 73, 74, 77, 196, 197, 198, 242, 278], "global_step": 25, "_log_every_n_step": 25, "_metric_logg": 25, "log_dict": [25, 248, 249, 250, 251, 252], "step": [25, 54, 55, 56, 65, 66, 189, 199, 220, 233, 235, 236, 248, 249, 250, 251, 252, 253, 257, 266, 270, 275, 279, 281, 282], "recipe_main": [25, 29], "fullfinetunerecip": 25, "wandblogg": [26, 279, 282], "tip": 26, "straggler": 26, "background": 26, "crash": 26, "otherwis": [26, 45, 47, 50, 79, 80, 81, 147, 151, 187, 189, 191, 245, 248, 274, 281], "exit": [26, 193, 194, 207, 267, 273], "resourc": [26, 248, 249, 250, 251, 252, 280, 281], "kill": 26, "ps": 26, "aux": 26, "grep": 26, "awk": 26, "xarg": 26, "desir": [26, 55, 56, 255, 274, 280], "suggest": [26, 278], "approach": [26, 53, 278], "full_finetun": 26, "joinpath": 26, "_checkpoint": [26, 275], "_output_dir": [26, 229, 230, 231], "torchtune_model_": 26, "with_suffix": 26, "wandb_at": 26, "descript": [26, 273], "whatev": 26, "metadata": [26, 281], "seed_kei": 26, "epochs_kei": 26, "total_epochs_kei": 26, "max_steps_kei": 26, "max_steps_per_epoch": [26, 281], "add_fil": 26, "log_artifact": 26, "hydra": 27, "facebook": 27, "research": 27, "com": [27, 74, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174, 184, 185, 196, 197, 222, 223, 224, 225, 236, 248, 267, 275, 277, 278], "facebookresearch": [27, 184], "blob": [27, 74, 85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 169, 172, 173, 174, 184, 185, 197, 222, 223, 224, 225, 236], "_intern": 27, "_instantiate2": 27, "l148": 27, "num_head": [27, 81, 91, 95, 101, 107, 116, 120, 125, 129, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 182, 183, 185, 189], "num_kv_head": [27, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 182, 183], "vocab_s": [27, 73, 74, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 196, 197, 198, 200], "parsed_yaml": 27, "embed_dim": [27, 78, 79, 80, 81, 91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 183, 185, 188, 189, 190, 191, 200, 201, 258, 279], "valueerror": [27, 33, 35, 37, 42, 44, 46, 47, 50, 52, 53, 59, 60, 62, 64, 65, 66, 68, 69, 71, 171, 182, 183, 191, 192, 193, 194, 229, 230, 231, 237, 240, 256, 259], "recipe_nam": 28, "rank": [28, 85, 86, 87, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 205, 206, 243, 245, 256, 269, 276, 279, 282], "zero": [28, 50, 182, 184, 189, 199, 228, 275, 277, 281], "displai": 28, "callabl": [29, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 81, 189, 207, 227, 238, 241, 247, 254], "With": [29, 275, 278, 279, 281, 282], "my_recip": 29, "foo": 29, "bar": [29, 268, 276, 280], "configerror": 30, "cannot": [30, 46, 231, 277], "equival": [31, 35, 79, 224, 225], "condit": [31, 73, 245, 273], "dedic": 31, "due": [31, 214, 279, 280, 282], "keep": [31, 33, 35, 37, 42, 63, 64, 66, 67, 70, 200, 275, 279, 280], "openai": [32, 37, 60, 223], "markup": 32, "im_start": 32, "context": [32, 168, 193, 194, 207, 255, 257, 280], "im_end": 32, "goe": [32, 207], "a2": [33, 55], "functool": [34, 40, 43, 227], "partial": [34, 40, 43, 227], "_prompt_templ": [34, 40, 43], "assistant_messag": [34, 40, 43], "respect": [35, 53, 100, 182, 209, 257, 274], "final": [35, 42, 55, 56, 85, 86, 87, 91, 95, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 146, 147, 149, 150, 151, 154, 155, 156, 157, 158, 165, 166, 171, 174, 175, 180, 189, 199, 210, 211, 275, 277, 278, 279, 280, 282], "leav": [35, 42, 280], "liter": [36, 38, 41, 85, 86, 87, 88, 89, 90, 94, 95, 96, 97, 98, 99, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 119, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 148, 149, 150, 151, 152, 154, 155, 156, 157, 162, 163, 164, 165, 166, 169, 170, 171, 172, 173, 174, 179, 210, 211], "union": [36, 46, 47, 58, 59, 60, 62, 64, 68, 69, 71, 72, 94, 106, 119, 148, 162, 169, 179, 189, 199, 211, 229, 234, 239, 248, 249, 250, 251, 252, 254, 256], "interleav": [36, 219], "attach": 36, "writer": 36, "calcul": [36, 38, 76, 142, 144, 183, 188, 190, 191, 220, 221, 223, 277], "consecut": [36, 52, 182, 219], "last": [36, 51, 54, 71, 189, 221, 233, 236], "properti": [36, 279, 280], "media": [36, 56], "classmethod": 36, "image_url": 37, "unmask": [37, 42, 197], "consid": [38, 53, 55, 56, 79, 80, 81, 147, 151, 191, 280], "come": [38, 52, 204, 279, 280], "nanswer": 40, "alia": [41, 227], "alwai": [42, 248, 258, 274, 280], "nsummari": [43, 274], "summari": [43, 53, 68, 191, 240], "batch_first": 45, "padding_valu": 45, "float": [45, 73, 74, 77, 85, 86, 87, 88, 89, 90, 91, 95, 96, 97, 98, 99, 101, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 120, 121, 122, 123, 124, 125, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 144, 149, 150, 151, 152, 154, 155, 156, 157, 158, 160, 163, 164, 165, 166, 167, 170, 171, 172, 173, 174, 175, 183, 184, 205, 206, 220, 221, 222, 223, 224, 225, 233, 236, 239, 240, 246, 248, 249, 250, 251, 252, 279, 280, 281, 282], "rnn": [45, 47, 50], "pad_sequ": [45, 47, 50], "variabl": [45, 232, 245, 248, 280, 282], "left": [45, 47, 50, 144, 189, 279], "longest": [45, 49, 50], "trail": 45, "dimens": [45, 50, 91, 95, 101, 107, 116, 120, 125, 129, 142, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 180, 182, 183, 185, 189, 191, 200, 205, 206, 277, 279, 280, 282], "element": [45, 47, 50, 53, 197, 242, 275], "12": [45, 47, 48, 69, 191, 267, 281], "image_loc": 46, "www": [46, 248], "org": [46, 65, 82, 83, 84, 85, 86, 88, 89, 90, 95, 96, 97, 98, 99, 102, 103, 104, 105, 107, 108, 109, 110, 112, 113, 114, 115, 120, 121, 122, 123, 124, 131, 132, 133, 134, 135, 138, 139, 140, 141, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 183, 184, 185, 191, 219, 220, 222, 223, 224, 225, 227, 245, 251, 254, 256, 262, 267], "en": [46, 57, 61, 63, 71, 72, 281], "pad_direct": [47, 50], "keys_to_pad": 47, "padding_idx": [47, 48, 49, 50, 54], "left_pad_sequ": [47, 50], "integ": [47, 49, 200, 227, 228, 234, 256], "batch_siz": [47, 59, 62, 65, 66, 68, 182, 183, 188, 189, 190, 192, 193, 194, 196, 197, 198, 199, 200, 201, 222, 224, 226, 275, 280, 281], "ignore_idx": [48, 49, 50], "input_id": [48, 242], "chosen_input_id": [48, 67], "chosen_label": 48, "15": [48, 191, 238, 274, 275, 279, 282], "16": [48, 85, 86, 87, 88, 89, 90, 96, 97, 98, 99, 108, 109, 110, 111, 112, 113, 114, 115, 121, 122, 123, 124, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 155, 157, 163, 164, 166, 170, 172, 173, 174, 182, 191, 279, 280, 282], "17": [48, 191, 279], "18": [48, 191, 277], "19": [48, 191, 282], "20": [48, 191, 226, 281], "token_pair": 49, "padded_col": 49, "pad_max_til": 50, "pad_max_imag": 50, "tile": [50, 78, 79, 80, 81, 142, 143, 144, 145, 147, 148, 149, 151, 191, 219], "aspect": [50, 78, 79, 268], "ratio": [50, 78, 79, 222, 223], "text_seq_len": [50, 219], "n_tile": [50, 78, 79, 191], "h": [50, 142, 182, 191, 196, 198, 267, 273], "w": [50, 82, 83, 84, 92, 93, 102, 103, 104, 105, 117, 118, 126, 127, 128, 136, 137, 142, 159, 161, 176, 177, 178, 191, 248, 251, 252, 274, 275, 279, 282], "h_ratio": 50, "w_ratio": 50, "encoder_mask": [50, 188, 189, 199], "image_seq_len": [50, 219], "channel": [50, 81, 142, 144, 147, 151, 191, 205, 281], "height": [50, 142], "largest": 50, "bsz": [50, 73, 74, 75, 76, 78, 79, 191, 196, 198], "max_num_imag": 50, "max_num_til": [50, 78, 79, 81, 144, 147, 151, 191], "tokens_per_til": 50, "image_id": 50, "four": [50, 279], "model_input": 50, "max_text_seq_len": 50, "40": [50, 79, 80, 81, 147, 151, 191, 219, 280, 282], "did": [50, 277, 282], "extra": [50, 144, 199, 267, 274, 279, 280, 281, 282], "second": [50, 183, 200, 275, 279, 280, 282], "eos_id": [51, 144, 215, 217], "shorter": [52, 189], "min": [52, 279], "invalid": 52, "sub": [53, 251], "unifi": [53, 161], "simplifi": [53, 222, 273, 278, 279], "simultan": 53, "intern": 53, "aggreg": 53, "transpar": 53, "howev": [53, 169, 267, 278, 280], "constitu": 53, "might": [53, 193, 200, 203, 273, 275, 280], "larg": [53, 196, 198, 205, 206, 257, 273, 280, 282], "comput": [53, 55, 56, 101, 107, 116, 120, 125, 129, 142, 143, 146, 150, 171, 175, 183, 185, 189, 190, 196, 198, 199, 219, 222, 224, 225, 240, 256, 270, 275, 278, 280, 281, 282], "cumul": 53, "maintain": [53, 201, 269, 280, 282], "deleg": 53, "retriev": [53, 55, 56, 189, 238], "lead": [53, 214, 228, 270], "scale": [53, 73, 74, 77, 85, 86, 87, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 184, 186, 188, 190, 205, 206, 221, 225, 279, 280, 281, 282], "strategi": [53, 270], "stream": [53, 262, 280], "demand": 53, "deriv": [53, 180, 189, 190], "instans": 53, "dataset1": 53, "mycustomdataset": 53, "params1": 53, "dataset2": 53, "params2": 53, "concat_dataset": 53, "total": [53, 221, 223, 236, 243, 265, 272, 275, 277, 278, 279, 280], "data_point": 53, "1500": 53, "vicgal": 53, "gpt4": 53, "samsum": [53, 68], "focus": [53, 271, 276, 280], "enhanc": [53, 191, 225, 280, 282], "divers": 53, "machin": [53, 224, 261, 273, 275], "max_pack": 54, "outsid": [54, 256, 257, 279], "sampler": [54, 276], "part": [54, 200, 224, 274, 282], "buffer": [54, 189, 199, 280], "enough": [54, 274], "lower": [54, 270, 278, 279], "triangular": 54, "wise": 54, "made": [54, 60, 64, 67, 71, 144, 275], "smaller": [54, 200, 275, 277, 278, 279, 280, 281, 282], "jam": 54, "s1": [54, 214], "s2": [54, 214], "s3": 54, "s4": 54, "contamin": 54, "input_po": [54, 74, 183, 185, 189, 190, 199], "matrix": [54, 188, 189, 199], "increment": 54, "move": [54, 71, 189, 260, 280], "entir": [54, 71, 196, 203, 247, 274, 282], "avoid": [54, 71, 184, 191, 195, 256, 273, 281, 282], "truncat": [54, 61, 71, 72, 94, 106, 119, 144, 148, 162, 169, 179, 215, 226], "sentenc": [54, 71], "techniqu": [55, 268, 269, 270, 275, 276, 277, 278, 279, 280, 281], "repons": 55, "At": [55, 56, 189, 199], "extract": [55, 56, 61, 216], "against": [55, 56, 225, 263, 281, 282], "unit": [55, 56, 247, 268], "filepath": [55, 56, 57, 59, 60, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72], "filter": [55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 281], "prior": [55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 258], "doc": [55, 56, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 227, 245, 248, 251, 252, 256, 262, 273, 275], "round": [56, 281], "incorpor": [56, 222], "happen": [56, 196, 198], "ti": [56, 95, 171, 175, 187, 280], "agnost": 56, "treat": [56, 191, 207, 274], "minimum": [56, 65, 66], "corpu": [57, 61, 71, 72], "package_refer": [57, 61, 63, 71, 72], "loading_method": [57, 61, 63, 71, 72], "tabular": [57, 71], "txt": [57, 71, 179, 249, 276], "eo": [57, 71, 169, 214, 217, 274], "yahma": 58, "variant": [58, 62, 68], "page": [58, 72, 267, 268, 273, 276, 277, 280], "tatsu": 59, "lab": [59, 74], "codebas": [59, 275], "independ": 59, "contribut": [59, 60, 62, 64, 67, 68, 69, 197, 198, 221, 223], "alpacatomessag": 59, "alpaca_d": 59, "altern": [60, 64, 67, 193, 276, 280], "toward": [60, 225], "my_dataset": [60, 64], "london": [60, 64], "ccdv": 61, "cnn_dailymail": 61, "textcompletiondataset": [61, 71, 72], "cnn": 61, "dailymail": 61, "articl": [61, 72], "highlight": [61, 282], "disabl": [61, 72, 189, 193, 199, 207, 256, 281], "highest": [61, 72], "conjunct": [62, 68, 70, 189, 280], "grammar_d": 62, "rlhflow": 63, "hh": 63, "preferencedataset": [63, 67, 70], "liuhaotian": 65, "llava": 65, "150k": 65, "coco": 65, "train2017": 65, "llava_instruct_150k": 65, "2017": 65, "visit": [65, 275], "cocodataset": 65, "wget": 65, "zip": [65, 264], "unzip": 65, "minim": [65, 66, 276, 278, 279, 280, 281, 282], "clip": [65, 66, 78, 79, 80, 81, 142, 143, 144, 147, 151, 191, 223], "mymodeltransform": [65, 66], "tokenizer_path": [65, 66], "image_transform": [65, 66], "yet": [65, 66, 153, 274, 275], "llava_instruct_d": 65, "huggingfacem4": 66, "the_cauldron": 66, "cauldron": 66, "card": 66, "cauldron_d": 66, "compris": 67, "share": [67, 183, 187, 275], "c1": 67, "r1": 67, "chosen_messag": 67, "rejected_messag": 67, "samsung": 68, "samsum_d": 68, "351": 69, "82": 69, "391": 69, "221": 69, "220": 69, "193": 69, "471": 69, "lvwerra": 70, "stack": [70, 191, 257], "exchang": 70, "allenai": [71, 281], "data_dir": 71, "realnewslik": 71, "wikitext_document_level": 72, "wikitext": [72, 281], "103": [72, 275], "transformerdecod": [73, 74, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 95, 96, 97, 98, 99, 101, 102, 103, 104, 105, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 146, 150, 154, 155, 156, 157, 158, 159, 160, 161, 163, 164, 165, 166, 167, 168, 170, 171, 172, 173, 174, 175, 176, 177, 178, 188, 190, 199, 200, 201, 279], "max_generated_token": 73, "pad_id": [73, 226], "temperatur": [73, 74, 77, 222, 224, 225, 275], "top_k": [73, 74, 77, 275], "stop_token": [73, 226], "rng": 73, "custom_generate_next_token": 73, "seq_length": [73, 74, 75, 188, 190, 200, 201], "prune": [73, 77, 282], "probabl": [73, 77, 85, 86, 87, 95, 96, 97, 107, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 205, 206, 222, 223, 224, 225, 275, 278], "stop": [73, 226], "random": [73, 191, 256, 276], "compil": [73, 196, 275, 277, 280, 282], "generate_next_token": 73, "llama3_8b": [73, 122, 130, 199, 277, 280, 281], "manual_se": 73, "tolist": 73, "jeremi": 73, "m": [73, 195, 274, 281], "seq_len": [73, 75, 76, 189], "num_generated_token": 73, "q": [74, 77, 183, 279], "randomli": [74, 77, 258], "softmax": [74, 77, 183, 189, 190, 199, 278], "trick": [74, 77], "fast": [74, 275], "32971d3129541c5bfb4f715abc33d1c5f408d204": 74, "l40": 74, "k": [74, 77, 79, 183, 279], "padding_mask": [75, 76, 223, 226], "target_seq_len": 75, "suitabl": 75, "scaled_dot_product_attent": [75, 91, 95, 101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175, 183], "static": 75, "kv": [75, 182, 183, 189, 190, 192, 193, 194, 199, 281], "cach": [75, 182, 183, 185, 188, 189, 190, 192, 193, 194, 199, 201, 267, 273], "longer": [75, 182, 280], "boolean": [75, 76, 81, 183, 188, 189, 190, 199, 201, 227, 242], "assertionerror": [75, 81, 182, 188, 189, 210, 211, 258], "shift": [76, 189], "uniform_": 77, "int32": 77, "patch": [78, 79, 80, 81, 143, 144, 147, 151, 191, 219], "check": [78, 79, 80, 81, 188, 189, 190, 191, 199, 201, 210, 237, 245, 263, 266, 268, 269, 270, 271, 274, 275, 276, 279, 280], "vision_transform": [78, 79, 80, 81], "visiontransform": [78, 79, 80, 81], "divid": [78, 79, 80, 81, 144, 147, 151, 191, 197, 198, 219], "dimension": [78, 79, 80, 81, 147, 151, 191], "n_img": [78, 79, 191], "n_tokens_per_til": [78, 79, 80], "crop": [78, 79, 80, 81, 142, 147, 151, 191], "local_token_positional_embed": 79, "_position_embed": [79, 191], "tokenpositionalembed": [79, 191], "gate": [79, 186, 232, 269, 270, 273, 276], "global_token_positional_embed": 79, "400": [79, 80, 81, 147, 151, 191, 219], "10x10": [79, 80, 81, 147, 151, 191, 219], "grid": [79, 80, 81, 147, 151, 191, 219], "th": [79, 182], "silu": [81, 180], "cls_output_dim": [81, 191], "attn_bia": 81, "out_indic": [81, 191], "output_cls_project": 81, "in_channel": [81, 147, 151, 191], "intermediate_act": 81, "transformerencoderlay": 81, "cl": [81, 143, 191], "mlp": [81, 85, 86, 87, 91, 95, 96, 97, 101, 107, 108, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 146, 149, 150, 151, 154, 155, 156, 157, 158, 160, 165, 166, 167, 171, 172, 173, 174, 175, 188, 189, 190, 210, 211, 277, 279, 280], "bia": [81, 187, 204, 205, 206, 258, 279, 281, 282], "intermedi": [81, 91, 95, 101, 107, 116, 120, 125, 129, 146, 147, 150, 151, 154, 156, 158, 160, 165, 167, 171, 175, 191, 231, 254, 277, 282], "fourth": [81, 147, 151, 191], "determin": [81, 147, 151, 211], "divis": [81, 184], "code_llama2": [82, 83, 84, 85, 86, 87, 88, 89, 90, 273], "arxiv": [82, 83, 84, 85, 86, 88, 89, 90, 95, 96, 97, 98, 99, 102, 103, 104, 105, 107, 108, 109, 110, 112, 113, 114, 115, 120, 121, 122, 123, 124, 131, 132, 133, 134, 135, 138, 139, 140, 141, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 183, 184, 185, 191, 219, 220, 222, 223, 224, 225], "pdf": [82, 83, 84, 219, 220], "2308": [82, 83, 84], "12950": [82, 83, 84], "lora_attn_modul": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 210, 211, 269, 279, 280, 282], "q_proj": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 183, 210, 211, 269, 279, 280, 281, 282], "k_proj": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 183, 210, 211, 269, 279, 280, 281, 282], "v_proj": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 183, 210, 211, 269, 279, 280, 281, 282], "output_proj": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 183, 210, 211, 279, 280, 281, 282], "apply_lora_to_mlp": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 210, 211, 269, 279, 280], "apply_lora_to_output": [85, 86, 87, 88, 89, 90, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 174, 210, 211, 279, 280], "lora_rank": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 269, 279, 280], "lora_alpha": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 269, 279, 280], "lora_dropout": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 280], "use_dora": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 131, 132, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 280], "quantize_bas": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 205, 206, 280, 282], "code_llama2_13b": 85, "tloen": [85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174], "8bb8579e403dc78e37fe81ffbb253c413007323f": [85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174], "l41": [85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174], "l43": [85, 86, 87, 96, 97, 108, 109, 110, 111, 121, 122, 130, 131, 132, 138, 139, 166, 172, 173, 174], "linear": [85, 86, 87, 88, 89, 90, 95, 96, 97, 98, 99, 107, 108, 109, 110, 111, 112, 113, 114, 115, 120, 121, 122, 123, 124, 129, 130, 131, 132, 133, 134, 135, 138, 139, 140, 141, 143, 149, 150, 151, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 171, 172, 173, 174, 187, 189, 204, 205, 206, 210, 211, 279, 280, 281, 282], "low": [85, 86, 87, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 205, 206, 269, 275, 278, 279, 282], "approxim": [85, 86, 87, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 129, 130, 131, 132, 138, 139, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 205, 206, 279], "factor": [85, 86, 87, 95, 96, 97, 107, 108, 109, 110, 111, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 149, 150, 151, 154, 155, 156, 157, 165, 166, 171, 172, 173, 174, 205, 206, 220, 275], "dropout": [85, 86, 87, 91, 95, 96, 97, 101, 107, 109, 110, 111, 116, 120, 121, 122, 125, 129, 130, 131, 132, 138, 139, 149, 150, 151, 154, 155, 156, 157, 158, 160, 165, 166, 167, 171, 172, 173, 174, 175, 183, 205, 206, 279, 280, 282], "decompos": [85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 154, 155, 156, 157, 165, 166, 205, 269], "magnitud": [85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 154, 155, 156, 157, 165, 166, 205, 280], "dora": [85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 129, 131, 132, 138, 139, 150, 151, 154, 155, 156, 157, 165, 166, 205, 269], "ab": [85, 86, 88, 89, 90, 95, 96, 97, 98, 99, 102, 103, 104, 105, 107, 108, 109, 110, 112, 113, 114, 115, 120, 121, 122, 123, 124, 131, 132, 133, 134, 135, 138, 139, 140, 141, 152, 154, 155, 156, 157, 163, 164, 165, 166, 170, 183, 184, 185, 191, 222, 223, 224, 225], "2402": [85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 154, 155, 156, 157, 165, 166], "09353": [85, 86, 95, 96, 97, 107, 108, 109, 110, 120, 121, 122, 131, 132, 138, 139, 154, 155, 156, 157, 165, 166], "code_llama2_70b": 86, "code_llama2_7b": 87, "qlora": [88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170, 195, 266, 268, 269, 277, 279], "paper": [88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170, 219, 222, 224, 225, 278, 279, 282], "2305": [88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170, 183, 222, 224], "14314": [88, 89, 90, 98, 99, 112, 113, 114, 115, 123, 124, 133, 134, 135, 140, 141, 152, 163, 164, 170], "lora_code_llama2_13b": 88, "lora_code_llama2_70b": 89, "lora_code_llama2_7b": 90, "head_dim": [91, 95, 182, 183, 189], "intermediate_dim": [91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175], "attn_dropout": [91, 95, 101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175, 183, 189], "norm_ep": [91, 95, 101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175], "1e": [91, 95, 101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175, 184, 278, 280], "06": [91, 95, 184, 279], "rope_bas": [91, 95, 101, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175], "10000": [91, 95, 101, 154, 156, 158, 160, 165, 167, 185], "norm_embed": [91, 95], "transformerselfattentionlay": [91, 101, 116, 125, 158, 175, 188, 189, 199, 201], "rm": [91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175], "norm": [91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 189], "space": [91, 101, 116, 125, 146, 150, 158, 175, 189, 203, 280], "slide": [91, 158, 168], "window": [91, 158, 168], "vocabulari": [91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 196, 198, 279, 280], "mha": [91, 95, 101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 183, 189], "onto": [91, 95, 101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175, 183, 203], "epsilon": [91, 95, 101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175, 223], "rotari": [91, 95, 101, 125, 129, 154, 156, 158, 160, 165, 167, 185, 277], "10_000": [91, 95, 154, 156, 158, 160, 167], "blog": [92, 93], "technolog": [92, 93], "develop": [92, 93, 267, 282], "gemmatoken": 94, "_templatetyp": [94, 106, 119, 148, 162, 169, 179], "gemma_2b": 96, "gemma_7b": 97, "lora_gemma_2b": 98, "lora_gemma_7b": 99, "taken": [100, 279, 282], "sy": [100, 274], "honest": [100, 274], "pari": [100, 153], "capit": [100, 153], "franc": [100, 153], "known": [100, 153, 241, 281], "stun": [100, 153], "05": [101, 107, 116, 120, 125, 129, 154, 156, 158, 160, 165, 167, 171, 175], "gqa": [101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 183], "mqa": [101, 107, 116, 120, 125, 129, 146, 150, 154, 156, 158, 160, 165, 167, 171, 175, 183], "kvcach": [101, 107, 116, 120, 125, 129, 146, 150, 165, 171, 175, 183, 189, 192, 193, 194, 199], "scale_hidden_dim_for_mlp": [101, 107, 116, 120, 125, 129, 146, 150, 171, 175], "2307": [102, 103, 104, 105], "09288": [102, 103, 104, 105], "classif": [105, 156, 160, 161, 232], "llama2_70b": 109, "llama2_7b": [110, 279], "classifi": [111, 156, 160, 161, 258, 280], "llama2_reward_7b": [111, 232], "lora_llama2_13b": 112, "lora_llama2_70b": 113, "lora_llama2_7b": [114, 279], "lora_llama2_reward_7b": 115, "500000": [116, 120, 125, 129, 146, 150], "llama3token": [119, 144, 213], "regist": [119, 144, 148, 169, 179, 195, 253, 282], "canon": [119, 144, 148, 169, 179], "llama3_70b": 121, "lora_llama3_70b": 123, "lora_llama3_8b": [124, 280], "scale_factor": [125, 129], "500_000": [125, 129], "rope": [125, 129, 171, 175, 183, 185], "llama3_1": [126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 269, 278], "llama3_1_70b": 131, "llama3_1_8b": 132, "lora_llama3_1_405b": 133, "lora_llama3_1_70b": 134, "lora_llama3_1_8b": 135, "llama3_2_1b": [138, 192, 193, 194], "llama3_2_3b": 139, "lora_llama3_2_1b": 140, "lora_llama3_2_3b": 141, "projection_head": [142, 199, 203], "combin": [142, 144, 147, 151, 189, 199, 201, 203, 221, 278], "learnabl": [142, 186, 199, 201, 205, 275], "fusion": [142, 145, 146, 147, 149, 150, 151, 199, 200, 201, 202, 203], "encoder_dim": [142, 143], "decoder_dim": [142, 143], "num_img": [142, 143], "num_emb": [142, 143], "broken": [142, 143, 191, 201], "width": [142, 281], "clip_embeds_per_til": 142, "emb": [142, 143, 183, 188, 189, 199], "num_hidden_input": 143, "sequenti": [143, 199, 203], "num_hidden": 143, "hidden_st": [143, 191], "image_mean": 144, "image_std": 144, "tranform": 144, "possible_resolut": 144, "448": [144, 145, 148, 149], "deviat": 144, "still": [144, 196, 198, 200, 201, 269, 279, 281, 282], "transformed_data": 144, "img1": [144, 219], "img2": [144, 219], "31587": [144, 214, 215], "29644": [144, 214, 215], "102": [144, 214, 215], "truncate_at_eo": [144, 215], "skip": [144, 183], "tokenize_head": 144, "tokenize_end": 144, "header": 144, "eom": 144, "wether": 144, "decoder_train": [145, 149, 152, 199], "encoder_train": [145, 149, 152, 199], "fusion_train": [145, 149, 152, 199], "deepfusionmodel": [145, 149, 152], "trainabl": [145, 149, 201, 206, 209, 247, 279, 280, 282], "resiz": [145, 148, 149], "fusion_interv": [146, 150], "num_special_token": [146, 150], "encoder_max_seq_len": [146, 150, 188, 189, 190, 194, 199, 201], "causalselfattent": [146, 150], "interv": [146, 150, 276], "clip_embed_dim": [147, 151], "clip_num_lay": [147, 151], "clip_hidden_st": [147, 151], "num_layers_project": [147, 151], "decoder_embed_dim": [147, 151], "llama3visionencod": [147, 151], "spatial": [147, 151], "backbon": [147, 151], "trainbl": 149, "decoder_lora": 150, "fusion_lora": [150, 151], "encoder_lora": 151, "lora_llama3_2_vision_11b": 152, "num_class": [156, 160, 258], "announc": 159, "ray2333": 161, "feedback": [161, 222], "lora_mistral_7b": 163, "lora_mistral_reward_7b": 164, "phi3_mini": [166, 232], "128k": 168, "nor": 168, "phi3minitoken": 169, "tokenizer_config": 169, "spm": 169, "lm": [169, 223, 278], "bo": [169, 214, 217, 274], "unk": 169, "augment": [169, 282], "endoftext": 169, "phi3minisentencepiecebasetoken": 169, "lora_phi3_mini": 170, "1000000": [171, 175], "tie_word_embed": [171, 172, 173, 175, 176, 177], "qwen2transformerdecod": 171, "period": [171, 175], "word": [171, 175, 280, 281], "qwen2_0_5b": [172, 187], "qwen2_1_5b": [173, 187], "qwen2_7b": 174, "qwen": [176, 177, 178], "merges_fil": 179, "qwen2token": 179, "gate_proj": 180, "down_proj": 180, "up_proj": 180, "feed": [180, 188, 190], "network": [180, 207, 279, 282], "fed": [180, 274], "multipli": [180, 280], "in_dim": [180, 204, 205, 206, 279, 280, 282], "out_dim": [180, 189, 204, 205, 206, 279, 280, 282], "layernorm": 181, "past": 182, "expand": 182, "dpython": [182, 183, 188, 189, 190, 194, 195, 199, 201, 255, 259], "reset": [182, 183, 188, 189, 190, 199, 201, 240], "k_val": 182, "v_val": 182, "fill": 182, "bfloat16": [182, 195, 255, 275, 276, 277, 279, 280, 281], "greater": [182, 191, 263], "pos_embed": [183, 188, 279, 281], "q_norm": 183, "k_norm": 183, "kv_cach": [183, 192, 193, 194], "is_caus": 183, "13245v1": 183, "multihead": 183, "extrem": 183, "credit": 183, "litgpt": 183, "v": [183, 189, 199, 279], "n_kv_head": 183, "rotarypositionalembed": [183, 279, 281], "rmsnorm": 183, "vice": [183, 273], "versa": [183, 273], "y": 183, "s_x": 183, "s_y": 183, "_masktyp": [183, 189, 190], "score": [183, 189, 190, 221], "encoder_max_cache_seq_len": [183, 189, 190], "j": [183, 188, 189, 190, 199], "blockmask": [183, 189, 190], "create_block_mask": [183, 189, 190], "flex_attent": [183, 189, 190], "n_h": [183, 185], "num": [183, 185], "n_kv": 183, "h_d": [183, 185], "reset_cach": [183, 188, 189, 190, 199, 201], "setup_cach": [183, 188, 189, 190, 192, 193, 199, 201], "ep": 184, "squar": 184, "1910": 184, "07467": 184, "propos": [185, 280], "2104": 185, "09864": 185, "verfic": 185, "l80": 185, "init": [185, 240, 252, 282], "exceed": 185, "freq": 185, "recomput": [185, 280], "geometr": 185, "progress": [185, 271, 276, 280], "rotat": 185, "angl": 185, "basic": [186, 277], "tied_modul": 187, "lost": 187, "whose": [187, 207, 248, 253], "attributeerror": [187, 260], "attn": [188, 190, 192, 193, 194, 279, 281, 282], "multiheadattent": [188, 190, 279, 281], "ca_norm": 188, "mlp_norm": [188, 190], "ca_scal": 188, "mlp_scale": [188, 190], "ff": [188, 190], "caches_are_en": [188, 189, 190, 192, 193, 194, 199, 201], "func": [188, 190, 201], "caches_are_setup": [188, 189, 190, 192, 193, 194, 199, 201], "token_sequ": 188, "embed_sequ": 188, "decoder_max_seq_len": [188, 189, 190, 192, 193, 194, 199, 201], "modulelist": 189, "output_hidden_st": [189, 199], "belong": [189, 235], "reduc": [189, 222, 268, 269, 270, 278, 279, 280, 281, 282], "statement": 189, "improv": [189, 215, 224, 238, 270, 277, 278, 279, 280], "readabl": [189, 275], "behaviour": [189, 199, 258], "alter": [189, 199], "common_util": [189, 192, 193, 194, 195], "disable_kv_cach": [189, 199], "chunked_output": 189, "last_hidden_st": 189, "chunk": [189, 196, 198, 215], "cewithchunkedoutputloss": [189, 199], "upcast": [189, 196, 198], "set_num_output_chunk": [189, 199], "num_chunk": [189, 196, 198], "s_e": [189, 199], "d_e": [189, 199], "arang": [189, 199], "prompt_length": [189, 199], "correspondingli": 189, "padded_prompt_length": 189, "m_": [189, 199], "realloc": [189, 199], "runtimeerror": [189, 217, 233, 237, 239, 244], "num_output_chunk": [189, 196, 198, 199], "transformercrossattentionlay": [189, 199, 201], "fusionlay": [189, 199], "sa_norm": 190, "sa_scal": 190, "token_pos_embed": 191, "pre_tile_pos_emb": 191, "post_tile_pos_emb": 191, "cls_project": 191, "vit": 191, "11929": 191, "convolut": 191, "flatten": 191, "downscal": 191, "800x400": 191, "400x400": 191, "_transform": 191, "whole": [191, 278], "n_token": 191, "101": 191, "pool": 191, "tiledtokenpositionalembed": 191, "tilepositionalembed": 191, "tile_pos_emb": 191, "8x8": 191, "21": 191, "22": 191, "23": [191, 236], "24": [191, 276, 277], "25": [191, 275, 278], "26": 191, "27": [191, 275], "28": [191, 275], "29": [191, 282], "30": [191, 226, 281], "31": [191, 277], "33": 191, "34": 191, "35": [191, 282], "36": 191, "37": 191, "38": [191, 275], "39": 191, "41": 191, "43": 191, "44": 191, "45": 191, "46": 191, "47": 191, "48": [191, 275, 282], "49": 191, "50": [191, 226, 248, 275], "51": 191, "52": [191, 276], "53": 191, "54": 191, "55": [191, 276], "56": 191, "57": [191, 279, 282], "58": 191, "59": [191, 282], "60": 191, "61": [191, 275], "62": 191, "63": 191, "64": [191, 269, 279, 280], "num_patches_per_til": 191, "emb_dim": 191, "constain": 191, "anim": 191, "max_n_img": 191, "n_channel": 191, "vision_util": 191, "tile_crop": 191, "800": 191, "patch_grid_s": 191, "rand": 191, "nch": 191, "tile_cropped_imag": 191, "batch_imag": 191, "unsqueez": 191, "batch_aspect_ratio": 191, "clip_vision_encod": 191, "cache_en": 192, "float32": [192, 193, 194, 237, 278], "1024": [192, 193, 194, 281], "temporarili": [193, 194, 207, 280], "enter": [193, 194], "overhead": [193, 222, 270, 280, 281], "untouch": [193, 274], "yield": [193, 194, 207], "caller": [193, 194, 207], "delete_kv_cach": 194, "offload_to_cpu": 195, "hook": [195, 253, 280, 282], "nf4": [195, 280, 282], "restor": 195, "higher": [195, 277, 278, 280, 281, 282], "offload": [195, 282], "increas": [195, 222, 236, 277, 278, 279, 280, 281], "peak": [195, 240, 246, 275, 277, 279, 282], "gpu": [195, 270, 273, 275, 276, 277, 278, 279, 280, 281, 282], "_register_state_dict_hook": 195, "mymodul": 195, "_after_": 195, "nf4tensor": [195, 282], "unquant": [195, 281, 282], "unus": 195, "ignore_index": [196, 197, 198, 278], "entropi": [196, 198, 278], "bf16": [196, 198, 237, 280, 282], "ce": [196, 278], "better": [196, 198, 225, 268, 274, 275, 278, 280, 281], "accuraci": [196, 198, 270, 275, 277, 278, 279, 280, 281, 282], "doubl": [196, 198, 282], "therefor": [196, 198, 280, 282], "num_token": [196, 197, 198], "consider": [196, 198], "compute_cross_entropi": 196, "gain": [196, 270, 277], "won": [196, 274], "realiz": 196, "pull": [196, 269, 270, 273], "1390": 196, "loss_fn": [196, 198], "chunkedcrossentropyloss": 196, "output_chunk": [196, 198], "kullback": [197, 278], "leibler": [197, 278], "diverg": [197, 198, 221, 278], "jongwooko": [197, 278], "distillm": [197, 278], "17c0f98bc263b1861a02d5df578c84aea652ee65": 197, "student_logit": [197, 198, 278], "teacher_logit": [197, 198, 278], "student": [197, 198], "teacher": [197, 198, 275], "kl": [197, 198, 221, 278], "teacher_chunk": 198, "teacher_model": 198, "model_fus": [199, 200, 201, 202, 203], "deepfus": 199, "evolut": 199, "signatur": 199, "interchang": 199, "fusion_param": [199, 200, 201, 202, 203], "fusionembed": 199, "fusion_lay": [199, 201], "clip_vit_224": [199, 203], "feedforward": [199, 203], "register_fusion_modul": 199, "flamingo": [199, 201, 219], "strict": [199, 200, 201, 210, 279], "freez": [199, 275, 279], "fusion_vocab_s": 200, "necessit": 200, "rout": 200, "128": [200, 269, 277, 279, 280], "fusion_first": 201, "shot": [201, 275, 277, 281], "infus": 201, "interpret": 201, "enocd": 201, "isn": [201, 237, 273], "fused_lay": 201, "mark": [203, 274], "earli": 203, "peft": [204, 205, 206, 207, 208, 209, 210, 211, 229, 269, 279, 282], "adapter_param": [204, 205, 206, 207, 208, 209], "proj": 204, "loralinear": [204, 279, 280, 282], "alpha": [205, 206, 279, 280, 282], "use_bia": [205, 206], "scalar": [205, 248, 249, 250, 251, 252, 280], "orient": [205, 280], "bax": [205, 206], "distinct": [205, 282], "lora_a": [205, 206, 279, 282], "lora_b": [205, 206, 279, 282], "initialize_dora_magnitud": 205, "perturb": 206, "decomposit": [206, 279, 280], "matric": [206, 279, 282], "mapsto": 206, "w_0x": 206, "r": [206, 279], "polici": [207, 221, 222, 223, 224, 225, 227, 238, 247, 254, 271], "neural": [207, 279, 282], "get_adapter_param": [209, 279], "base_miss": 210, "base_unexpect": 210, "lora_miss": 210, "lora_unexpect": 210, "validate_state_dict_for_lora": [210, 279], "unlik": 210, "reli": [210, 217, 275, 277], "unexpect": 210, "nonempti": 210, "full_model_state_dict_kei": 211, "lora_state_dict_kei": 211, "base_model_state_dict_kei": 211, "confirm": [211, 267], "lora_modul": 211, "complement": 211, "disjoint": 211, "overlap": [211, 280], "tiktokenbasetoken": 212, "light": 214, "sentencepieceprocessor": 214, "trim": 214, "whitespac": 214, "spm_model": [214, 274], "tokenized_text": [214, 215], "add_bo": [214, 215, 274], "trim_leading_whitespac": 214, "prefix": [214, 280], "unbatch": 214, "bos_id": [215, 217], "lightweight": [215, 274], "substr": 215, "repetit": 215, "speed": [215, 257, 277, 280, 281, 282], "identif": 215, "regex": 215, "absent": 215, "tt_model": 215, "tokenizer_json_path": 216, "heavili": 217, "concat": 217, "1788": 217, "2643": 217, "465": 217, "22137": 217, "join": 217, "satisfi": [217, 275], "loos": 218, "image_token_id": 219, "particip": [219, 220], "laid": 219, "fig": 219, "2204": 219, "14198": 219, "immedi": [219, 280], "until": [219, 280], "img3": 219, "equal": [219, 263], "gamma": [220, 224, 225], "lmbda": 220, "estim": [220, 221], "1506": 220, "02438": 220, "response_len": [220, 221], "receiv": 220, "discount": 220, "gae": 220, "logprob": [221, 225], "ref_logprob": 221, "kl_coeff": 221, "valid_score_idx": 221, "coeffici": [221, 223], "total_reward": 221, "kl_reward": 221, "beta": [222, 225], "label_smooth": [222, 225], "18290": 222, "intuit": [222, 224, 225], "dispref": 222, "dynam": [222, 281], "degener": 222, "occur": [222, 270], "naiv": 222, "trl": [222, 224, 225], "5d1deb1445828cfd0e947cb3a7925b1c03a283fc": 222, "dpo_train": [222, 224], "l844": 222, "2009": 222, "01325": 222, "regular": [222, 225, 280, 281, 282], "baselin": [222, 223, 275, 278, 279], "uncertainti": [222, 225], "policy_chosen_logp": [222, 224, 225], "policy_rejected_logp": [222, 224, 225], "reference_chosen_logp": [222, 224], "reference_rejected_logp": [222, 224], "chosen_reward": [222, 224, 225], "rejected_reward": [222, 224, 225], "value_clip_rang": 223, "value_coeff": 223, "proxim": [223, 271], "1707": 223, "06347": 223, "eqn": 223, "vwxyzjn": 223, "ccc19538e817e98a60d3253242ac15e2a562cb49": 223, "lm_human_preference_detail": 223, "train_policy_acceler": 223, "l719": 223, "ea25b9e8b234e6ee1bca43083f8f3cf974143998": 223, "ppo2": 223, "l68": 223, "l75": 223, "pi_old_logprob": 223, "pi_logprob": 223, "phi_old_valu": 223, "phi_valu": 223, "value_padding_mask": 223, "old": 223, "participag": 223, "five": 223, "policy_loss": 223, "value_loss": 223, "clipfrac": 223, "fraction": 223, "statist": [224, 280], "rso": 224, "hing": 224, "2309": 224, "06657": 224, "logist": 224, "regress": 224, "slic": 224, "10425": 224, "almost": [224, 279], "svm": 224, "counter": 224, "4dce042a3863db1d375358e8c8092b874b02934b": 224, "l1141": 224, "simpo": 225, "2405": 225, "14734": 225, "averag": [225, 278], "implicit": 225, "margin": 225, "bradlei": 225, "terri": 225, "larger": [225, 231, 275, 277, 278, 280], "win": 225, "lose": 225, "98ad01ddfd1e1b67ec018014b83cba40e0caea66": 225, "cpo_train": 225, "l603": 225, "pretti": [225, 275], "identitc": 225, "elimin": 225, "kind": 225, "ipoloss": 225, "fill_valu": 226, "sequence_length": 226, "stop_token_id": 226, "869": 226, "eos_mask": 226, "truncated_sequ": 226, "datatyp": [227, 280, 282], "denot": 227, "auto_wrap_polici": [227, 238, 254], "submodul": [227, 247], "obei": 227, "contract": 227, "get_fsdp_polici": 227, "modules_to_wrap": [227, 238, 247], "min_num_param": 227, "my_fsdp_polici": 227, "recurs": [227, 247, 251], "sum": [227, 278, 279], "p": [227, 233, 279, 281, 282], "numel": [227, 279], "1000": [227, 281], "stabl": [227, 245, 251, 256, 267, 280], "html": [227, 245, 251, 254, 256, 262, 266], "filename_format": 228, "max_filenam": 228, "concis": 228, "filenam": [228, 249], "file_": 228, "_of_": 228, "n_file": 228, "build_checkpoint_filenam": 228, "file_00001_of_00003": 228, "file_00002_of_00003": 228, "file_00003_of_00003": 228, "safe_seri": 229, "from_pretrain": 229, "0001_of_0003": 229, "0002_of_0003": 229, "todo": 229, "preserv": [229, 282], "weight_map": [229, 275], "convert_weight": 229, "_model_typ": [229, 232], "intermediate_checkpoint": [229, 230, 231], "adapter_onli": [229, 230, 231], "_weight_map": 229, "shard": [230, 277], "wip": 230, "qualnam": 232, "boundari": 232, "distinguish": 232, "llama3_vis": 232, "llama3_2_vision_decod": 232, "mistral_reward_7b": 232, "my_new_model": 232, "my_custom_state_dict_map": 232, "optim_map": 233, "bare": 233, "bone": 233, "optim_dict": [233, 235, 253], "cfg_optim": 233, "ckpt": 233, "optim_ckpt": 233, "placeholder_optim_dict": 233, "optiminbackwardwrapp": 233, "get_last_lr": 233, "rate": [233, 236, 239, 268, 276, 280], "schedul": [233, 236, 257, 276, 280], "get_optim_kei": 233, "arbitrari": [233, 279, 280], "optim_ckpt_map": 233, "set_lr_schedul": 233, "lr_schedul": [233, 236], "lrschedul": 233, "loadabl": 233, "step_lr_schedul": 233, "ac_mod": 234, "ac_opt": 234, "op": [234, 281], "ac": [234, 238], "optimizerinbackwardwrapp": [235, 239], "named_paramet": [235, 258], "num_warmup_step": 236, "num_training_step": 236, "num_cycl": [236, 257], "last_epoch": 236, "lambdalr": 236, "linearli": 236, "decreas": [236, 279, 280, 281, 282], "cosin": 236, "v4": 236, "src": 236, "l104": 236, "warmup": [236, 257], "phase": 236, "wave": 236, "half": [236, 280], "kernel": 237, "memory_efficient_fsdp_wrap": [238, 281], "maxim": [238, 247, 266, 268], "workload": [238, 270, 280, 281], "fullyshardeddataparallel": [238, 247, 280], "fsdppolicytyp": [238, 247], "warpper": 239, "optimizer_in_backward": 239, "reset_stat": 240, "track": [240, 248], "alloc": [240, 246, 247, 277, 280, 282], "reserv": [240, 246, 274, 282], "stat": [240, 246, 282], "int4": [241, 281], "4w": 241, "recogn": 241, "int8dynactint4weightquant": [241, 270, 281], "8da4w": [241, 281], "int4weightonlyquant": [241, 281], "int8dynactint4weightqatquant": [241, 270, 281], "qat": [241, 266, 271], "int4weightonlyqatquant": 241, "exclud": 242, "aka": 243, "master": 245, "port": [245, 273], "address": [245, 278, 280], "hold": [245, 276], "peak_memory_act": 246, "peak_memory_alloc": 246, "peak_memory_reserv": 246, "get_memory_stat": 246, "hierarch": 247, "api_kei": 248, "experiment_kei": 248, "onlin": 248, "log_cod": 248, "comet": 248, "site": [248, 275], "ml": 248, "team": 248, "compar": [248, 251, 263, 275, 277, 278, 279, 281, 282], "sdk": 248, "uncategor": 248, "alphanumer": 248, "charact": 248, "get_or_cr": 248, "fresh": 248, "persist": 248, "hpo": 248, "sweep": 248, "server": 248, "offlin": 248, "auto": [248, 273], "creation": 248, "experimentconfig": 248, "project_nam": 248, "my_workspac": 248, "my_metr": [248, 251, 252], "importerror": [248, 252], "termin": [248, 251, 252], "comet_api_kei": 248, "flush": [248, 249, 250, 251, 252], "ndarrai": [248, 249, 250, 251, 252], "record": [248, 249, 250, 251, 252, 257], "log_config": [248, 252], "payload": [248, 249, 250, 251, 252], "log_": 249, "unixtimestamp": 249, "thread": 249, "safe": 249, "organize_log": 251, "tensorboard": 251, "subdirectori": 251, "logdir": 251, "startup": 251, "tree": [251, 275, 277], "tfevent": 251, "encount": 251, "frontend": 251, "organ": [251, 273], "accordingli": [251, 281], "my_log_dir": 251, "view": [251, 278], "entiti": 252, "bias": [252, 279, 282], "usernam": 252, "my_ent": 252, "my_group": 252, "account": [252, 279, 282], "link": [252, 275, 277], "capecap": 252, "6053ofw0": 252, "torchtune_config_j67sb73v": 252, "soon": [253, 280], "readi": [253, 266, 274, 281], "grad": 253, "acwrappolicytyp": 254, "author": [254, 268, 276, 280, 282], "fsdp_adavnced_tutori": 254, "insid": 255, "contextmanag": 255, "debug_mod": 256, "pseudo": 256, "commonli": [256, 279, 280, 282], "numpi": 256, "determinist": 256, "global": [256, 280], "warn": 256, "nondeterminist": 256, "cudnn": 256, "set_deterministic_debug_mod": 256, "profile_memori": 257, "with_stack": 257, "record_shap": 257, "with_flop": 257, "wait_step": 257, "warmup_step": 257, "active_step": 257, "profil": 257, "layout": 257, "trace": 257, "profileract": 257, "gradient_accumul": 257, "sensibl": 257, "default_schedul": 257, "reduct": [257, 270, 279], "scope": 257, "flop": 257, "cycl": 257, "repeat": [257, 280], "model_named_paramet": 258, "force_overrid": 258, "concret": [258, 280], "vocab_dim": 258, "named_param": 259, "inplac": [260, 279], "too": [260, 270, 277], "handler": 262, "_log": 262, "__version__": 263, "generated_examples_python": 264, "galleri": [264, 272], "sphinx": 264, "000": [265, 272, 277], "execut": [265, 272], "generated_exampl": 265, "mem": [265, 272], "mb": [265, 272], "gentl": 266, "introduct": 266, "first_finetune_tutori": 266, "kd": 266, "torchvis": 267, "torchao": [267, 270, 275, 277, 280, 281, 282], "latest": [267, 270, 276, 280, 282], "whl": 267, "cu121": 267, "cu118": 267, "cu124": 267, "And": [267, 275], "welcom": [267, 273], "greatest": [267, 276], "contributor": 267, "dev": 267, "commit": 267, "branch": 267, "therebi": [267, 280, 281, 282], "forc": [267, 278], "reinstal": 267, "opt": [267, 276], "suffix": 267, "On": [268, 279], "pointer": 268, "emphas": 268, "simplic": 268, "component": 268, "prove": 268, "democrat": 268, "zoo": 268, "varieti": [268, 279], "integr": [268, 275, 276, 277, 279, 281, 282], "fsdp2": 268, "excit": 268, "checkout": 268, "quickstart": 268, "attain": 268, "embodi": 268, "philosophi": 268, "usabl": 268, "composit": 268, "hard": 268, "outlin": 268, "unecessari": 268, "never": 268, "thoroughli": 268, "competit": 269, "grant": [269, 270, 276], "interest": [269, 270, 275, 278], "8b_lora_single_devic": [269, 273, 274, 277, 278, 280], "lever": [269, 270], "action": [269, 270], "degrad": [270, 280, 281, 282], "simul": [270, 280, 281], "compromis": 270, "blogpost": [270, 280], "qat_distribut": [270, 281], "8b_qat_ful": [270, 281], "least": [270, 277, 279, 281], "vram": [270, 277, 279, 280, 281], "80gb": [270, 281], "a100": 270, "h100": 270, "delai": 270, "fake": [270, 281], "empir": [270, 281], "potenti": [270, 279, 280], "fake_quant_after_n_step": [270, 281], "idea": [270, 278, 282], "roughli": 270, "total_step": 270, "futur": [270, 281], "plan": [270, 275], "un": 270, "groupsiz": [270, 281], "256": [270, 277, 281], "hackabl": [271, 276], "singularli": [271, 276], "technic": [271, 276], "awar": [271, 280, 281], "tracker": 271, "short": 273, "subcommand": 273, "anytim": 273, "symlink": 273, "wrote": 273, "readm": [273, 275, 277], "md": 273, "lot": [273, 275, 280], "recent": 273, "releas": [273, 277], "agre": 273, "term": [273, 280], "perman": 273, "eat": 273, "bandwith": 273, "storag": [273, 282], "00030": 273, "ootb": 273, "7b_full_low_memori": [273, 275, 276], "8b_full_single_devic": 273, "mini_full_low_memori": 273, "7b_full": [273, 275, 276], "13b_full": [273, 275, 276], "70b_full": 273, "edit": 273, "clobber": 273, "destin": 273, "lora_finetune_distribut": [273, 277, 279], "torchrun": 273, "launch": [273, 274, 276], "nproc": 273, "node": 273, "worker": 273, "nnode": [273, 279, 281], "minimum_nod": 273, "maximum_nod": 273, "fail": 273, "rdzv": 273, "rendezv": 273, "endpoint": 273, "8b_lora": [273, 277], "bypass": 273, "fancy_lora": 273, "8b_fancy_lora": 273, "nice": 274, "meet": 274, "overhaul": 274, "multiturn": 274, "untrain": 274, "accompani": 274, "who": 274, "influenti": 274, "hip": 274, "hop": 274, "artist": 274, "2pac": 274, "rakim": 274, "flavor": 274, "formatted_messag": 274, "nyou": 274, "nwho": 274, "why": [274, 276, 279], "518": 274, "25580": 274, "29962": 274, "3532": 274, "14816": 274, "29903": 274, "6778": 274, "_spm_model": 274, "piece_to_id": 274, "manual": [274, 282], "529": 274, "29879": 274, "29958": 274, "nhere": 274, "pure": 274, "mess": 274, "prime": 274, "strictli": 274, "ask": [274, 280], "though": 274, "robust": 274, "pretend": 274, "zuckerberg": 274, "seem": [274, 275], "good": [274, 279, 280], "altogeth": 274, "honor": 274, "custom_8b_lora_single_devic": 274, "favorit": [275, 279], "seemlessli": 275, "connect": [275, 281], "amount": 275, "natur": 275, "export": 275, "leverag": [275, 277, 282], "percentag": 275, "16gb": [275, 279], "rtx": 275, "3090": 275, "4090": 275, "hour": 275, "7b_qlora_single_devic": [275, 276, 282], "473": 275, "98": [275, 282], "gb": [275, 277, 279, 281, 282], "484": 275, "01": [275, 276], "fact": [275, 277, 279, 280], "third": 275, "But": [275, 279], "realli": 275, "eleuther_ev": [275, 277, 281], "eleuther_evalu": [275, 277, 281], "lm_eval": [275, 277], "custom_eval_config": [275, 277], "truthfulqa_mc2": [275, 277, 278, 279], "measur": [275, 277], "propens": [275, 277], "324": 275, "loglikelihood": 275, "195": 275, "121": 275, "197": 275, "acc": [275, 281], "388": 275, "489": 275, "great": [275, 280], "custom_generation_config": [275, 277], "kick": 275, "300": 275, "bai": 275, "area": 275, "92": 275, "exploratorium": 275, "san": 275, "francisco": 275, "magazin": 275, "awesom": 275, "bridg": 275, "cool": 275, "96": [275, 282], "sec": [275, 277], "83": 275, "99": [275, 279], "72": 275, "littl": 275, "int8_weight_onli": [275, 277], "int8_dynamic_activation_int8_weight": [275, 277], "ao": [275, 277], "quant_api": [275, 277], "quantize_": [275, 277], "int4_weight_onli": [275, 277], "previous": [275, 277, 279], "benefit": 275, "clone": [275, 279, 281, 282], "assumpt": 275, "new_dir": 275, "output_dict": 275, "sd_1": 275, "sd_2": 275, "dump": 275, "convert_hf_checkpoint": 275, "checkpoint_path": 275, "justin": 275, "school": 275, "math": 275, "ws": 275, "94": [275, 277], "bandwidth": [275, 277], "1391": 275, "84": 275, "thats": 275, "seamlessli": 275, "authent": [275, 276], "hopefulli": 275, "gave": 275, "minut": 276, "agreement": 276, "depth": 276, "principl": 276, "boilerpl": 276, "substanti": [276, 279], "custom_config": 276, "replic": 276, "lorafinetunerecipesingledevic": 276, "lora_finetune_output": 276, "log_1713194212": 276, "3697006702423096": 276, "25880": [276, 282], "83it": 276, "monitor": 276, "tqdm": 276, "e2": 276, "focu": 277, "theta": 277, "observ": [277, 281], "consum": [277, 282], "overal": [277, 278], "8b_qlora_single_devic": [277, 280], "coupl": [277, 279, 282], "meta_model_0": [277, 281], "122": 277, "sarah": 277, "busi": 277, "mum": 277, "young": 277, "children": 277, "live": 277, "north": 277, "east": 277, "england": 277, "135": 277, "88": 277, "138": 277, "346": 277, "09": 277, "139": 277, "broader": 277, "teach": [278, 279], "straight": [278, 279], "jump": [278, 279], "compress": 278, "transfer": 278, "capac": 278, "computation": 278, "expens": 278, "deploi": 278, "imit": 278, "diagram": 278, "aim": [278, 280], "minillm": 278, "forwardklloss": 278, "super": 278, "teacher_prob": 278, "student_logprob": 278, "log_softmax": 278, "prod_prob": 278, "forwardklwithchunkedoutputloss": 278, "knowledge_distillation_single_devic": 278, "bit": [278, 279, 280, 281, 282], "alpaca_cleaned_dataset": 278, "hellaswag": [278, 281], "commonsense_qa": 278, "kd_ratio": 278, "teacher_checkpoint": 278, "00004": 278, "truthfulqa": [278, 279], "commonsens": 278, "constant": 278, "boost": 278, "graph": [278, 280], "irrespect": 278, "3e": 278, "slightli": 278, "truthful_qa": 278, "wherea": 278, "unfamiliar": 279, "oppos": [279, 282], "momentum": [279, 280], "aghajanyan": 279, "et": 279, "al": 279, "hypothes": 279, "intrins": 279, "eight": 279, "practic": 279, "blue": 279, "although": [279, 281], "rememb": 279, "approx": 279, "15m": 279, "65k": 279, "requires_grad": [279, 282], "frozen_out": [279, 282], "lora_out": [279, 282], "base_model": 279, "lora_model": 279, "lora_llama_2_7b": [279, 282], "alon": 279, "in_featur": [279, 281], "out_featur": [279, 281], "validate_missing_and_unexpected_for_lora": 279, "peft_util": 279, "set_trainable_param": 279, "lora_param": 279, "total_param": 279, "trainable_param": 279, "2f": 279, "6742609920": 279, "4194304": 279, "7b_lora": 279, "my_model_checkpoint_path": [279, 281, 282], "tokenizer_checkpoint": [279, 281, 282], "my_tokenizer_checkpoint_path": [279, 281, 282], "constraint": 279, "factori": 279, "benefici": 279, "impact": [279, 280], "minor": 279, "lora_experiment_1": 279, "smooth": [279, 282], "curv": [279, 282], "500": 279, "ran": 279, "footprint": [279, 281], "commod": 279, "cogniz": 279, "ax": 279, "parallel": 279, "475": 279, "87": 279, "508": 279, "86": 279, "504": 279, "04": 279, "514": 279, "lowest": 279, "absolut": 279, "4gb": 279, "tradeoff": 279, "salman": 280, "mohammadi": 280, "brief": 280, "glossari": 280, "struggl": 280, "constrain": [280, 281], "cost": 280, "particularli": 280, "gradient_accumulation_step": 280, "throughput": 280, "ram": 280, "bottleneck": 280, "sebastian": 280, "raschka": 280, "fp16": 280, "sound": 280, "quot": 280, "aliv": 280, "region": 280, "enable_activation_checkpoint": 280, "bring": 280, "autograd": [280, 282], "saved_tensors_hook": 280, "cours": 280, "runtim": 280, "hide": 280, "later": 280, "brought": 280, "enable_activation_offload": 280, "total_batch_s": 280, "count": 280, "suppos": 280, "log_every_n_step": 280, "translat": 280, "frequent": 280, "slowli": 280, "num_devic": 280, "adamw8bit": 280, "pagedadamw": 280, "modern": 280, "converg": 280, "stateless": 280, "stochast": 280, "descent": 280, "sacrif": 280, "remov": 280, "optimizer_in_bwd": 280, "cpuoffloadoptim": 280, "offload_gradi": 280, "prototyp": 280, "low_bit_optim": 280, "4e": 280, "adam": 280, "hint": 280, "slowdown": 280, "4x": 280, "fsdp_cpu_offload": 280, "greatli": 280, "lora_": 280, "lora_llama3": 280, "_lora": 280, "firstli": 280, "secondli": 280, "affect": 280, "fashion": 280, "slower": [280, 282], "jointli": 280, "sens": 280, "novel": 280, "normalfloat": [280, 282], "8x": [280, 282], "worth": 280, "cast": [280, 281], "incur": [280, 281, 282], "penalti": 280, "qlora_": 280, "qlora_llama3_8b": 280, "_qlora": 280, "reap": 280, "hood": [280, 282], "doralinear": 280, "swap": [280, 281], "perplex": 281, "goal": 281, "ptq": 281, "kept": 281, "nois": 281, "henc": 281, "x_q": 281, "int8": 281, "zp": 281, "x_float": 281, "qmin": 281, "qmax": 281, "clamp": 281, "x_fq": 281, "dequant": 281, "proce": 281, "prepared_model": 281, "int8dynactint4weightqatlinear": 281, "int8dynactint4weightlinear": 281, "train_loop": 281, "converted_model": 281, "qat_distributed_recipe_label": 281, "recov": 281, "custom_8b_qat_ful": 281, "2000": 281, "led": 281, "presum": 281, "mutat": 281, "5gb": 281, "custom_quant": 281, "poorli": 281, "custom_eleuther_evalu": 281, "fullmodeltorchtunecheckpoint": 281, "max_seq_length": 281, "my_eleuther_evalu": 281, "stderr": 281, "word_perplex": 281, "9148": 281, "byte_perplex": 281, "5357": 281, "bits_per_byt": 281, "6189": 281, "5687": 281, "0049": 281, "acc_norm": 281, "7536": 281, "0043": 281, "portion": [281, 282], "74": 281, "048": 281, "190": 281, "7735": 281, "5598": 281, "6413": 281, "5481": 281, "0050": 281, "7390": 281, "0044": 281, "7251": 281, "4994": 281, "5844": 281, "5740": 281, "7610": 281, "outperform": 281, "importantli": 281, "characterist": 281, "187": 281, "958": 281, "halv": 281, "motiv": 281, "edg": 281, "smartphon": 281, "executorch": 281, "xnnpack": 281, "export_llama": 281, "use_sdpa_with_kv_cach": 281, "qmode": 281, "group_siz": 281, "get_bos_id": 281, "get_eos_id": 281, "output_nam": 281, "llama3_8da4w": 281, "pte": 281, "881": 281, "oneplu": 281, "709": 281, "tok": 281, "815": 281, "316": 281, "364": 281, "highli": 282, "vanilla": 282, "held": 282, "bespok": 282, "vast": 282, "major": 282, "normatfloat": 282, "deepdiv": 282, "de": 282, "counterpart": 282, "set_default_devic": 282, "qlora_linear": 282, "memory_alloc": 282, "177": 282, "152": 282, "del": 282, "empty_cach": 282, "lora_linear": 282, "081": 282, "344": 282, "qlora_llama2_7b": 282, "qlora_model": 282, "essenti": 282, "reparametrize_as_dtype_state_dict_post_hook": 282, "149": 282, "9157477021217346": 282, "02": 282, "08": 282, "15it": 282, "nightli": 282, "200": 282, "hundr": 282, "228": 282, "8158286809921265": 282, "95it": 282, "exercis": 282, "linear_nf4": 282, "to_nf4": 282, "linear_weight": 282, "incom": 282}, "objects": {"torchtune.config": [[27, 0, 1, "", "instantiate"], [28, 0, 1, "", "log_config"], [29, 0, 1, "", "parse"], [30, 0, 1, "", "validate"]], "torchtune.data": [[31, 1, 1, "", "AlpacaToMessages"], [32, 1, 1, "", "ChatMLTemplate"], [33, 1, 1, "", "ChosenRejectedToMessages"], [34, 2, 1, "", "GrammarErrorCorrectionTemplate"], [35, 1, 1, "", "InputOutputToMessages"], [36, 1, 1, "", "Message"], [37, 1, 1, "", "OpenAIToMessages"], [38, 1, 1, "", "PromptTemplate"], [39, 1, 1, "", "PromptTemplateInterface"], [40, 2, 1, "", "QuestionAnswerTemplate"], [41, 2, 1, "", "Role"], [42, 1, 1, "", "ShareGPTToMessages"], [43, 2, 1, "", "SummarizeTemplate"], [44, 0, 1, "", "format_content_with_images"], [45, 0, 1, "", "left_pad_sequence"], [46, 0, 1, "", "load_image"], [47, 0, 1, "", "padded_collate"], [48, 0, 1, "", "padded_collate_dpo"], [49, 0, 1, "", "padded_collate_sft"], [50, 0, 1, "", "padded_collate_tiled_images_and_mask"], [51, 0, 1, "", "truncate"], [52, 0, 1, "", "validate_messages"]], "torchtune.data.Message": [[36, 3, 1, "", "contains_media"], [36, 4, 1, "", "from_dict"], [36, 4, 1, "", "get_media"], [36, 3, 1, "", "text_content"]], "torchtune.datasets": [[53, 1, 1, "", "ConcatDataset"], [54, 1, 1, "", "PackedDataset"], [55, 1, 1, "", "PreferenceDataset"], [56, 1, 1, "", "SFTDataset"], [57, 1, 1, "", "TextCompletionDataset"], [58, 0, 1, "", "alpaca_cleaned_dataset"], [59, 0, 1, "", "alpaca_dataset"], [60, 0, 1, "", "chat_dataset"], [61, 0, 1, "", "cnn_dailymail_articles_dataset"], [62, 0, 1, "", "grammar_dataset"], [63, 0, 1, "", "hh_rlhf_helpful_dataset"], [64, 0, 1, "", "instruct_dataset"], [67, 0, 1, "", "preference_dataset"], [68, 0, 1, "", "samsum_dataset"], [69, 0, 1, "", "slimorca_dataset"], [70, 0, 1, "", "stack_exchange_paired_dataset"], [71, 0, 1, "", "text_completion_dataset"], [72, 0, 1, "", "wikitext_dataset"]], "torchtune.datasets.multimodal": [[65, 0, 1, "", "llava_instruct_dataset"], [66, 0, 1, "", "the_cauldron_dataset"]], "torchtune.generation": [[73, 0, 1, "", "generate"], [74, 0, 1, "", "generate_next_token"], [75, 0, 1, "", "get_causal_mask_from_padding_mask"], [76, 0, 1, "", "get_position_ids_from_padding_mask"], [77, 0, 1, "", "sample"]], "torchtune.models.clip": [[78, 1, 1, "", "TilePositionalEmbedding"], [79, 1, 1, "", "TiledTokenPositionalEmbedding"], [80, 1, 1, "", "TokenPositionalEmbedding"], [81, 0, 1, "", "clip_vision_encoder"]], "torchtune.models.clip.TilePositionalEmbedding": [[78, 4, 1, "", "forward"]], "torchtune.models.clip.TiledTokenPositionalEmbedding": [[79, 4, 1, "", "forward"]], "torchtune.models.clip.TokenPositionalEmbedding": [[80, 4, 1, "", "forward"]], "torchtune.models.code_llama2": [[82, 0, 1, "", "code_llama2_13b"], [83, 0, 1, "", "code_llama2_70b"], [84, 0, 1, "", "code_llama2_7b"], [85, 0, 1, "", "lora_code_llama2_13b"], [86, 0, 1, "", "lora_code_llama2_70b"], [87, 0, 1, "", "lora_code_llama2_7b"], [88, 0, 1, "", "qlora_code_llama2_13b"], [89, 0, 1, "", "qlora_code_llama2_70b"], [90, 0, 1, "", "qlora_code_llama2_7b"]], "torchtune.models.gemma": [[91, 0, 1, "", "gemma"], [92, 0, 1, "", "gemma_2b"], [93, 0, 1, "", "gemma_7b"], [94, 0, 1, "", "gemma_tokenizer"], [95, 0, 1, "", "lora_gemma"], [96, 0, 1, "", "lora_gemma_2b"], [97, 0, 1, "", "lora_gemma_7b"], [98, 0, 1, "", "qlora_gemma_2b"], [99, 0, 1, "", "qlora_gemma_7b"]], "torchtune.models.llama2": [[100, 1, 1, "", "Llama2ChatTemplate"], [101, 0, 1, "", "llama2"], [102, 0, 1, "", "llama2_13b"], [103, 0, 1, "", "llama2_70b"], [104, 0, 1, "", "llama2_7b"], [105, 0, 1, "", "llama2_reward_7b"], [106, 0, 1, "", "llama2_tokenizer"], [107, 0, 1, "", "lora_llama2"], [108, 0, 1, "", "lora_llama2_13b"], [109, 0, 1, "", "lora_llama2_70b"], [110, 0, 1, "", "lora_llama2_7b"], [111, 0, 1, "", "lora_llama2_reward_7b"], [112, 0, 1, "", "qlora_llama2_13b"], [113, 0, 1, "", "qlora_llama2_70b"], [114, 0, 1, "", "qlora_llama2_7b"], [115, 0, 1, "", "qlora_llama2_reward_7b"]], "torchtune.models.llama3": [[116, 0, 1, "", "llama3"], [117, 0, 1, "", "llama3_70b"], [118, 0, 1, "", "llama3_8b"], [119, 0, 1, "", "llama3_tokenizer"], [120, 0, 1, "", "lora_llama3"], [121, 0, 1, "", "lora_llama3_70b"], [122, 0, 1, "", "lora_llama3_8b"], [123, 0, 1, "", "qlora_llama3_70b"], [124, 0, 1, "", "qlora_llama3_8b"]], "torchtune.models.llama3_1": [[125, 0, 1, "", "llama3_1"], [126, 0, 1, "", "llama3_1_405b"], [127, 0, 1, "", "llama3_1_70b"], [128, 0, 1, "", "llama3_1_8b"], [129, 0, 1, "", "lora_llama3_1"], [130, 0, 1, "", "lora_llama3_1_405b"], [131, 0, 1, "", "lora_llama3_1_70b"], [132, 0, 1, "", "lora_llama3_1_8b"], [133, 0, 1, "", "qlora_llama3_1_405b"], [134, 0, 1, "", "qlora_llama3_1_70b"], [135, 0, 1, "", "qlora_llama3_1_8b"]], "torchtune.models.llama3_2": [[136, 0, 1, "", "llama3_2_1b"], [137, 0, 1, "", "llama3_2_3b"], [138, 0, 1, "", "lora_llama3_2_1b"], [139, 0, 1, "", "lora_llama3_2_3b"], [140, 0, 1, "", "qlora_llama3_2_1b"], [141, 0, 1, "", "qlora_llama3_2_3b"]], "torchtune.models.llama3_2_vision": [[142, 1, 1, "", "Llama3VisionEncoder"], [143, 1, 1, "", "Llama3VisionProjectionHead"], [144, 1, 1, "", "Llama3VisionTransform"], [145, 0, 1, "", "llama3_2_vision_11b"], [146, 0, 1, "", "llama3_2_vision_decoder"], [147, 0, 1, "", "llama3_2_vision_encoder"], [148, 0, 1, "", "llama3_2_vision_transform"], [149, 0, 1, "", "lora_llama3_2_vision_11b"], [150, 0, 1, "", "lora_llama3_2_vision_decoder"], [151, 0, 1, "", "lora_llama3_2_vision_encoder"], [152, 0, 1, "", "qlora_llama3_2_vision_11b"]], "torchtune.models.llama3_2_vision.Llama3VisionEncoder": [[142, 4, 1, "", "forward"]], "torchtune.models.llama3_2_vision.Llama3VisionProjectionHead": [[143, 4, 1, "", "forward"]], "torchtune.models.llama3_2_vision.Llama3VisionTransform": [[144, 4, 1, "", "decode"], [144, 4, 1, "", "tokenize_message"], [144, 4, 1, "", "tokenize_messages"]], "torchtune.models.mistral": [[153, 1, 1, "", "MistralChatTemplate"], [154, 0, 1, "", "lora_mistral"], [155, 0, 1, "", "lora_mistral_7b"], [156, 0, 1, "", "lora_mistral_classifier"], [157, 0, 1, "", "lora_mistral_reward_7b"], [158, 0, 1, "", "mistral"], [159, 0, 1, "", "mistral_7b"], [160, 0, 1, "", "mistral_classifier"], [161, 0, 1, "", "mistral_reward_7b"], [162, 0, 1, "", "mistral_tokenizer"], [163, 0, 1, "", "qlora_mistral_7b"], [164, 0, 1, "", "qlora_mistral_reward_7b"]], "torchtune.models.phi3": [[165, 0, 1, "", "lora_phi3"], [166, 0, 1, "", "lora_phi3_mini"], [167, 0, 1, "", "phi3"], [168, 0, 1, "", "phi3_mini"], [169, 0, 1, "", "phi3_mini_tokenizer"], [170, 0, 1, "", "qlora_phi3_mini"]], "torchtune.models.qwen2": [[171, 0, 1, "", "lora_qwen2"], [172, 0, 1, "", "lora_qwen2_0_5b"], [173, 0, 1, "", "lora_qwen2_1_5b"], [174, 0, 1, "", "lora_qwen2_7b"], [175, 0, 1, "", "qwen2"], [176, 0, 1, "", "qwen2_0_5b"], [177, 0, 1, "", "qwen2_1_5b"], [178, 0, 1, "", "qwen2_7b"], [179, 0, 1, "", "qwen2_tokenizer"]], "torchtune.modules": [[180, 1, 1, "", "FeedForward"], [181, 1, 1, "", "Fp32LayerNorm"], [182, 1, 1, "", "KVCache"], [183, 1, 1, "", "MultiHeadAttention"], [184, 1, 1, "", "RMSNorm"], [185, 1, 1, "", "RotaryPositionalEmbeddings"], [186, 1, 1, "", "TanhGate"], [187, 1, 1, "", "TiedLinear"], [188, 1, 1, "", "TransformerCrossAttentionLayer"], [189, 1, 1, "", "TransformerDecoder"], [190, 1, 1, "", "TransformerSelfAttentionLayer"], [191, 1, 1, "", "VisionTransformer"]], "torchtune.modules.FeedForward": [[180, 4, 1, "", "forward"]], "torchtune.modules.Fp32LayerNorm": [[181, 4, 1, "", "forward"]], "torchtune.modules.KVCache": [[182, 4, 1, "", "reset"], [182, 4, 1, "", "update"]], "torchtune.modules.MultiHeadAttention": [[183, 4, 1, "", "forward"], [183, 4, 1, "", "reset_cache"], [183, 4, 1, "", "setup_cache"]], "torchtune.modules.RMSNorm": [[184, 4, 1, "", "forward"]], "torchtune.modules.RotaryPositionalEmbeddings": [[185, 4, 1, "", "forward"]], "torchtune.modules.TanhGate": [[186, 4, 1, "", "forward"]], "torchtune.modules.TransformerCrossAttentionLayer": [[188, 4, 1, "", "caches_are_enabled"], [188, 4, 1, "", "caches_are_setup"], [188, 4, 1, "", "forward"], [188, 4, 1, "", "reset_cache"], [188, 4, 1, "", "setup_caches"]], "torchtune.modules.TransformerDecoder": [[189, 4, 1, "", "caches_are_enabled"], [189, 4, 1, "", "caches_are_setup"], [189, 4, 1, "", "chunked_output"], [189, 4, 1, "", "forward"], [189, 4, 1, "", "reset_caches"], [189, 4, 1, "", "set_num_output_chunks"], [189, 4, 1, "", "setup_caches"]], "torchtune.modules.TransformerSelfAttentionLayer": [[190, 4, 1, "", "caches_are_enabled"], [190, 4, 1, "", "caches_are_setup"], [190, 4, 1, "", "forward"], [190, 4, 1, "", "reset_cache"], [190, 4, 1, "", "setup_caches"]], "torchtune.modules.VisionTransformer": [[191, 4, 1, "", "forward"]], "torchtune.modules.common_utils": [[192, 0, 1, "", "delete_kv_caches"], [193, 0, 1, "", "disable_kv_cache"], [194, 0, 1, "", "local_kv_cache"], [195, 0, 1, "", "reparametrize_as_dtype_state_dict_post_hook"]], "torchtune.modules.loss": [[196, 1, 1, "", "CEWithChunkedOutputLoss"], [197, 1, 1, "", "ForwardKLLoss"], [198, 1, 1, "", "ForwardKLWithChunkedOutputLoss"]], "torchtune.modules.loss.CEWithChunkedOutputLoss": [[196, 4, 1, "", "compute_cross_entropy"], [196, 4, 1, "", "forward"]], "torchtune.modules.loss.ForwardKLLoss": [[197, 4, 1, "", "forward"]], "torchtune.modules.loss.ForwardKLWithChunkedOutputLoss": [[198, 4, 1, "", "forward"]], "torchtune.modules.model_fusion": [[199, 1, 1, "", "DeepFusionModel"], [200, 1, 1, "", "FusionEmbedding"], [201, 1, 1, "", "FusionLayer"], [202, 0, 1, "", "get_fusion_params"], [203, 0, 1, "", "register_fusion_module"]], "torchtune.modules.model_fusion.DeepFusionModel": [[199, 4, 1, "", "caches_are_enabled"], [199, 4, 1, "", "caches_are_setup"], [199, 4, 1, "", "forward"], [199, 4, 1, "", "reset_caches"], [199, 4, 1, "", "set_num_output_chunks"], [199, 4, 1, "", "setup_caches"]], "torchtune.modules.model_fusion.FusionEmbedding": [[200, 4, 1, "", "forward"], [200, 4, 1, "", "fusion_params"]], "torchtune.modules.model_fusion.FusionLayer": [[201, 4, 1, "", "caches_are_enabled"], [201, 4, 1, "", "caches_are_setup"], [201, 4, 1, "", "forward"], [201, 4, 1, "", "fusion_params"], [201, 4, 1, "", "reset_cache"], [201, 4, 1, "", "setup_caches"]], "torchtune.modules.peft": [[204, 1, 1, "", "AdapterModule"], [205, 1, 1, "", "DoRALinear"], [206, 1, 1, "", "LoRALinear"], [207, 0, 1, "", "disable_adapter"], [208, 0, 1, "", "get_adapter_params"], [209, 0, 1, "", "set_trainable_params"], [210, 0, 1, "", "validate_missing_and_unexpected_for_lora"], [211, 0, 1, "", "validate_state_dict_for_lora"]], "torchtune.modules.peft.AdapterModule": [[204, 4, 1, "", "adapter_params"]], "torchtune.modules.peft.DoRALinear": [[205, 4, 1, "", "adapter_params"], [205, 4, 1, "", "forward"], [205, 4, 1, "", "initialize_dora_magnitude"]], "torchtune.modules.peft.LoRALinear": [[206, 4, 1, "", "adapter_params"], [206, 4, 1, "", "forward"]], "torchtune.modules.tokenizers": [[212, 1, 1, "", "BaseTokenizer"], [213, 1, 1, "", "ModelTokenizer"], [214, 1, 1, "", "SentencePieceBaseTokenizer"], [215, 1, 1, "", "TikTokenBaseTokenizer"], [216, 0, 1, "", "parse_hf_tokenizer_json"], [217, 0, 1, "", "tokenize_messages_no_special_tokens"]], "torchtune.modules.tokenizers.BaseTokenizer": [[212, 4, 1, "", "decode"], [212, 4, 1, "", "encode"]], "torchtune.modules.tokenizers.ModelTokenizer": [[213, 4, 1, "", "tokenize_messages"]], "torchtune.modules.tokenizers.SentencePieceBaseTokenizer": [[214, 4, 1, "", "decode"], [214, 4, 1, "", "encode"]], "torchtune.modules.tokenizers.TikTokenBaseTokenizer": [[215, 4, 1, "", "decode"], [215, 4, 1, "", "encode"]], "torchtune.modules.transforms": [[218, 1, 1, "", "Transform"], [219, 1, 1, "", "VisionCrossAttentionMask"]], "torchtune.rlhf": [[220, 0, 1, "", "estimate_advantages"], [221, 0, 1, "", "get_rewards_ppo"], [226, 0, 1, "", "truncate_sequence_at_first_stop_token"]], "torchtune.rlhf.loss": [[222, 1, 1, "", "DPOLoss"], [223, 1, 1, "", "PPOLoss"], [224, 1, 1, "", "RSOLoss"], [225, 1, 1, "", "SimPOLoss"]], "torchtune.rlhf.loss.DPOLoss": [[222, 4, 1, "", "forward"]], "torchtune.rlhf.loss.PPOLoss": [[223, 4, 1, "", "forward"]], "torchtune.rlhf.loss.RSOLoss": [[224, 4, 1, "", "forward"]], "torchtune.rlhf.loss.SimPOLoss": [[225, 4, 1, "", "forward"]], "torchtune.training": [[227, 2, 1, "", "FSDPPolicyType"], [228, 1, 1, "", "FormattedCheckpointFiles"], [229, 1, 1, "", "FullModelHFCheckpointer"], [230, 1, 1, "", "FullModelMetaCheckpointer"], [231, 1, 1, "", "FullModelTorchTuneCheckpointer"], [232, 1, 1, "", "ModelType"], [233, 1, 1, "", "OptimizerInBackwardWrapper"], [234, 0, 1, "", "apply_selective_activation_checkpointing"], [235, 0, 1, "", "create_optim_in_bwd_wrapper"], [236, 0, 1, "", "get_cosine_schedule_with_warmup"], [237, 0, 1, "", "get_dtype"], [238, 0, 1, "", "get_full_finetune_fsdp_wrap_policy"], [239, 0, 1, "", "get_lr"], [240, 0, 1, "", "get_memory_stats"], [241, 0, 1, "", "get_quantizer_mode"], [242, 0, 1, "", "get_unmasked_sequence_lengths"], [243, 0, 1, "", "get_world_size_and_rank"], [244, 0, 1, "", "init_distributed"], [245, 0, 1, "", "is_distributed"], [246, 0, 1, "", "log_memory_stats"], [247, 0, 1, "", "lora_fsdp_wrap_policy"], [253, 0, 1, "", "register_optim_in_bwd_hooks"], [254, 0, 1, "", "set_activation_checkpointing"], [255, 0, 1, "", "set_default_dtype"], [256, 0, 1, "", "set_seed"], [257, 0, 1, "", "setup_torch_profiler"], [258, 0, 1, "", "update_state_dict_for_classifier"], [259, 0, 1, "", "validate_expected_param_dtype"]], "torchtune.training.FormattedCheckpointFiles": [[228, 4, 1, "", "build_checkpoint_filenames"]], "torchtune.training.FullModelHFCheckpointer": [[229, 4, 1, "", "load_checkpoint"], [229, 4, 1, "", "save_checkpoint"]], "torchtune.training.FullModelMetaCheckpointer": [[230, 4, 1, "", "load_checkpoint"], [230, 4, 1, "", "save_checkpoint"]], "torchtune.training.FullModelTorchTuneCheckpointer": [[231, 4, 1, "", "load_checkpoint"], [231, 4, 1, "", "save_checkpoint"]], "torchtune.training.OptimizerInBackwardWrapper": [[233, 4, 1, "", "get_last_lr"], [233, 4, 1, "", "get_optim_key"], [233, 4, 1, "", "load_state_dict"], [233, 4, 1, "", "set_lr_scheduler"], [233, 4, 1, "", "state_dict"], [233, 4, 1, "", "step_lr_scheduler"]], "torchtune.training.metric_logging": [[248, 1, 1, "", "CometLogger"], [249, 1, 1, "", "DiskLogger"], [250, 1, 1, "", "StdoutLogger"], [251, 1, 1, "", "TensorBoardLogger"], [252, 1, 1, "", "WandBLogger"]], "torchtune.training.metric_logging.CometLogger": [[248, 4, 1, "", "close"], [248, 4, 1, "", "log"], [248, 4, 1, "", "log_config"], [248, 4, 1, "", "log_dict"]], "torchtune.training.metric_logging.DiskLogger": [[249, 4, 1, "", "close"], [249, 4, 1, "", "log"], [249, 4, 1, "", "log_dict"]], "torchtune.training.metric_logging.StdoutLogger": [[250, 4, 1, "", "close"], [250, 4, 1, "", "log"], [250, 4, 1, "", "log_dict"]], "torchtune.training.metric_logging.TensorBoardLogger": [[251, 4, 1, "", "close"], [251, 4, 1, "", "log"], [251, 4, 1, "", "log_dict"]], "torchtune.training.metric_logging.WandBLogger": [[252, 4, 1, "", "close"], [252, 4, 1, "", "log"], [252, 4, 1, "", "log_config"], [252, 4, 1, "", "log_dict"]], "torchtune.utils": [[260, 0, 1, "", "batch_to_device"], [261, 0, 1, "", "get_device"], [262, 0, 1, "", "get_logger"], [263, 0, 1, "", "torch_version_ge"]]}, "objtypes": {"0": "py:function", "1": "py:class", "2": "py:data", "3": "py:property", "4": "py:method"}, "objnames": {"0": ["py", "function", "Python function"], "1": ["py", "class", "Python class"], "2": ["py", "data", "Python data"], "3": ["py", "property", "Python property"], "4": ["py", "method", "Python method"]}, "titleterms": {"torchtun": [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 22, 34, 40, 41, 43, 227, 266, 268, 273, 275, 277, 278, 279, 281, 282], "config": [0, 10, 24, 25, 273, 276], "data": [1, 11, 34, 40, 41, 43, 274], "text": [1, 2, 14, 16, 20, 277], "templat": [1, 9, 12, 14, 19, 21, 274], "type": 1, "messag": [1, 13, 14, 36], "transform": [1, 5, 13, 14, 15, 218], "collat": 1, "helper": 1, "function": 1, "dataset": [2, 9, 11, 12, 16, 18, 20, 274], "imag": [2, 14, 16], "gener": [2, 3, 73, 275, 277], "builder": 2, "class": [2, 19, 25], "model": [4, 5, 15, 21, 26, 273, 275, 276, 277, 278, 279, 280, 281], "llama3": [4, 116, 274, 277, 278, 281], "2": [4, 278], "vision": [4, 5], "1": [4, 278], "llama2": [4, 101, 274, 275, 279, 282], "code": 4, "llama": 4, "qwen": 4, "phi": 4, "3": 4, "mistral": [4, 158], "gemma": [4, 91], "clip": 4, "modul": 5, "compon": [5, 10, 24, 280], "build": [5, 267, 282], "block": 5, "loss": 5, "base": [5, 21], "token": [5, 14, 21, 274], "util": [5, 8], "peft": [5, 280], "fusion": 5, "rlhf": 6, "train": [7, 227, 270, 276], "checkpoint": [7, 22, 26, 275, 280], "reduc": 7, "precis": [7, 280], "distribut": [7, 270], "memori": [7, 279, 280, 282], "manag": 7, "schedul": 7, "metric": [7, 23, 26], "log": [7, 23, 26], "perform": [7, 279], "profil": 7, "miscellan": [7, 8], "chat": [9, 274], "exampl": [9, 12, 13, 15, 16, 18, 20], "format": [9, 12, 14, 16, 18, 20, 22], "load": [9, 12, 16, 18, 20, 21], "from": [9, 12, 16, 18, 20, 21, 274, 282], "hug": [9, 12, 16, 18, 20, 21, 275], "face": [9, 12, 16, 18, 20, 21, 275], "local": [9, 12, 16, 18, 20], "remot": [9, 12, 16], "specifi": 9, "convers": 9, "style": 9, "sharegpt": 9, "openai": 9, "renam": [9, 12], "column": [9, 12], "built": [9, 12, 16, 18, 19, 20, 273], "custom": [10, 13, 19, 274], "recip": [10, 25, 271, 273, 276, 278, 279, 281], "set": [10, 21], "up": [10, 275], "your": [10, 24, 25, 275, 276], "project": 10, "launch": 10, "overview": [11, 22, 268, 271, 275, 280], "pipelin": 11, "instruct": [12, 267, 277], "configur": [13, 24], "creat": [14, 15], "prompt": [14, 19, 21, 274], "access": [14, 277], "content": 14, "multimod": [15, 16], "us": [15, 19, 24, 25, 274, 275, 278, 282], "interleav": 16, "sampl": [17, 77], "pack": 17, "prefer": 18, "defin": 19, "via": [19, 267, 277], "dotpath": 19, "string": 19, "dictionari": 19, "prompttempl": [19, 38], "complet": 20, "json": 20, "txt": 20, "download": [21, 273, 275, 276], "file": 21, "max": 21, "sequenc": 21, "length": 21, "special": [21, 274], "handl": 22, "differ": 22, "hfcheckpoint": 22, "metacheckpoint": 22, "torchtunecheckpoint": 22, "intermedi": 22, "vs": 22, "final": 22, "lora": [22, 269, 275, 279, 280, 282], "put": [22, 282], "thi": 22, "all": [22, 24, 282], "togeth": [22, 282], "comet": 23, "logger": [23, 26], "about": 24, "where": 24, "do": 24, "paramet": [24, 280], "live": 24, "write": 24, "instanti": [24, 27], "referenc": 24, "other": [24, 275], "field": 24, "interpol": 24, "valid": [24, 30, 273], "best": 24, "practic": 24, "airtight": 24, "public": 24, "api": 24, "onli": 24, "command": 24, "line": 24, "overrid": 24, "remov": 24, "what": [25, 268, 278, 279, 281, 282], "ar": 25, "script": 25, "run": [25, 273, 275], "cli": [25, 273], "pars": [25, 29], "weight": [26, 280], "bias": 26, "w": 26, "b": 26, "log_config": 28, "alpacatomessag": 31, "chatmltempl": 32, "chosenrejectedtomessag": 33, "grammarerrorcorrectiontempl": 34, "inputoutputtomessag": 35, "openaitomessag": 37, "prompttemplateinterfac": 39, "questionanswertempl": 40, "role": 41, "sharegpttomessag": 42, "summarizetempl": 43, "format_content_with_imag": 44, "left_pad_sequ": 45, "load_imag": 46, "padded_col": 47, "padded_collate_dpo": 48, "padded_collate_sft": 49, "padded_collate_tiled_images_and_mask": 50, "truncat": 51, "validate_messag": 52, "concatdataset": 53, "packeddataset": 54, "preferencedataset": 55, "sftdataset": 56, "textcompletiondataset": 57, "alpaca_cleaned_dataset": 58, "alpaca_dataset": 59, "chat_dataset": 60, "cnn_dailymail_articles_dataset": 61, "grammar_dataset": 62, "hh_rlhf_helpful_dataset": 63, "instruct_dataset": 64, "llava_instruct_dataset": 65, "the_cauldron_dataset": 66, "preference_dataset": 67, "samsum_dataset": 68, "slimorca_dataset": 69, "stack_exchange_paired_dataset": 70, "text_completion_dataset": 71, "wikitext_dataset": 72, "generate_next_token": 74, "get_causal_mask_from_padding_mask": 75, "get_position_ids_from_padding_mask": 76, "tilepositionalembed": 78, "tiledtokenpositionalembed": 79, "tokenpositionalembed": 80, "clip_vision_encod": 81, "code_llama2_13b": 82, "code_llama2_70b": 83, "code_llama2_7b": 84, "lora_code_llama2_13b": 85, "lora_code_llama2_70b": 86, "lora_code_llama2_7b": 87, "qlora_code_llama2_13b": 88, "qlora_code_llama2_70b": 89, "qlora_code_llama2_7b": 90, "gemma_2b": 92, "gemma_7b": 93, "gemma_token": 94, "lora_gemma": 95, "lora_gemma_2b": 96, "lora_gemma_7b": 97, "qlora_gemma_2b": 98, "qlora_gemma_7b": 99, "llama2chattempl": 100, "llama2_13b": 102, "llama2_70b": 103, "llama2_7b": 104, "llama2_reward_7b": 105, "llama2_token": 106, "lora_llama2": 107, "lora_llama2_13b": 108, "lora_llama2_70b": 109, "lora_llama2_7b": 110, "lora_llama2_reward_7b": 111, "qlora_llama2_13b": 112, "qlora_llama2_70b": 113, "qlora_llama2_7b": 114, "qlora_llama2_reward_7b": 115, "llama3_70b": 117, "llama3_8b": 118, "llama3_token": 119, "lora_llama3": 120, "lora_llama3_70b": 121, "lora_llama3_8b": 122, "qlora_llama3_70b": 123, "qlora_llama3_8b": 124, "llama3_1": 125, "llama3_1_405b": 126, "llama3_1_70b": 127, "llama3_1_8b": 128, "lora_llama3_1": 129, "lora_llama3_1_405b": 130, "lora_llama3_1_70b": 131, "lora_llama3_1_8b": 132, "qlora_llama3_1_405b": 133, "qlora_llama3_1_70b": 134, "qlora_llama3_1_8b": 135, "llama3_2_1b": 136, "llama3_2_3b": 137, "lora_llama3_2_1b": 138, "lora_llama3_2_3b": 139, "qlora_llama3_2_1b": 140, "qlora_llama3_2_3b": 141, "llama3visionencod": 142, "llama3visionprojectionhead": 143, "llama3visiontransform": 144, "llama3_2_vision_11b": 145, "llama3_2_vision_decod": 146, "llama3_2_vision_encod": 147, "llama3_2_vision_transform": 148, "lora_llama3_2_vision_11b": 149, "lora_llama3_2_vision_decod": 150, "lora_llama3_2_vision_encod": 151, "qlora_llama3_2_vision_11b": 152, "mistralchattempl": 153, "lora_mistr": 154, "lora_mistral_7b": 155, "lora_mistral_classifi": 156, "lora_mistral_reward_7b": 157, "mistral_7b": 159, "mistral_classifi": 160, "mistral_reward_7b": 161, "mistral_token": 162, "qlora_mistral_7b": 163, "qlora_mistral_reward_7b": 164, "lora_phi3": 165, "lora_phi3_mini": 166, "phi3": 167, "phi3_mini": 168, "phi3_mini_token": 169, "qlora_phi3_mini": 170, "lora_qwen2": 171, "lora_qwen2_0_5b": 172, "lora_qwen2_1_5b": 173, "lora_qwen2_7b": 174, "qwen2": [175, 278], "qwen2_0_5b": 176, "qwen2_1_5b": 177, "qwen2_7b": 178, "qwen2_token": 179, "feedforward": 180, "fp32layernorm": 181, "kvcach": 182, "multiheadattent": 183, "rmsnorm": 184, "rotarypositionalembed": 185, "tanhgat": 186, "tiedlinear": 187, "transformercrossattentionlay": 188, "transformerdecod": 189, "transformerselfattentionlay": 190, "visiontransform": 191, "delete_kv_cach": 192, "disable_kv_cach": 193, "local_kv_cach": 194, "reparametrize_as_dtype_state_dict_post_hook": 195, "cewithchunkedoutputloss": 196, "forwardklloss": 197, "forwardklwithchunkedoutputloss": 198, "deepfusionmodel": 199, "fusionembed": 200, "fusionlay": 201, "get_fusion_param": 202, "register_fusion_modul": 203, "adaptermodul": 204, "doralinear": 205, "loralinear": 206, "disable_adapt": 207, "get_adapter_param": 208, "set_trainable_param": 209, "validate_missing_and_unexpected_for_lora": 210, "validate_state_dict_for_lora": 211, "basetoken": 212, "modeltoken": 213, "sentencepiecebasetoken": 214, "tiktokenbasetoken": 215, "parse_hf_tokenizer_json": 216, "tokenize_messages_no_special_token": 217, "visioncrossattentionmask": 219, "estimate_advantag": 220, "get_rewards_ppo": 221, "dpoloss": 222, "ppoloss": 223, "rsoloss": 224, "simpoloss": 225, "truncate_sequence_at_first_stop_token": 226, "fsdppolicytyp": 227, "formattedcheckpointfil": 228, "fullmodelhfcheckpoint": 229, "fullmodelmetacheckpoint": 230, "fullmodeltorchtunecheckpoint": 231, "modeltyp": 232, "optimizerinbackwardwrapp": 233, "apply_selective_activation_checkpoint": 234, "create_optim_in_bwd_wrapp": 235, "get_cosine_schedule_with_warmup": 236, "get_dtyp": 237, "get_full_finetune_fsdp_wrap_polici": 238, "get_lr": 239, "get_memory_stat": 240, "get_quantizer_mod": 241, "get_unmasked_sequence_length": 242, "get_world_size_and_rank": 243, "init_distribut": 244, "is_distribut": 245, "log_memory_stat": 246, "lora_fsdp_wrap_polici": 247, "cometlogg": 248, "disklogg": 249, "stdoutlogg": 250, "tensorboardlogg": 251, "wandblogg": 252, "register_optim_in_bwd_hook": 253, "set_activation_checkpoint": 254, "set_default_dtyp": 255, "set_se": 256, "setup_torch_profil": 257, "update_state_dict_for_classifi": 258, "validate_expected_param_dtyp": 259, "batch_to_devic": 260, "get_devic": 261, "get_logg": 262, "torch_version_g": 263, "comput": [265, 272], "time": [265, 272], "welcom": 266, "document": 266, "get": [266, 273, 277], "start": [266, 273], "tutori": 266, "instal": 267, "pre": 267, "requisit": 267, "pypi": 267, "git": 267, "clone": 267, "nightli": 267, "kei": 268, "concept": 268, "design": 268, "principl": 268, "singl": 269, "devic": [269, 281], "finetun": [269, 271, 275, 279, 281, 282], "quantiz": [270, 275, 277, 280, 281], "awar": 270, "qat": [270, 281], "list": 273, "copi": 273, "fine": [274, 276, 277, 278, 279, 280, 281, 282], "tune": [274, 276, 277, 278, 279, 280, 281, 282], "chang": 274, "when": 274, "should": 274, "i": 274, "end": 275, "workflow": 275, "7b": 275, "evalu": [275, 277, 281], "eleutherai": [275, 277], "s": [275, 277], "eval": [275, 277], "har": [275, 277], "speed": 275, "librari": 275, "upload": 275, "hub": 275, "first": 276, "llm": 276, "select": 276, "modifi": 276, "next": 276, "step": [276, 280], "meta": 277, "8b": [277, 278], "our": 277, "faster": 277, "distil": 278, "1b": 278, "knowledg": 278, "how": [278, 279], "doe": [278, 279], "work": [278, 279], "kd": 278, "ablat": 278, "studi": 278, "teacher": 278, "student": 278, "hyperparamet": 278, "learn": 278, "rate": 278, "ratio": 278, "5b": 278, "0": 278, "appli": [279, 281], "trade": 279, "off": 279, "optim": 280, "activ": 280, "offload": 280, "gradient": 280, "accumul": 280, "lower": [280, 281], "fuse": 280, "backward": 280, "pass": 280, "state": 280, "cpu": 280, "effici": 280, "low": 280, "rank": 280, "adapt": 280, "qlora": [280, 282], "decompos": 280, "dora": 280, "option": 281, "save": 282, "deep": 282, "dive": 282}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 56}}) \ No newline at end of file