raise compile error (#2188)
Co-authored-by: Felipe Mello <[email protected]>
felipemello1 and Felipe Mello authored Dec 20, 2024
1 parent 46a1ef0 · commit de8b57c
Showing 6 changed files with 29 additions and 21 deletions.
recipes/configs/llama3/8B_qat_lora.yaml (4 additions, 5 deletions)
@@ -83,6 +83,10 @@ dtype: bf16
 enable_activation_checkpointing: False # True reduces memory
 enable_activation_offloading: False # True reduces memory
 
+# QAT arguments
+quantizer:
+  _component_: torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer
+  groupsize: 256
 
 # Profiler (disabled)
 profiler:
@@ -108,8 +112,3 @@ profiler:
   warmup_steps: 3
   active_steps: 2
   num_cycles: 1
-
-# QAT arguments
-quantizer:
-  _component_: torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer
-  groupsize: 256
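
For context, torchtune recipes resolve this quantizer block through the config system rather than by its position in the YAML; below is a minimal sketch of how the values above would be instantiated (assuming torchtune.config.instantiate and the quantizer class path shown in the diff are available in the installed version):

from omegaconf import OmegaConf
from torchtune import config

# Mirrors the relocated `quantizer` block; values are the ones from the diff above.
quantizer_cfg = OmegaConf.create(
    {
        "_component_": "torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer",
        "groupsize": 256,
    }
)
quantizer = config.instantiate(quantizer_cfg)  # builds the QAT quantizer with groupsize=256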
recipes/configs/llama3_1/8B_qat_lora.yaml (4 additions, 5 deletions)
@@ -86,6 +86,10 @@ dtype: bf16
 enable_activation_checkpointing: False # True reduces memory
 enable_activation_offloading: False # True reduces memory
 
+# QAT arguments
+quantizer:
+  _component_: torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer
+  groupsize: 256
 
 # Profiler (disabled)
 profiler:
@@ -111,8 +115,3 @@ profiler:
   warmup_steps: 3
   active_steps: 2
   num_cycles: 1
-
-# QAT arguments
-quantizer:
-  _component_: torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer
-  groupsize: 256
recipes/configs/llama3_2/1B_qat_lora.yaml (4 additions, 5 deletions)
@@ -82,6 +82,10 @@ dtype: bf16
 enable_activation_checkpointing: False # True reduces memory
 enable_activation_offloading: False # True reduces memory
 
+# QAT arguments
+quantizer:
+  _component_: torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer
+  groupsize: 256
 
 # Profiler (disabled)
 profiler:
@@ -107,8 +111,3 @@ profiler:
   warmup_steps: 3
   active_steps: 2
   num_cycles: 1
-
-# QAT arguments
-quantizer:
-  _component_: torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer
-  groupsize: 256
recipes/configs/llama3_2/3B_qat_lora.yaml (4 additions, 5 deletions)
@@ -83,6 +83,10 @@ dtype: bf16
 enable_activation_checkpointing: False # True reduces memory
 enable_activation_offloading: False # True reduces memory
 
+# QAT arguments
+quantizer:
+  _component_: torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer
+  groupsize: 256
 
 # Profiler (disabled)
 profiler:
@@ -108,8 +112,3 @@ profiler:
   warmup_steps: 3
   active_steps: 2
   num_cycles: 1
-
-# QAT arguments
-quantizer:
-  _component_: torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer
-  groupsize: 256
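
The four config diffs above are the same relocation of the QAT block from the bottom of each file to just after the activation flags; because the recipe reads keys by name, the parsed values are unchanged. A quick hypothetical check (file path as in this commit):

from omegaconf import OmegaConf

cfg = OmegaConf.load("recipes/configs/llama3_2/3B_qat_lora.yaml")
assert cfg.quantizer.groupsize == 256
assert cfg.quantizer["_component_"].endswith("Int8DynActInt4WeightQATQuantizer")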
recipes/qat_distributed.py (6 additions, 0 deletions)
@@ -118,6 +118,7 @@ class QATRecipeDistributed(FTRecipeInterface):
     Raises:
         ValueError: If ``dtype`` is set to fp16.
+        ValueError: If ``compile`` is set to True.
         RuntimeError: If ``dtype`` is set to bf16 and the hardware does not support bf16.
         RuntimeError: If ``left_pad_sequence`` is set as the data collator.
         RuntimeError: If ``enable_activation_offloading`` is True and device is not CUDA.
@@ -133,6 +134,11 @@ def __init__(self, cfg: DictConfig) -> None:
                 "full fp16 training is not supported with this recipe. Please use bf16 or fp32 instead."
             )
 
+        if cfg.get("compile", False):
+            raise ValueError(
+                "Compile is not yet supported for QAT. Please set compile=False."
+            )
+
         # logging attributes
         self._output_dir = cfg.output_dir
         self._log_every_n_steps = cfg.get("log_every_n_steps", 1)
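
The new guard mirrors the existing fp16 check and fails fast at recipe construction. A minimal standalone sketch of the behavior (the helper function below is hypothetical and only illustrates the pattern; it is not part of the recipe):

from omegaconf import OmegaConf


def check_compile_supported(cfg) -> None:
    # QAT recipes do not yet support torch.compile, so reject the flag up front.
    if cfg.get("compile", False):
        raise ValueError(
            "Compile is not yet supported for QAT. Please set compile=False."
        )


check_compile_supported(OmegaConf.create({"compile": False}))  # passes silently
check_compile_supported(OmegaConf.create({"compile": True}))   # raises ValueError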
recipes/qat_lora_finetune_distributed.py (7 additions, 1 deletion)
@@ -126,7 +126,8 @@ class QATLoRAFinetuneRecipeDistributed(FTRecipeInterface):
     Raises:
         ValueError: If ``dtype`` is set to fp16.
-        ValueError: If world_size is 1
+        ValueError: If world_size is 1.
+        ValueError: If ``compile`` is set to True.
         RuntimeError: If ``dtype`` is set to bf16 and the hardware does not support bf16.
         RuntimeError: If ``left_pad_sequence`` is set as the data collator.
         RuntimeError: If ``enable_activation_offloading`` is True and device is not CUDA.
@@ -149,6 +150,11 @@ def __init__(self, cfg: DictConfig) -> None:
                 "full fp16 training is not supported with this recipe. Please use bf16 or fp32 instead."
             )
 
+        if cfg.get("compile", False):
+            raise ValueError(
+                "Compile is not yet supported for QAT. Please set compile=False."
+            )
+
         _, rank = utils.get_world_size_and_rank()
 
         # _is_rank_zero is used primarily for logging. In the future, the logger
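
Note: with this guard, a QAT LoRA run that sets compile=True fails immediately at recipe construction with a clear message. Setting compile=False in the YAML, or overriding it at launch (for example tune run --nproc_per_node 2 qat_lora_finetune_distributed --config llama3/8B_qat_lora compile=False, assuming the standard torchtune CLI and key=value override syntax), avoids the error.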
