From 1492efc2bbb7e0a3df58a67dc5de28ef15c19d39 Mon Sep 17 00:00:00 2001
From: bghira
Date: Wed, 11 Dec 2024 15:29:56 -0600
Subject: [PATCH] switch clip method to "value" by default

---
 helpers/configuration/cmd_args.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/helpers/configuration/cmd_args.py b/helpers/configuration/cmd_args.py
index ce365269..6c3839ee 100644
--- a/helpers/configuration/cmd_args.py
+++ b/helpers/configuration/cmd_args.py
@@ -1298,12 +1298,14 @@ def get_argument_parser():
     )
     parser.add_argument(
         "--grad_clip_method",
-        default="norm",
+        default="value",
         choices=["value", "norm"],
         help=(
             "When applying --max_grad_norm, the method to use for clipping the gradients."
-            " The default value 'norm' will clip gradient values such that the entire vector is normalised to this value."
-            " The 'value' method will clip the gradient values to this value, which may result in a less uniform gradient."
+            " The previous default option 'norm' will scale ALL gradient values when any outliers in the gradient are encountered, which can reduce training precision."
+            " The new default option 'value' will clip individual gradient values using this value as a maximum, which may preserve precision while avoiding outliers, enhancing convergence."
+            " In simple terms, the default will help the model learn faster without blowing up (SD3.5 Medium was the main test model)."
+            " Use 'norm' to return to the old behaviour."
         ),
     )
     parser.add_argument(
@@ -2416,9 +2418,6 @@ def parse_cmdline_args(input_args=None, exit_on_error: bool = False):
     # enable torch compile w/ activation checkpointing :[ slows us down.
     torch._dynamo.config.optimize_ddp = False
 
-    # if args.use_ema:
-    #     if "lora" in args.model_type:
-    #         raise ValueError("Using EMA is not currently supported for LoRA training.")
     args.logging_dir = os.path.join(args.output_dir, args.logging_dir)
     args.accelerator_project_config = ProjectConfiguration(
         project_dir=args.output_dir, logging_dir=args.logging_dir
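
For context on what the two --grad_clip_method modes do, below is a minimal PyTorch sketch (not part of the patch) contrasting them. It assumes the value passed to --max_grad_norm is reused as the per-element clamp when the method is 'value', which is what the help text implies; the toy model, data, and variable names are illustrative only and do not reflect SimpleTuner's real training loop.

import torch

# Toy setup so the sketch is self-contained.
model = torch.nn.Linear(4, 4)
loss = model(torch.randn(8, 4)).pow(2).mean()
loss.backward()

max_grad_norm = 1.0          # stand-in for the --max_grad_norm CLI value
grad_clip_method = "value"   # the new default introduced by this patch

if grad_clip_method == "norm":
    # Old default: rescales every gradient element so the global L2 norm
    # across all parameters is at most max_grad_norm, so a single outlier
    # shrinks all gradients.
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
elif grad_clip_method == "value":
    # New default: clamps each gradient element independently into
    # [-max_grad_norm, +max_grad_norm], leaving non-outlier elements untouched.
    torch.nn.utils.clip_grad_value_(model.parameters(), max_grad_norm)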