From 6bd25a6f90cdef6b70b2267a49b77a3d47b7a532 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 23 Nov 2024 09:07:43 -0500
Subject: [PATCH 001/162] js pbar improvements
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 3 ++-
javascript/progressBar.js | 40 +++++++++++++++++++++++++++++++--------
modules/sd_models.py | 8 ++++----
3 files changed, 38 insertions(+), 13 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bc6cd163b..0dabf0d7c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-11-22
+## Update for 2024-11-23
- Model loader improvements:
- detect model components on model load fail
@@ -16,6 +16,7 @@
- fix README links
- fix sdxl controlnet single-file loader
- relax settings validator
+ - improve js progress calls resiliency
## Update for 2024-11-21
diff --git a/javascript/progressBar.js b/javascript/progressBar.js
index a9ecb31e9..3f954e13b 100644
--- a/javascript/progressBar.js
+++ b/javascript/progressBar.js
@@ -4,23 +4,37 @@ function request(url, data, handler, errorHandler) {
const xhr = new XMLHttpRequest();
xhr.open('POST', url, true);
xhr.setRequestHeader('Content-Type', 'application/json');
+ xhr.timeout = 5000;
+ xhr.ontimeout = () => {
+ console.error('xhr.ontimeout', xhr);
+ errorHandler();
+ };
+ xhr.onerror = () => {
+ console.error('xhr.onerror', xhr);
+ errorHandler();
+ };
+ xhr.onabort = () => {
+ console.error('xhr.onabort', xhr);
+ errorHandler();
+ };
xhr.onreadystatechange = () => {
if (xhr.readyState === 4) {
if (xhr.status === 200) {
try {
- const js = JSON.parse(xhr.responseText);
- handler(js);
- } catch (error) {
- console.error(error);
+ const json = JSON.parse(xhr.responseText);
+ handler(json);
+ } catch (err) {
+ console.error('xhr.onreadystatechange', xhr, err);
errorHandler();
}
} else {
+ console.error('xhr.onreadystatechange', xhr);
errorHandler();
}
}
};
- const js = JSON.stringify(data);
- xhr.send(js);
+ const req = JSON.stringify(data);
+ xhr.send(req);
}
function pad2(x) {
@@ -118,11 +132,14 @@ function requestProgress(id_task, progressEl, galleryEl, atEnd = null, onProgres
const start = (id_task, id_live_preview) => { // eslint-disable-line no-shadow
if (!opts.live_previews_enable || opts.live_preview_refresh_period === 0 || opts.show_progress_every_n_steps === 0) return;
- request('./internal/progress', { id_task, id_live_preview }, (res) => {
+
+ const onProgressHandler = (res) => {
+ // debug('onProgress', res);
lastState = res;
const elapsedFromStart = (new Date() - dateStart) / 1000;
hasStarted |= res.active;
if (res.completed || (!res.active && (hasStarted || once)) || (elapsedFromStart > 30 && !res.queued && res.progress === prevProgress)) {
+ debug('onProgressEnd', res);
done();
return;
}
@@ -131,7 +148,14 @@ function requestProgress(id_task, progressEl, galleryEl, atEnd = null, onProgres
if (res.live_preview && galleryEl) img.src = res.live_preview;
if (onProgress) onProgress(res);
setTimeout(() => start(id_task, id_live_preview), opts.live_preview_refresh_period || 500);
- }, done);
+ };
+
+ const onProgressErrorHandler = (err) => {
+ console.error('onProgressError', err);
+ done();
+ };
+
+ request('./internal/progress', { id_task, id_live_preview }, onProgressHandler, onProgressErrorHandler);
};
start(id_task, 0);
}
diff --git a/modules/sd_models.py b/modules/sd_models.py
index cf1921a36..a6ff19b6f 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -319,12 +319,12 @@ def set_diffuser_offload(sd_model, op: str = 'model'):
if not (hasattr(sd_model, "has_accelerate") and sd_model.has_accelerate):
sd_model.has_accelerate = False
if hasattr(sd_model, 'maybe_free_model_hooks') and shared.opts.diffusers_offload_mode == "none":
- shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode}')
+ shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} limit={shared.opts.cuda_mem_fraction}')
sd_model.maybe_free_model_hooks()
sd_model.has_accelerate = False
if hasattr(sd_model, "enable_model_cpu_offload") and shared.opts.diffusers_offload_mode == "model":
try:
- shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode}')
+ shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} limit={shared.opts.cuda_mem_fraction}')
if shared.opts.diffusers_move_base or shared.opts.diffusers_move_unet or shared.opts.diffusers_move_refiner:
shared.opts.diffusers_move_base = False
shared.opts.diffusers_move_unet = False
@@ -339,7 +339,7 @@ def set_diffuser_offload(sd_model, op: str = 'model'):
shared.log.error(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} {e}')
if hasattr(sd_model, "enable_sequential_cpu_offload") and shared.opts.diffusers_offload_mode == "sequential":
try:
- shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode}')
+ shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} limit={shared.opts.cuda_mem_fraction}')
if shared.opts.diffusers_move_base or shared.opts.diffusers_move_unet or shared.opts.diffusers_move_refiner:
shared.opts.diffusers_move_base = False
shared.opts.diffusers_move_unet = False
@@ -359,7 +359,7 @@ def set_diffuser_offload(sd_model, op: str = 'model'):
shared.log.error(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} {e}')
if shared.opts.diffusers_offload_mode == "balanced":
try:
- shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode}')
+ shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} threshold={shared.opts.diffusers_offload_max_gpu_memory} limit={shared.opts.cuda_mem_fraction}')
sd_model = apply_balanced_offload(sd_model)
except Exception as e:
shared.log.error(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} {e}')
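The offload debug lines above now report the configured CUDA memory limit alongside the offload mode. As a hedged aside, the sketch below shows how a fractional per-process VRAM cap of this kind is typically applied in PyTorch; it assumes `cuda_mem_fraction` ultimately feeds `torch.cuda.set_per_process_memory_fraction`, which is not shown in this patch.

```python
# Hedged sketch: applying a fractional CUDA memory limit such as `cuda_mem_fraction`.
# Assumes the option maps onto PyTorch's per-process cap; the SD.Next wiring itself
# is outside this patch.
import torch

def apply_cuda_mem_fraction(fraction: float, device: int = 0) -> None:
    """Cap this process to `fraction` of the device's total VRAM."""
    if not torch.cuda.is_available():
        return
    if 0.0 < fraction < 1.0:
        torch.cuda.set_per_process_memory_fraction(fraction, device)

apply_cuda_mem_fraction(0.8, device=0)  # e.g. limit the process to 80% of GPU 0
```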
From 8a1eaedc82b53902aa780848262297c91edea02e Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 23 Nov 2024 10:47:06 -0500
Subject: [PATCH 002/162] browser to server logging
Signed-off-by: Vladimir Mandic
---
.eslintrc.json | 7 ++--
CHANGELOG.md | 2 ++
installer.py | 2 +-
javascript/extraNetworks.js | 15 +--------
javascript/gallery.js | 4 +--
javascript/loader.js | 8 +++--
javascript/logMonitor.js | 15 ++++++---
javascript/logger.js | 67 +++++++++++++++++++++++++++++++++++++
javascript/progressBar.js | 41 ++---------------------
javascript/script.js | 16 +--------
javascript/ui.js | 6 ++--
modules/api/api.py | 3 +-
modules/api/models.py | 8 ++++-
modules/api/server.py | 12 ++++++-
14 files changed, 120 insertions(+), 86 deletions(-)
create mode 100644 javascript/logger.js
diff --git a/.eslintrc.json b/.eslintrc.json
index 62feb13a5..2dddb41a1 100644
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -42,9 +42,13 @@
"globals": {
// assets
"panzoom": "readonly",
- // script.js
+ // logger.js
"log": "readonly",
"debug": "readonly",
+ "error": "readonly",
+ "xhrGet": "readonly",
+ "xhrPost": "readonly",
+ // script.js
"gradioApp": "readonly",
"executeCallbacks": "readonly",
"onAfterUiUpdate": "readonly",
@@ -87,7 +91,6 @@
// settings.js
"registerDragDrop": "readonly",
// extraNetworks.js
- "requestGet": "readonly",
"getENActiveTab": "readonly",
"quickApplyStyle": "readonly",
"quickSaveStyle": "readonly",
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0dabf0d7c..4a4a0a416 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,8 @@
- Flux: do not recast quants
- Sampler improvements
- update DPM FlowMatch samplers
+- UI:
+ - browser->server logging
- Fixes:
- update `diffusers`
- fix README links
diff --git a/installer.py b/installer.py
index 0b64c3616..8f552526a 100644
--- a/installer.py
+++ b/installer.py
@@ -459,7 +459,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
def check_diffusers():
if args.skip_all or args.skip_requirements:
return
- sha = 'b5fd6f13f5434d69d919cc8cedf0b11db664cf06'
+ sha = '7ac6e286ee994270e737b70c904ea50049d53567'
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
cur = opts.get('diffusers_version', '') if minor > 0 else ''
diff --git a/javascript/extraNetworks.js b/javascript/extraNetworks.js
index 77fe125f3..622e40faf 100644
--- a/javascript/extraNetworks.js
+++ b/javascript/extraNetworks.js
@@ -3,19 +3,6 @@ let sortVal = -1;
// helpers
-const requestGet = (url, data, handler) => {
- const xhr = new XMLHttpRequest();
- const args = Object.keys(data).map((k) => `${encodeURIComponent(k)}=${encodeURIComponent(data[k])}`).join('&');
- xhr.open('GET', `${url}?${args}`, true);
- xhr.onreadystatechange = () => {
- if (xhr.readyState === 4) {
- if (xhr.status === 200) handler(JSON.parse(xhr.responseText));
- else console.error(`Request: url=${url} status=${xhr.status} err`);
- }
- };
- xhr.send(JSON.stringify(data));
-};
-
const getENActiveTab = () => {
let tabName = '';
if (gradioApp().getElementById('tab_txt2img').style.display === 'block') tabName = 'txt2img';
@@ -98,7 +85,7 @@ function readCardTags(el, tags) {
}
function readCardDescription(page, item) {
- requestGet('/sd_extra_networks/description', { page, item }, (data) => {
+ xhrGet('/sd_extra_networks/description', { page, item }, (data) => {
const tabname = getENActiveTab();
const description = gradioApp().querySelector(`#${tabname}_description > label > textarea`);
description.value = data?.description?.trim() || '';
diff --git a/javascript/gallery.js b/javascript/gallery.js
index 1f3afd148..05e594e4c 100644
--- a/javascript/gallery.js
+++ b/javascript/gallery.js
@@ -94,14 +94,14 @@ async function delayFetchThumb(fn) {
outstanding++;
const res = await fetch(`/sdapi/v1/browser/thumb?file=${encodeURI(fn)}`, { priority: 'low' });
if (!res.ok) {
- console.error(res.statusText);
+ error(`fetchThumb: ${res.statusText}`);
outstanding--;
return undefined;
}
const json = await res.json();
outstanding--;
if (!res || !json || json.error || Object.keys(json).length === 0) {
- if (json.error) console.error(json.error);
+ if (json.error) error(`fetchThumb: ${json.error}`);
return undefined;
}
return json;
diff --git a/javascript/loader.js b/javascript/loader.js
index f3c7fe60f..8cd4811bf 100644
--- a/javascript/loader.js
+++ b/javascript/loader.js
@@ -20,7 +20,7 @@ async function preloadImages() {
try {
await Promise.all(imagePromises);
} catch (error) {
- console.error('Error preloading images:', error);
+ error(`preloadImages: ${error}`);
}
}
@@ -43,14 +43,16 @@ async function createSplash() {
const motdEl = document.getElementById('motd');
if (motdEl) motdEl.innerHTML = text.replace(/["]+/g, '');
})
- .catch((err) => console.error('getMOTD:', err));
+ .catch((err) => error(`getMOTD: ${err}`));
}
async function removeSplash() {
const splash = document.getElementById('splash');
if (splash) splash.remove();
log('removeSplash');
- log('startupTime', Math.round(performance.now() - appStartTime) / 1000);
+ const t = Math.round(performance.now() - appStartTime) / 1000;
+ log('startupTime', t);
+ xhrPost('/sdapi/v1/log', { message: `ready time=${t}` });
}
window.onload = createSplash;
diff --git a/javascript/logMonitor.js b/javascript/logMonitor.js
index e4fe99a7f..9b915e6da 100644
--- a/javascript/logMonitor.js
+++ b/javascript/logMonitor.js
@@ -2,6 +2,7 @@ let logMonitorEl = null;
let logMonitorStatus = true;
let logWarnings = 0;
let logErrors = 0;
+let logConnected = false;
function dateToStr(ts) {
const dt = new Date(1000 * ts);
@@ -29,8 +30,7 @@ async function logMonitor() {
row.innerHTML = `${dateToStr(l.created)} | ${level}${l.facility} | ${module}${l.msg} | `;
logMonitorEl.appendChild(row);
} catch (e) {
- // console.log('logMonitor', e);
- console.error('logMonitor line', line);
+ error(`logMonitor: ${line}`);
}
};
@@ -46,6 +46,7 @@ async function logMonitor() {
if (logMonitorStatus) setTimeout(logMonitor, opts.logmonitor_refresh_period);
else setTimeout(logMonitor, 10 * 1000); // on failure try to reconnect every 10sec
+
if (!opts.logmonitor_show) return;
logMonitorStatus = false;
if (!logMonitorEl) {
@@ -64,14 +65,20 @@ async function logMonitor() {
const lines = await res.json();
if (logMonitorEl && lines?.length > 0) logMonitorEl.parentElement.parentElement.style.display = opts.logmonitor_show ? 'block' : 'none';
for (const line of lines) addLogLine(line);
+ if (!logConnected) {
+ logConnected = true;
+ xhrPost('/sdapi/v1/log', { debug: 'connected' });
+ }
} else {
- addLogLine(`{ "created": ${Date.now()}, "level":"ERROR", "module":"logMonitor", "facility":"ui", "msg":"Failed to fetch log: ${res?.status} ${res?.statusText}" }`);
+ logConnected = false;
logErrors++;
+ addLogLine(`{ "created": ${Date.now()}, "level":"ERROR", "module":"logMonitor", "facility":"ui", "msg":"Failed to fetch log: ${res?.status} ${res?.statusText}" }`);
}
cleanupLog(atBottom);
} catch (err) {
- addLogLine(`{ "created": ${Date.now()}, "level":"ERROR", "module":"logMonitor", "facility":"ui", "msg":"Failed to fetch log: server unreachable" }`);
+ logConnected = false;
logErrors++;
+ addLogLine(`{ "created": ${Date.now()}, "level":"ERROR", "module":"logMonitor", "facility":"ui", "msg":"Failed to fetch log: server unreachable" }`);
cleanupLog(atBottom);
}
}
diff --git a/javascript/logger.js b/javascript/logger.js
new file mode 100644
index 000000000..4ff1fb822
--- /dev/null
+++ b/javascript/logger.js
@@ -0,0 +1,67 @@
+const serverTimeout = 5000;
+
+const log = async (...msg) => {
+ const dt = new Date();
+ const ts = `${dt.getHours().toString().padStart(2, '0')}:${dt.getMinutes().toString().padStart(2, '0')}:${dt.getSeconds().toString().padStart(2, '0')}.${dt.getMilliseconds().toString().padStart(3, '0')}`;
+ if (window.logger) window.logger.innerHTML += window.logPrettyPrint(...msg);
+ console.log(ts, ...msg); // eslint-disable-line no-console
+};
+
+const debug = async (...msg) => {
+ const dt = new Date();
+ const ts = `${dt.getHours().toString().padStart(2, '0')}:${dt.getMinutes().toString().padStart(2, '0')}:${dt.getSeconds().toString().padStart(2, '0')}.${dt.getMilliseconds().toString().padStart(3, '0')}`;
+ if (window.logger) window.logger.innerHTML += window.logPrettyPrint(...msg);
+ console.debug(ts, ...msg); // eslint-disable-line no-console
+};
+
+const error = async (...msg) => {
+ const dt = new Date();
+ const ts = `${dt.getHours().toString().padStart(2, '0')}:${dt.getMinutes().toString().padStart(2, '0')}:${dt.getSeconds().toString().padStart(2, '0')}.${dt.getMilliseconds().toString().padStart(3, '0')}`;
+ if (window.logger) window.logger.innerHTML += window.logPrettyPrint(...msg);
+ console.error(ts, ...msg); // eslint-disable-line no-console
+ xhrPost('/sdapi/v1/log', { error: msg.join(' ') }); // eslint-disable-line no-use-before-define
+};
+
+const xhrInternal = (xhrObj, data, handler = undefined, errorHandler = undefined, ignore = false) => {
+ const err = (msg) => {
+ if (!ignore) {
+ error(`${msg}: state=${xhrObj.readyState} status=${xhrObj.status} response=${xhrObj.responseText}`);
+ if (errorHandler) errorHandler();
+ }
+ };
+
+ xhrObj.setRequestHeader('Content-Type', 'application/json');
+ xhrObj.timeout = serverTimeout;
+ xhrObj.ontimeout = () => err('xhr.ontimeout');
+ xhrObj.onerror = () => err('xhr.onerror');
+ xhrObj.onabort = () => err('xhr.onabort');
+ xhrObj.onreadystatechange = () => {
+ if (xhrObj.readyState === 4) {
+ if (xhrObj.status === 200) {
+ try {
+ const json = JSON.parse(xhrObj.responseText);
+ if (handler) handler(json);
+ } catch (e) {
+ error(`xhr.onreadystatechange: ${e}`);
+ }
+ } else {
+ err(`xhr.onreadystatechange: state=${xhrObj.readyState} status=${xhrObj.status} response=${xhrObj.responseText}`);
+ }
+ }
+ };
+ const req = JSON.stringify(data);
+ xhrObj.send(req);
+};
+
+const xhrGet = (url, data, handler = undefined, errorHandler = undefined, ignore = false) => {
+ const xhr = new XMLHttpRequest();
+ const args = Object.keys(data).map((k) => `${encodeURIComponent(k)}=${encodeURIComponent(data[k])}`).join('&');
+ xhr.open('GET', `${url}?${args}`, true);
+ xhrInternal(xhr, data, handler, errorHandler, ignore);
+};
+
+function xhrPost(url, data, handler = undefined, errorHandler = undefined, ignore = false) {
+ const xhr = new XMLHttpRequest();
+ xhr.open('POST', url, true);
+ xhrInternal(xhr, data, handler, errorHandler, ignore);
+}
diff --git a/javascript/progressBar.js b/javascript/progressBar.js
index 3f954e13b..9d897bc87 100644
--- a/javascript/progressBar.js
+++ b/javascript/progressBar.js
@@ -1,42 +1,5 @@
let lastState = {};
-function request(url, data, handler, errorHandler) {
- const xhr = new XMLHttpRequest();
- xhr.open('POST', url, true);
- xhr.setRequestHeader('Content-Type', 'application/json');
- xhr.timeout = 5000;
- xhr.ontimeout = () => {
- console.error('xhr.ontimeout', xhr);
- errorHandler();
- };
- xhr.onerror = () => {
- console.error('xhr.onerror', xhr);
- errorHandler();
- };
- xhr.onabort = () => {
- console.error('xhr.onabort', xhr);
- errorHandler();
- };
- xhr.onreadystatechange = () => {
- if (xhr.readyState === 4) {
- if (xhr.status === 200) {
- try {
- const json = JSON.parse(xhr.responseText);
- handler(json);
- } catch (err) {
- console.error('xhr.onreadystatechange', xhr, err);
- errorHandler();
- }
- } else {
- console.error('xhr.onreadystatechange', xhr);
- errorHandler();
- }
- }
- };
- const req = JSON.stringify(data);
- xhr.send(req);
-}
-
function pad2(x) {
return x < 10 ? `0${x}` : x;
}
@@ -151,11 +114,11 @@ function requestProgress(id_task, progressEl, galleryEl, atEnd = null, onProgres
};
const onProgressErrorHandler = (err) => {
- console.error('onProgressError', err);
+ error(`onProgressError: ${err}`);
done();
};
- request('./internal/progress', { id_task, id_live_preview }, onProgressHandler, onProgressErrorHandler);
+ xhrPost('./internal/progress', { id_task, id_live_preview }, onProgressHandler, onProgressErrorHandler);
};
start(id_task, 0);
}
diff --git a/javascript/script.js b/javascript/script.js
index 104567dd7..250e90ba2 100644
--- a/javascript/script.js
+++ b/javascript/script.js
@@ -1,17 +1,3 @@
-const log = (...msg) => {
- const dt = new Date();
- const ts = `${dt.getHours().toString().padStart(2, '0')}:${dt.getMinutes().toString().padStart(2, '0')}:${dt.getSeconds().toString().padStart(2, '0')}.${dt.getMilliseconds().toString().padStart(3, '0')}`;
- if (window.logger) window.logger.innerHTML += window.logPrettyPrint(...msg);
- console.log(ts, ...msg); // eslint-disable-line no-console
-};
-
-const debug = (...msg) => {
- const dt = new Date();
- const ts = `${dt.getHours().toString().padStart(2, '0')}:${dt.getMinutes().toString().padStart(2, '0')}:${dt.getSeconds().toString().padStart(2, '0')}.${dt.getMilliseconds().toString().padStart(3, '0')}`;
- if (window.logger) window.logger.innerHTML += window.logPrettyPrint(...msg);
- console.debug(ts, ...msg); // eslint-disable-line no-console
-};
-
async function sleep(ms) {
return new Promise((resolve) => setTimeout(resolve, ms)); // eslint-disable-line no-promise-executor-return
}
@@ -82,7 +68,7 @@ function executeCallbacks(queue, arg) {
try {
callback(arg);
} catch (e) {
- console.error('error running callback', callback, ':', e);
+ error(`executeCallbacks: ${callback} ${e}`);
}
}
}
diff --git a/javascript/ui.js b/javascript/ui.js
index 8808f1c8b..81d1c67e4 100644
--- a/javascript/ui.js
+++ b/javascript/ui.js
@@ -28,7 +28,7 @@ function clip_gallery_urls(gallery) {
const files = gallery.map((v) => v.data);
navigator.clipboard.writeText(JSON.stringify(files)).then(
() => log('clipboard:', files),
- (err) => console.error('clipboard:', files, err),
+ (err) => error(`clipboard: ${files} ${err}`),
);
}
@@ -493,9 +493,9 @@ function previewTheme() {
el.src = `/file=html/${name}.jpg`;
}
})
- .catch((e) => console.error('previewTheme:', e));
+ .catch((e) => error(`previewTheme: ${e}`));
})
- .catch((e) => console.error('previewTheme:', e));
+ .catch((e) => error(`previewTheme: ${e}`));
}
async function browseFolder() {
diff --git a/modules/api/api.py b/modules/api/api.py
index f8346995d..d48cbf521 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -35,7 +35,8 @@ def __init__(self, app: FastAPI, queue_lock: Lock):
# server api
self.add_api_route("/sdapi/v1/motd", server.get_motd, methods=["GET"], response_model=str)
- self.add_api_route("/sdapi/v1/log", server.get_log_buffer, methods=["GET"], response_model=List[str])
+ self.add_api_route("/sdapi/v1/log", server.get_log, methods=["GET"], response_model=List[str])
+ self.add_api_route("/sdapi/v1/log", server.post_log, methods=["POST"])
self.add_api_route("/sdapi/v1/start", self.get_session_start, methods=["GET"])
self.add_api_route("/sdapi/v1/version", server.get_version, methods=["GET"])
self.add_api_route("/sdapi/v1/status", server.get_status, methods=["GET"], response_model=models.ResStatus)
diff --git a/modules/api/models.py b/modules/api/models.py
index e68ebf081..39bcbe383 100644
--- a/modules/api/models.py
+++ b/modules/api/models.py
@@ -286,10 +286,16 @@ class ResImageInfo(BaseModel):
items: dict = Field(title="Items", description="A dictionary containing all the other fields the image had")
parameters: dict = Field(title="Parameters", description="A dictionary with parsed generation info fields")
-class ReqLog(BaseModel):
+class ReqGetLog(BaseModel):
lines: int = Field(default=100, title="Lines", description="How many lines to return")
clear: bool = Field(default=False, title="Clear", description="Should the log be cleared after returning the lines?")
+
+class ReqPostLog(BaseModel):
+ message: Optional[str] = Field(title="Message", description="The info message to log")
+ debug: Optional[str] = Field(title="Debug message", description="The debug message to log")
+ error: Optional[str] = Field(title="Error message", description="The error message to log")
+
class ReqProgress(BaseModel):
skip_current_image: bool = Field(default=False, title="Skip current image", description="Skip current image serialization")
diff --git a/modules/api/server.py b/modules/api/server.py
index 939e19c86..dabbe634c 100644
--- a/modules/api/server.py
+++ b/modules/api/server.py
@@ -37,12 +37,22 @@ def get_platform():
from modules.loader import get_packages as loader_get_packages
return { **installer_get_platform(), **loader_get_packages() }
-def get_log_buffer(req: models.ReqLog = Depends()):
+def get_log(req: models.ReqGetLog = Depends()):
lines = shared.log.buffer[:req.lines] if req.lines > 0 else shared.log.buffer.copy()
if req.clear:
shared.log.buffer.clear()
return lines
+def post_log(req: models.ReqPostLog):
+ if req.message is not None:
+ shared.log.info(f'UI: {req.message}')
+ if req.debug is not None:
+ shared.log.debug(f'UI: {req.debug}')
+ if req.error is not None:
+ shared.log.error(f'UI: {req.error}')
+ return {}
+
+
def get_config():
options = {}
for k in shared.opts.data.keys():
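Patch 002 splits the log route into a GET that drains the server-side buffer and a POST that lets the browser forward its own messages (`ReqGetLog`, `ReqPostLog`, `get_log` and `post_log` above). A rough usage sketch from any HTTP client follows; the base URL and the absence of authentication are assumptions.

```python
# Hedged usage sketch for the /sdapi/v1/log endpoints added in this patch.
# Assumes a local, unauthenticated SD.Next instance; adjust the base URL as needed.
import requests

BASE = 'http://127.0.0.1:7860'  # assumed default host/port

# GET: fetch up to 50 buffered log lines without clearing the buffer (ReqGetLog)
lines = requests.get(f'{BASE}/sdapi/v1/log', params={'lines': 50, 'clear': False}, timeout=5).json()
for line in lines:
    print(line)

# POST: forward a client-side message; the server logs it with a 'UI:' prefix (ReqPostLog)
requests.post(f'{BASE}/sdapi/v1/log', json={'error': 'example client-side error'}, timeout=5)
```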
From 67c6b93213c20d830a00f0c7e6b429527bf6c725 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 23 Nov 2024 15:27:01 -0500
Subject: [PATCH 003/162] flux tools support
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 22 +++++-
TODO.md | 2 +-
installer.py | 4 +-
javascript/logger.js | 3 +-
modules/model_flux.py | 12 +++-
modules/modelloader.py | 3 +
modules/processing_diffusers.py | 6 +-
modules/sd_checkpoint.py | 3 +-
modules/sd_models.py | 10 ++-
scripts/animatediff.py | 2 +-
scripts/cogvideo.py | 2 +-
scripts/flux_tools.py | 115 ++++++++++++++++++++++++++++++++
scripts/image2video.py | 2 +-
wiki | 2 +-
14 files changed, 170 insertions(+), 18 deletions(-)
create mode 100644 scripts/flux_tools.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4a4a0a416..ad77bbbe5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,23 @@
## Update for 2024-11-23
+- [Flux Tools](https://blackforestlabs.ai/flux-1-tools/):
+  **Redux** is actually a tool, while **Fill** is an inpaint/outpaint-optimized version of *Flux-dev*
+  **Canny** & **Depth** are optimized versions of *Flux-dev* for their respective tasks: they are *not* ControlNets that work on top of a model
+  To use, go to the image or control interface and select *Flux Tools* in scripts
+ All models are auto-downloaded on first use
+ *note*: All models are [gated](https://github.com/vladmandic/automatic/wiki/Gated) and require acceptance of terms and conditions via web page
+ *recommended*: Enable on-the-fly [quantization](https://github.com/vladmandic/automatic/wiki/Quantization) or [compression](https://github.com/vladmandic/automatic/wiki/NNCF-Compression) to reduce resource usage
+ *todo*: support for Canny/Depth LoRAs
+ - [Redux](https://huggingface.co/black-forest-labs/FLUX.1-Redux-dev): ~0.1GB
+    works together with the existing model: it analyzes the input image and uses the result in place of the prompt
+ *recommended*: low denoise strength levels result in more variety
+ - [Fill](https://huggingface.co/black-forest-labs/FLUX.1-Fill-dev): ~23.8GB, replaces currently loaded model
+ *note*: can be used in inpaint/outpaint mode only
+ - [Canny](https://huggingface.co/black-forest-labs/FLUX.1-Canny-dev): ~23.8GB, replaces currently loaded model
+ *recommended*: guidance scale 30
+ - [Depth](https://huggingface.co/black-forest-labs/FLUX.1-Depth-dev): ~23.8GB, replaces currently loaded model
+ *recommended*: guidance scale 10
- Model loader improvements:
- detect model components on model load fail
- Flux, SD35: force unload model
@@ -11,14 +28,15 @@
- Flux: do not recast quants
- Sampler improvements
- update DPM FlowMatch samplers
-- UI:
+- UI:
- browser->server logging
- Fixes:
- update `diffusers`
- fix README links
- fix sdxl controlnet single-file loader
- relax settings validator
- - improve js progress calls resiliency
+ - improve js progress calls resiliency
+ - fix text-to-video pipeline
## Update for 2024-11-21
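The Redux entry in the changelog above describes the tool as analyzing the input image and using the result in place of the prompt. As a rough standalone illustration with `diffusers` (the pipeline class and model IDs come from `scripts/flux_tools.py` added later in this patch; dtype, step count, guidance and file names are illustrative assumptions):

```python
# Hedged sketch of the Redux flow outside SD.Next: the prior pipeline turns an input
# image into conditioning embeddings that stand in for the text prompt.
import torch
from diffusers import FluxPriorReduxPipeline, FluxPipeline
from diffusers.utils import load_image

dtype = torch.bfloat16  # assumed
redux = FluxPriorReduxPipeline.from_pretrained(
    'black-forest-labs/FLUX.1-Redux-dev', torch_dtype=dtype,
).to('cuda')  # scripts/flux_tools.py additionally pins revision='refs/pr/8'
pipe = FluxPipeline.from_pretrained(
    'black-forest-labs/FLUX.1-dev',
    text_encoder=None, text_encoder_2=None,  # prompt is unused with Redux
    torch_dtype=dtype,
).to('cuda')

image = load_image('input.png')      # assumed local input image
prior_out = redux(image)             # image -> prompt/pooled embeddings
result = pipe(guidance_scale=2.5, num_inference_steps=20, **prior_out).images[0]
result.save('redux.png')
```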
diff --git a/TODO.md b/TODO.md
index 973e062dc..73008039d 100644
--- a/TODO.md
+++ b/TODO.md
@@ -7,9 +7,9 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
- SD35 IPAdapter:
- SD35 LoRA:
- Flux IPAdapter:
-- Flux Fill/ControlNet/Redux:
- Flux NF4:
- SANA:
+- LTX-Video:
## Other
diff --git a/installer.py b/installer.py
index 8f552526a..396b53fab 100644
--- a/installer.py
+++ b/installer.py
@@ -212,7 +212,7 @@ def installed(package, friendly: str = None, reload = False, quiet = False):
if friendly:
pkgs = friendly.split()
else:
- pkgs = [p for p in package.split() if not p.startswith('-') and not p.startswith('=')]
+ pkgs = [p for p in package.split() if not p.startswith('-') and not p.startswith('=') and not p.startswith('git+')]
pkgs = [p.split('/')[-1] for p in pkgs] # get only package name if installing from url
for pkg in pkgs:
if '!=' in pkg:
@@ -295,7 +295,7 @@ def install(package, friendly: str = None, ignore: bool = False, reinstall: bool
quick_allowed = False
if args.reinstall or reinstall or not installed(package, friendly, quiet=quiet):
deps = '' if not no_deps else '--no-deps '
- res = pip(f"install{' --upgrade' if not args.uv else ''} {deps}{package}", ignore=ignore, uv=package != "uv")
+ res = pip(f"install{' --upgrade' if not args.uv else ''} {deps}{package}", ignore=ignore, uv=package != "uv" and not package.startswith('git+'))
try:
import importlib # pylint: disable=deprecated-module
importlib.reload(pkg_resources)
diff --git a/javascript/logger.js b/javascript/logger.js
index 4ff1fb822..5aa8face3 100644
--- a/javascript/logger.js
+++ b/javascript/logger.js
@@ -19,7 +19,8 @@ const error = async (...msg) => {
const ts = `${dt.getHours().toString().padStart(2, '0')}:${dt.getMinutes().toString().padStart(2, '0')}:${dt.getSeconds().toString().padStart(2, '0')}.${dt.getMilliseconds().toString().padStart(3, '0')}`;
if (window.logger) window.logger.innerHTML += window.logPrettyPrint(...msg);
console.error(ts, ...msg); // eslint-disable-line no-console
- xhrPost('/sdapi/v1/log', { error: msg.join(' ') }); // eslint-disable-line no-use-before-define
+ const txt = msg.join(' ');
+ if (!txt.includes('asctime') && !txt.includes('xhr.')) xhrPost('/sdapi/v1/log', { error: txt }); // eslint-disable-line no-use-before-define
};
const xhrInternal = (xhrObj, data, handler = undefined, errorHandler = undefined, ignore = false) => {
diff --git a/modules/model_flux.py b/modules/model_flux.py
index 17234d9a4..324e50b36 100644
--- a/modules/model_flux.py
+++ b/modules/model_flux.py
@@ -306,9 +306,17 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
model_te.loaded_te = shared.opts.sd_text_encoder
if vae is not None:
kwargs['vae'] = vae
- shared.log.debug(f'Load model: type=FLUX preloaded={list(kwargs)}')
if repo_id == 'sayakpaul/flux.1-dev-nf4':
repo_id = 'black-forest-labs/FLUX.1-dev' # workaround since sayakpaul model is missing model_index.json
+ if 'Fill' in repo_id:
+ cls = diffusers.FluxFillPipeline
+ elif 'Canny' in repo_id:
+ cls = diffusers.FluxControlPipeline
+ elif 'Depth' in repo_id:
+ cls = diffusers.FluxControlPipeline
+ else:
+ cls = diffusers.FluxPipeline
+ shared.log.debug(f'Load model: type=FLUX cls={cls.__name__} preloaded={list(kwargs)} revision={diffusers_load_config.get("revision", None)}')
for c in kwargs:
if kwargs[c].dtype == torch.float32 and devices.dtype != torch.float32:
shared.log.warning(f'Load model: type=FLUX component={c} dtype={kwargs[c].dtype} cast dtype={devices.dtype} recast')
@@ -319,7 +327,7 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
if checkpoint_info.path.endswith('.safetensors') and os.path.isfile(checkpoint_info.path):
pipe = diffusers.FluxPipeline.from_single_file(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **kwargs, **diffusers_load_config)
else:
- pipe = diffusers.FluxPipeline.from_pretrained(repo_id, cache_dir=shared.opts.diffusers_dir, **kwargs, **diffusers_load_config)
+ pipe = cls.from_pretrained(repo_id, cache_dir=shared.opts.diffusers_dir, **kwargs, **diffusers_load_config)
# release memory
transformer = None
diff --git a/modules/modelloader.py b/modules/modelloader.py
index ce36a739b..b1b3930d6 100644
--- a/modules/modelloader.py
+++ b/modules/modelloader.py
@@ -326,6 +326,9 @@ def find_diffuser(name: str, full=False):
return [repo[0]['name']]
hf_api = hf.HfApi()
models = list(hf_api.list_models(model_name=name, library=['diffusers'], full=True, limit=20, sort="downloads", direction=-1))
+ if len(models) == 0:
+ models = list(hf_api.list_models(model_name=name, full=True, limit=20, sort="downloads", direction=-1)) # widen search
+ models = [m for m in models if m.id.startswith(name)] # filter exact
shared.log.debug(f'Searching diffusers models: {name} {len(models) > 0}')
if len(models) > 0:
if not full:
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 2164134b1..44dff811b 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -231,7 +231,8 @@ def process_hires(p: processing.StableDiffusionProcessing, output):
output = shared.sd_model(**hires_args) # pylint: disable=not-callable
if isinstance(output, dict):
output = SimpleNamespace(**output)
- shared.history.add(output.images, info=processing.create_infotext(p), ops=p.ops)
+ if hasattr(output, 'images'):
+ shared.history.add(output.images, info=processing.create_infotext(p), ops=p.ops)
sd_models_compile.check_deepcache(enable=False)
sd_models_compile.openvino_post_compile(op="base")
except AssertionError as e:
@@ -313,7 +314,8 @@ def process_refine(p: processing.StableDiffusionProcessing, output):
output = shared.sd_refiner(**refiner_args) # pylint: disable=not-callable
if isinstance(output, dict):
output = SimpleNamespace(**output)
- shared.history.add(output.images, info=processing.create_infotext(p), ops=p.ops)
+ if hasattr(output, 'images'):
+ shared.history.add(output.images, info=processing.create_infotext(p), ops=p.ops)
sd_models_compile.openvino_post_compile(op="refiner")
except AssertionError as e:
shared.log.info(e)
diff --git a/modules/sd_checkpoint.py b/modules/sd_checkpoint.py
index afc5842e4..a4d84192f 100644
--- a/modules/sd_checkpoint.py
+++ b/modules/sd_checkpoint.py
@@ -198,8 +198,9 @@ def get_closet_checkpoint_match(s: str):
if shared.opts.sd_checkpoint_autodownload and s.count('/') == 1:
modelloader.hf_login()
found = modelloader.find_diffuser(s, full=True)
+ found = [f for f in found if f == s]
shared.log.info(f'HF search: model="{s}" results={found}')
- if found is not None and len(found) == 1 and found[0] == s:
+ if found is not None and len(found) == 1:
checkpoint_info = CheckpointInfo(s)
checkpoint_info.type = 'huggingface'
return checkpoint_info
diff --git a/modules/sd_models.py b/modules/sd_models.py
index a6ff19b6f..2ad204b46 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -771,7 +771,7 @@ def load_diffuser_file(model_type, pipeline, checkpoint_info, diffusers_load_con
return sd_model
-def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=None, op='model'): # pylint: disable=unused-argument
+def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=None, op='model', revision=None): # pylint: disable=unused-argument
if timer is None:
timer = Timer()
logging.getLogger("diffusers").setLevel(logging.ERROR)
@@ -784,6 +784,8 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No
"requires_safety_checker": False, # sd15 specific but we cant know ahead of time
# "use_safetensors": True,
}
+ if revision is not None:
+ diffusers_load_config['revision'] = revision
if shared.opts.diffusers_model_load_variant != 'default':
diffusers_load_config['variant'] = shared.opts.diffusers_model_load_variant
if shared.opts.diffusers_pipeline == 'Custom Diffusers Pipeline' and len(shared.opts.custom_diffusers_pipeline) > 0:
@@ -1077,6 +1079,8 @@ def set_diffuser_pipe(pipe, new_pipe_type):
'OmniGenPipeline',
'StableDiffusion3ControlNetPipeline',
'InstantIRPipeline',
+ 'FluxFillPipeline',
+ 'FluxControlPipeline',
]
n = getattr(pipe.__class__, '__name__', '')
@@ -1345,7 +1349,7 @@ def reload_text_encoder(initial=False):
set_t5(pipe=shared.sd_model, module='text_encoder_3', t5=shared.opts.sd_text_encoder, cache_dir=shared.opts.diffusers_dir)
-def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model', force=False):
+def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model', force=False, revision=None):
load_dict = shared.opts.sd_model_dict != model_data.sd_dict
from modules import lowvram, sd_hijack
checkpoint_info = info or select_checkpoint(op=op) # are we selecting model or dictionary
@@ -1390,7 +1394,7 @@ def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model',
load_model(checkpoint_info, already_loaded_state_dict=state_dict, timer=timer, op=op)
model_data.sd_dict = shared.opts.sd_model_dict
else:
- load_diffuser(checkpoint_info, already_loaded_state_dict=state_dict, timer=timer, op=op)
+ load_diffuser(checkpoint_info, already_loaded_state_dict=state_dict, timer=timer, op=op, revision=revision)
if load_dict and next_checkpoint_info is not None:
model_data.sd_dict = shared.opts.sd_model_dict
shared.opts.data["sd_model_checkpoint"] = next_checkpoint_info.title
diff --git a/scripts/animatediff.py b/scripts/animatediff.py
index 4c50f9cf6..91db60915 100644
--- a/scripts/animatediff.py
+++ b/scripts/animatediff.py
@@ -189,7 +189,7 @@ def set_free_noise(frames):
class Script(scripts.Script):
def title(self):
- return 'Video AnimateDiff'
+ return 'Video: AnimateDiff'
def show(self, is_img2img):
# return scripts.AlwaysVisible if shared.native else False
diff --git a/scripts/cogvideo.py b/scripts/cogvideo.py
index 7f2c7225e..c988c05c4 100644
--- a/scripts/cogvideo.py
+++ b/scripts/cogvideo.py
@@ -22,7 +22,7 @@
class Script(scripts.Script):
def title(self):
- return 'Video CogVideoX'
+ return 'Video: CogVideoX'
def show(self, is_img2img):
return shared.native
diff --git a/scripts/flux_tools.py b/scripts/flux_tools.py
new file mode 100644
index 000000000..9a2fdbd63
--- /dev/null
+++ b/scripts/flux_tools.py
@@ -0,0 +1,115 @@
+# https://github.com/huggingface/diffusers/pull/9985
+
+import time
+import gradio as gr
+import diffusers
+from modules import scripts, processing, shared, devices, sd_models
+from installer import install
+
+
+redux_pipe: diffusers.FluxPriorReduxPipeline = None
+processor_canny = None
+processor_depth = None
+title = 'Flux Tools'
+
+
+class Script(scripts.Script):
+ def title(self):
+ return f'{title}'
+
+ def show(self, is_img2img):
+ return is_img2img if shared.native else False
+
+ def ui(self, _is_img2img): # ui elements
+ with gr.Row():
+ gr.HTML('  Flux.1 Redux')
+ with gr.Row():
+ tool = gr.Dropdown(label='Tool', choices=['None', 'Redux', 'Fill', 'Canny', 'Depth'], value='None')
+ strength = gr.Checkbox(label='Override denoise strength', value=True)
+ return [tool, strength]
+
+ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', strength: bool = True): # pylint: disable=arguments-differ
+ global redux_pipe, processor_canny, processor_depth # pylint: disable=global-statement
+ if tool is None or tool == 'None':
+ return
+ supported_model_list = ['f1']
+ if shared.sd_model_type not in supported_model_list:
+ shared.log.warning(f'{title}: class={shared.sd_model.__class__.__name__} model={shared.sd_model_type} required={supported_model_list}')
+ return None
+ image = getattr(p, 'init_images', None)
+ if image is None or len(image) == 0:
+ shared.log.error(f'{title}: tool={tool} no init_images')
+ return None
+ else:
+ image = image[0] if isinstance(image, list) else image
+
+ shared.log.info(f'{title}: tool={tool} init')
+
+ t0 = time.time()
+ if tool == 'Redux':
+ # pipe_prior_redux = FluxPriorReduxPipeline.from_pretrained("black-forest-labs/FLUX.1-Redux-dev", revision="refs/pr/8", torch_dtype=torch.bfloat16).to("cuda")
+ if redux_pipe is None:
+ redux_pipe = diffusers.FluxPriorReduxPipeline.from_pretrained(
+ "black-forest-labs/FLUX.1-Redux-dev",
+ revision="refs/pr/8",
+ torch_dtype=devices.dtype,
+ cache_dir=shared.opts.hfcache_dir
+ ).to(devices.device)
+ redux_output = redux_pipe(image)
+ for k, v in redux_output.items():
+ p.task_args[k] = v
+ else:
+ if redux_pipe is not None:
+ shared.log.debug(f'{title}: tool=Redux unload')
+ redux_pipe = None
+
+ if tool == 'Fill':
+ # pipe = FluxFillPipeline.from_pretrained("black-forest-labs/FLUX.1-Fill-dev", torch_dtype=torch.bfloat16, revision="refs/pr/4").to("cuda")
+ if p.image_mask is None:
+ shared.log.error(f'{title}: tool={tool} no image_mask')
+ return None
+ if shared.sd_model.__class__.__name__ != 'FluxFillPipeline':
+ shared.opts.data["sd_model_checkpoint"] = "black-forest-labs/FLUX.1-Fill-dev"
+ sd_models.reload_model_weights(op='model', revision="refs/pr/4")
+ p.task_args['image'] = image
+ p.task_args['mask_image'] = p.image_mask
+
+ if tool == 'Canny':
+ # pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-Canny-dev", torch_dtype=torch.bfloat16, revision="refs/pr/1").to("cuda")
+ install('controlnet-aux')
+ install('timm==0.9.16')
+ if shared.sd_model.__class__.__name__ != 'FluxControlPipeline' or 'Canny' not in shared.opts.sd_model_checkpoint:
+ shared.opts.data["sd_model_checkpoint"] = "black-forest-labs/FLUX.1-Canny-dev"
+ sd_models.reload_model_weights(op='model', revision="refs/pr/1")
+ if processor_canny is None:
+ from controlnet_aux import CannyDetector
+ processor_canny = CannyDetector()
+ control_image = processor_canny(image, low_threshold=50, high_threshold=200, detect_resolution=1024, image_resolution=1024)
+ p.task_args['control_image'] = control_image
+ if strength:
+ p.task_args['strength'] = None
+ else:
+ if processor_canny is not None:
+ shared.log.debug(f'{title}: tool=Canny unload processor')
+ processor_canny = None
+
+ if tool == 'Depth':
+ # pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-Depth-dev", torch_dtype=torch.bfloat16, revision="refs/pr/1").to("cuda")
+ install('git+https://github.com/asomoza/image_gen_aux.git', 'image_gen_aux')
+ if shared.sd_model.__class__.__name__ != 'FluxControlPipeline' or 'Depth' not in shared.opts.sd_model_checkpoint:
+ shared.opts.data["sd_model_checkpoint"] = "black-forest-labs/FLUX.1-Depth-dev"
+ sd_models.reload_model_weights(op='model', revision="refs/pr/1")
+ if processor_depth is None:
+ from image_gen_aux import DepthPreprocessor
+ processor_depth = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
+ control_image = processor_depth(control_image)[0].convert("RGB")
+ p.task_args['control_image'] = control_image
+ if strength:
+ p.task_args['strength'] = None
+ else:
+ if processor_depth is not None:
+ shared.log.debug(f'{title}: tool=Depth unload processor')
+ processor_depth = None
+
+ shared.log.debug(f'{title}: tool={tool} ready time={time.time() - t0:.2f}')
+ devices.torch_gc()
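The script above swaps the loaded checkpoint for the Fill/Canny/Depth variants and routes inputs through `p.task_args`. For reference, a hedged sketch of the Fill (inpaint/outpaint) path as a direct `diffusers` call, mirroring the commented reference inside the script; the prompt, mask files and guidance value are illustrative assumptions.

```python
# Hedged sketch of the Fill path as a plain diffusers call; values are illustrative.
import torch
from diffusers import FluxFillPipeline
from diffusers.utils import load_image

pipe = FluxFillPipeline.from_pretrained(
    'black-forest-labs/FLUX.1-Fill-dev', torch_dtype=torch.bfloat16,
).to('cuda')  # the script reloads this checkpoint with revision='refs/pr/4'

image = load_image('scene.png')  # assumed input image
mask = load_image('mask.png')    # assumed white-on-black mask of the area to repaint
out = pipe(
    prompt='a red sofa',         # assumed prompt
    image=image,
    mask_image=mask,
    guidance_scale=30,           # assumed; tune per content
    num_inference_steps=30,
).images[0]
out.save('fill.png')
```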
diff --git a/scripts/image2video.py b/scripts/image2video.py
index 876ed3193..5e08922ee 100644
--- a/scripts/image2video.py
+++ b/scripts/image2video.py
@@ -13,7 +13,7 @@
class Script(scripts.Script):
def title(self):
- return 'Video VGen Image-to-Video'
+ return 'Video: VGen Image-to-Video'
def show(self, is_img2img):
return is_img2img if shared.native else False
diff --git a/wiki b/wiki
index 30f3265bb..313a6b911 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 30f3265bb06ac738e4467f58be4df3fc4b49c08b
+Subproject commit 313a6b911bd239b4fa8092ed89b936428214342e
From d7489dc0fa28a96c8238e3757b14ce487aae2fe0 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 23 Nov 2024 15:27:43 -0500
Subject: [PATCH 004/162] update modernui
Signed-off-by: Vladimir Mandic
---
extensions-builtin/sdnext-modernui | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index 4647bd7f8..6bc2f504e 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit 4647bd7f86be9d2783a9ba1f38acaa9bcec942d2
+Subproject commit 6bc2f504e57eb75ebac1e9ec6c212549ebcfbc18
From a2590222ed0fd58b9c99461d76502a410047cad9 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 23 Nov 2024 18:16:18 -0500
Subject: [PATCH 005/162] flux tools
Signed-off-by: Vladimir Mandic
---
modules/sd_checkpoint.py | 5 ++++-
scripts/flux_tools.py | 20 ++++++++++++++------
2 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/modules/sd_checkpoint.py b/modules/sd_checkpoint.py
index a4d84192f..20654e28b 100644
--- a/modules/sd_checkpoint.py
+++ b/modules/sd_checkpoint.py
@@ -168,7 +168,10 @@ def update_model_hashes():
def get_closet_checkpoint_match(s: str):
if s.startswith('https://huggingface.co/'):
- s = s.replace('https://huggingface.co/', '')
+ model_name = s.replace('https://huggingface.co/', '')
+ checkpoint_info = CheckpointInfo(model_name) # create a virtual model info
+ checkpoint_info.type = 'huggingface'
+ return checkpoint_info
if s.startswith('huggingface/'):
model_name = s.replace('huggingface/', '')
checkpoint_info = CheckpointInfo(model_name) # create a virutal model info
diff --git a/scripts/flux_tools.py b/scripts/flux_tools.py
index 9a2fdbd63..e5fe443b7 100644
--- a/scripts/flux_tools.py
+++ b/scripts/flux_tools.py
@@ -25,10 +25,12 @@ def ui(self, _is_img2img): # ui elements
gr.HTML('  Flux.1 Redux')
with gr.Row():
tool = gr.Dropdown(label='Tool', choices=['None', 'Redux', 'Fill', 'Canny', 'Depth'], value='None')
+ with gr.Row():
+ process = gr.Checkbox(label='Preprocess input images', value=True)
strength = gr.Checkbox(label='Override denoise strength', value=True)
- return [tool, strength]
+ return [tool, strength, process]
- def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', strength: bool = True): # pylint: disable=arguments-differ
+ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', strength: bool = True, process: bool = True): # pylint: disable=arguments-differ
global redux_pipe, processor_canny, processor_depth # pylint: disable=global-statement
if tool is None or tool == 'None':
return
@@ -84,8 +86,11 @@ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', stren
if processor_canny is None:
from controlnet_aux import CannyDetector
processor_canny = CannyDetector()
- control_image = processor_canny(image, low_threshold=50, high_threshold=200, detect_resolution=1024, image_resolution=1024)
- p.task_args['control_image'] = control_image
+ if process:
+ control_image = processor_canny(image, low_threshold=50, high_threshold=200, detect_resolution=1024, image_resolution=1024)
+ p.task_args['control_image'] = control_image
+ else:
+ p.task_args['control_image'] = image
if strength:
p.task_args['strength'] = None
else:
@@ -102,8 +107,11 @@ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', stren
if processor_depth is None:
from image_gen_aux import DepthPreprocessor
processor_depth = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
- control_image = processor_depth(control_image)[0].convert("RGB")
- p.task_args['control_image'] = control_image
+ if process:
+ control_image = processor_depth(image)[0].convert("RGB")
+ p.task_args['control_image'] = control_image
+ else:
+ p.task_args['control_image'] = image
if strength:
p.task_args['strength'] = None
else:
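With the new *Preprocess input images* checkbox the script either runs the Canny/Depth detectors on the init image or passes a user-supplied map straight through as `control_image`. A hedged standalone sketch of the Canny branch follows; the detector parameters match the script, the guidance value of 30 follows the changelog recommendation, and the prompt, step count and file names are illustrative.

```python
# Hedged sketch of the Canny control path outside the script: preprocess with
# controlnet_aux, then condition FluxControlPipeline on the edge map.
import torch
from diffusers import FluxControlPipeline
from diffusers.utils import load_image
from controlnet_aux import CannyDetector

pipe = FluxControlPipeline.from_pretrained(
    'black-forest-labs/FLUX.1-Canny-dev', torch_dtype=torch.bfloat16,
).to('cuda')

source = load_image('photo.png')  # assumed input image
control = CannyDetector()(source, low_threshold=50, high_threshold=200,
                          detect_resolution=1024, image_resolution=1024)  # same values as the script

out = pipe(
    prompt='a watercolor landscape',  # assumed prompt
    control_image=control,
    guidance_scale=30,                # changelog-recommended value for Canny
    num_inference_steps=30,
).images[0]
out.save('canny.png')
```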
From 9c486320df8af76011502e97a292292691eef005 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 23 Nov 2024 19:24:18 -0500
Subject: [PATCH 006/162] update ui
Signed-off-by: Vladimir Mandic
---
extensions-builtin/sdnext-modernui | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index 6bc2f504e..b31453f9d 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit 6bc2f504e57eb75ebac1e9ec6c212549ebcfbc18
+Subproject commit b31453f9d109456819673e8574162edb70fef73c
From cb561fa48617eb7af096e17be357057050071109 Mon Sep 17 00:00:00 2001
From: AI-Casanova <54461896+AI-Casanova@users.noreply.github.com>
Date: Sat, 23 Nov 2024 21:57:03 -0600
Subject: [PATCH 007/162] Major lora refactor: works on my machine edition
---
.../Lora/scripts/lora_script.py | 14 +-
modules/lora/extra_networks_lora.py | 151 ++++++
modules/lora/lora.py | 8 +
modules/lora/lora_convert.py | 477 ++++++++++++++++++
modules/lora/lora_extract.py | 271 ++++++++++
modules/lora/lyco_helpers.py | 66 +++
modules/lora/network.py | 187 +++++++
modules/lora/network_full.py | 26 +
modules/lora/network_glora.py | 30 ++
modules/lora/network_hada.py | 46 ++
modules/lora/network_ia3.py | 24 +
modules/lora/network_lokr.py | 57 +++
modules/lora/network_lora.py | 78 +++
modules/lora/network_norm.py | 23 +
modules/lora/network_oft.py | 81 +++
modules/lora/network_overrides.py | 49 ++
modules/lora/networks.py | 453 +++++++++++++++++
modules/lora/ui_extra_networks_lora.py | 123 +++++
modules/processing_diffusers.py | 5 +
modules/shared.py | 1 +
scripts/lora_script.py | 62 +++
21 files changed, 2225 insertions(+), 7 deletions(-)
create mode 100644 modules/lora/extra_networks_lora.py
create mode 100644 modules/lora/lora.py
create mode 100644 modules/lora/lora_convert.py
create mode 100644 modules/lora/lora_extract.py
create mode 100644 modules/lora/lyco_helpers.py
create mode 100644 modules/lora/network.py
create mode 100644 modules/lora/network_full.py
create mode 100644 modules/lora/network_glora.py
create mode 100644 modules/lora/network_hada.py
create mode 100644 modules/lora/network_ia3.py
create mode 100644 modules/lora/network_lokr.py
create mode 100644 modules/lora/network_lora.py
create mode 100644 modules/lora/network_norm.py
create mode 100644 modules/lora/network_oft.py
create mode 100644 modules/lora/network_overrides.py
create mode 100644 modules/lora/networks.py
create mode 100644 modules/lora/ui_extra_networks_lora.py
create mode 100644 scripts/lora_script.py
diff --git a/extensions-builtin/Lora/scripts/lora_script.py b/extensions-builtin/Lora/scripts/lora_script.py
index ffbef47d9..dea2985b3 100644
--- a/extensions-builtin/Lora/scripts/lora_script.py
+++ b/extensions-builtin/Lora/scripts/lora_script.py
@@ -5,7 +5,7 @@
from network import NetworkOnDisk
from ui_extra_networks_lora import ExtraNetworksPageLora
from extra_networks_lora import ExtraNetworkLora
-from modules import script_callbacks, extra_networks, ui_extra_networks, ui_models # pylint: disable=unused-import
+from modules import script_callbacks, extra_networks, ui_extra_networks, ui_models, shared # pylint: disable=unused-import
re_lora = re.compile(" 0 else 1.0
+ v = np.interp(step, m[1], m[0])
+ return v
+ else:
+ return m
+
+ stepwise = calculate_weight(sorted_positions(param), step, steps)
+ return stepwise
+
+
+def prompt(p):
+ if shared.opts.lora_apply_tags == 0:
+ return
+ all_tags = []
+ for loaded in networks.loaded_networks:
+ page = [en for en in shared.extra_networks if en.name == 'lora'][0]
+ item = page.create_item(loaded.name)
+ tags = (item or {}).get("tags", {})
+ loaded.tags = list(tags)
+ if len(loaded.tags) == 0:
+ loaded.tags.append(loaded.name)
+ if shared.opts.lora_apply_tags > 0:
+ loaded.tags = loaded.tags[:shared.opts.lora_apply_tags]
+ all_tags.extend(loaded.tags)
+ if len(all_tags) > 0:
+ shared.log.debug(f"Load network: type=LoRA tags={all_tags} max={shared.opts.lora_apply_tags} apply")
+ all_tags = ', '.join(all_tags)
+ p.extra_generation_params["LoRA tags"] = all_tags
+ if '_tags_' in p.prompt:
+ p.prompt = p.prompt.replace('_tags_', all_tags)
+ else:
+ p.prompt = f"{p.prompt}, {all_tags}"
+ if p.all_prompts is not None:
+ for i in range(len(p.all_prompts)):
+ if '_tags_' in p.all_prompts[i]:
+ p.all_prompts[i] = p.all_prompts[i].replace('_tags_', all_tags)
+ else:
+ p.all_prompts[i] = f"{p.all_prompts[i]}, {all_tags}"
+
+
+def infotext(p):
+ names = [i.name for i in networks.loaded_networks]
+ if len(names) > 0:
+ p.extra_generation_params["LoRA networks"] = ", ".join(names)
+ if shared.opts.lora_add_hashes_to_infotext:
+ network_hashes = []
+ for item in networks.loaded_networks:
+ if not item.network_on_disk.shorthash:
+ continue
+ network_hashes.append(item.network_on_disk.shorthash)
+ if len(network_hashes) > 0:
+ p.extra_generation_params["LoRA hashes"] = ", ".join(network_hashes)
+
+
+def parse(p, params_list, step=0):
+ names = []
+ te_multipliers = []
+ unet_multipliers = []
+ dyn_dims = []
+ for params in params_list:
+ assert params.items
+ names.append(params.positional[0])
+ te_multiplier = params.named.get("te", params.positional[1] if len(params.positional) > 1 else shared.opts.extra_networks_default_multiplier)
+ if isinstance(te_multiplier, str) and "@" in te_multiplier:
+ te_multiplier = get_stepwise(te_multiplier, step, p.steps)
+ else:
+ te_multiplier = float(te_multiplier)
+ unet_multiplier = [params.positional[2] if len(params.positional) > 2 else te_multiplier] * 3
+ unet_multiplier = [params.named.get("unet", unet_multiplier[0])] * 3
+ unet_multiplier[0] = params.named.get("in", unet_multiplier[0])
+ unet_multiplier[1] = params.named.get("mid", unet_multiplier[1])
+ unet_multiplier[2] = params.named.get("out", unet_multiplier[2])
+ for i in range(len(unet_multiplier)):
+ if isinstance(unet_multiplier[i], str) and "@" in unet_multiplier[i]:
+ unet_multiplier[i] = get_stepwise(unet_multiplier[i], step, p.steps)
+ else:
+ unet_multiplier[i] = float(unet_multiplier[i])
+ dyn_dim = int(params.positional[3]) if len(params.positional) > 3 else None
+ dyn_dim = int(params.named["dyn"]) if "dyn" in params.named else dyn_dim
+ te_multipliers.append(te_multiplier)
+ unet_multipliers.append(unet_multiplier)
+ dyn_dims.append(dyn_dim)
+ return names, te_multipliers, unet_multipliers, dyn_dims
+
+
+class ExtraNetworkLora(extra_networks.ExtraNetwork):
+
+ def __init__(self):
+ super().__init__('lora')
+ self.active = False
+ self.model = None
+ self.errors = {}
+
+ def activate(self, p, params_list, step=0):
+ t0 = time.time()
+ self.errors.clear()
+ if self.active:
+ if self.model != shared.opts.sd_model_checkpoint: # reset if model changed
+ self.active = False
+ if len(params_list) > 0 and not self.active: # activate patches once
+ shared.log.debug(f'Activate network: type=LoRA model="{shared.opts.sd_model_checkpoint}"')
+ self.active = True
+ self.model = shared.opts.sd_model_checkpoint
+ names, te_multipliers, unet_multipliers, dyn_dims = parse(p, params_list, step)
+ networks.load_networks(names, te_multipliers, unet_multipliers, dyn_dims)
+ t1 = time.time()
+ if len(networks.loaded_networks) > 0 and step == 0:
+ infotext(p)
+ prompt(p)
+ shared.log.info(f'Load network: type=LoRA apply={[n.name for n in networks.loaded_networks]} te={te_multipliers} unet={unet_multipliers} dims={dyn_dims} load={t1-t0:.2f}')
+
+ def deactivate(self, p):
+ t0 = time.time()
+ if shared.native and len(networks.diffuser_loaded) > 0:
+ if hasattr(shared.sd_model, "unload_lora_weights") and hasattr(shared.sd_model, "text_encoder"):
+ if not (shared.compiled_model_state is not None and shared.compiled_model_state.is_compiled is True):
+ try:
+ if shared.opts.lora_fuse_diffusers:
+ shared.sd_model.unfuse_lora()
+ shared.sd_model.unload_lora_weights() # fails for non-CLIP models
+ except Exception:
+ pass
+ t1 = time.time()
+ networks.timer['restore'] += t1 - t0
+ if self.active and networks.debug:
+ shared.log.debug(f"Network end: type=LoRA load={networks.timer['load']:.2f} apply={networks.timer['apply']:.2f} restore={networks.timer['restore']:.2f}")
+ if self.errors:
+ for k, v in self.errors.items():
+ shared.log.error(f'LoRA: name="{k}" errors={v}')
+ self.errors.clear()
diff --git a/modules/lora/lora.py b/modules/lora/lora.py
new file mode 100644
index 000000000..33adfe05c
--- /dev/null
+++ b/modules/lora/lora.py
@@ -0,0 +1,8 @@
+# import networks
+#
+# list_available_loras = networks.list_available_networks
+# available_loras = networks.available_networks
+# available_lora_aliases = networks.available_network_aliases
+# available_lora_hash_lookup = networks.available_network_hash_lookup
+# forbidden_lora_aliases = networks.forbidden_network_aliases
+# loaded_loras = networks.loaded_networks
diff --git a/modules/lora/lora_convert.py b/modules/lora/lora_convert.py
new file mode 100644
index 000000000..6bf563125
--- /dev/null
+++ b/modules/lora/lora_convert.py
@@ -0,0 +1,477 @@
+import os
+import re
+import bisect
+from typing import Dict
+import torch
+from modules import shared
+
+
+debug = os.environ.get('SD_LORA_DEBUG', None) is not None
+suffix_conversion = {
+ "attentions": {},
+ "resnets": {
+ "conv1": "in_layers_2",
+ "conv2": "out_layers_3",
+ "norm1": "in_layers_0",
+ "norm2": "out_layers_0",
+ "time_emb_proj": "emb_layers_1",
+ "conv_shortcut": "skip_connection",
+ }
+}
+re_digits = re.compile(r"\d+")
+re_x_proj = re.compile(r"(.*)_([qkv]_proj)$")
+re_compiled = {}
+
+
+def make_unet_conversion_map() -> Dict[str, str]:
+ unet_conversion_map_layer = []
+
+ for i in range(3): # num_blocks is 3 in sdxl
+ # loop over downblocks/upblocks
+ for j in range(2):
+ # loop over resnets/attentions for downblocks
+ hf_down_res_prefix = f"down_blocks.{i}.resnets.{j}."
+ sd_down_res_prefix = f"input_blocks.{3 * i + j + 1}.0."
+ unet_conversion_map_layer.append((sd_down_res_prefix, hf_down_res_prefix))
+ if i < 3:
+ # no attention layers in down_blocks.3
+ hf_down_atn_prefix = f"down_blocks.{i}.attentions.{j}."
+ sd_down_atn_prefix = f"input_blocks.{3 * i + j + 1}.1."
+ unet_conversion_map_layer.append((sd_down_atn_prefix, hf_down_atn_prefix))
+
+ for j in range(3):
+ # loop over resnets/attentions for upblocks
+ hf_up_res_prefix = f"up_blocks.{i}.resnets.{j}."
+ sd_up_res_prefix = f"output_blocks.{3 * i + j}.0."
+ unet_conversion_map_layer.append((sd_up_res_prefix, hf_up_res_prefix))
+ # if i > 0: commentout for sdxl
+ # no attention layers in up_blocks.0
+ hf_up_atn_prefix = f"up_blocks.{i}.attentions.{j}."
+ sd_up_atn_prefix = f"output_blocks.{3 * i + j}.1."
+ unet_conversion_map_layer.append((sd_up_atn_prefix, hf_up_atn_prefix))
+
+ if i < 3:
+ # no downsample in down_blocks.3
+ hf_downsample_prefix = f"down_blocks.{i}.downsamplers.0.conv."
+ sd_downsample_prefix = f"input_blocks.{3 * (i + 1)}.0.op."
+ unet_conversion_map_layer.append((sd_downsample_prefix, hf_downsample_prefix))
+ # no upsample in up_blocks.3
+ hf_upsample_prefix = f"up_blocks.{i}.upsamplers.0."
+ sd_upsample_prefix = f"output_blocks.{3 * i + 2}.{2}." # change for sdxl
+ unet_conversion_map_layer.append((sd_upsample_prefix, hf_upsample_prefix))
+
+ hf_mid_atn_prefix = "mid_block.attentions.0."
+ sd_mid_atn_prefix = "middle_block.1."
+ unet_conversion_map_layer.append((sd_mid_atn_prefix, hf_mid_atn_prefix))
+
+ for j in range(2):
+ hf_mid_res_prefix = f"mid_block.resnets.{j}."
+ sd_mid_res_prefix = f"middle_block.{2 * j}."
+ unet_conversion_map_layer.append((sd_mid_res_prefix, hf_mid_res_prefix))
+
+ unet_conversion_map_resnet = [
+ # (stable-diffusion, HF Diffusers)
+ ("in_layers.0.", "norm1."),
+ ("in_layers.2.", "conv1."),
+ ("out_layers.0.", "norm2."),
+ ("out_layers.3.", "conv2."),
+ ("emb_layers.1.", "time_emb_proj."),
+ ("skip_connection.", "conv_shortcut."),
+ ]
+
+ unet_conversion_map = []
+ for sd, hf in unet_conversion_map_layer:
+ if "resnets" in hf:
+ for sd_res, hf_res in unet_conversion_map_resnet:
+ unet_conversion_map.append((sd + sd_res, hf + hf_res))
+ else:
+ unet_conversion_map.append((sd, hf))
+
+ for j in range(2):
+ hf_time_embed_prefix = f"time_embedding.linear_{j + 1}."
+ sd_time_embed_prefix = f"time_embed.{j * 2}."
+ unet_conversion_map.append((sd_time_embed_prefix, hf_time_embed_prefix))
+
+ for j in range(2):
+ hf_label_embed_prefix = f"add_embedding.linear_{j + 1}."
+ sd_label_embed_prefix = f"label_emb.0.{j * 2}."
+ unet_conversion_map.append((sd_label_embed_prefix, hf_label_embed_prefix))
+
+ unet_conversion_map.append(("input_blocks.0.0.", "conv_in."))
+ unet_conversion_map.append(("out.0.", "conv_norm_out."))
+ unet_conversion_map.append(("out.2.", "conv_out."))
+
+ sd_hf_conversion_map = {sd.replace(".", "_")[:-1]: hf.replace(".", "_")[:-1] for sd, hf in unet_conversion_map}
+ return sd_hf_conversion_map
+
+
+class KeyConvert:
+ def __init__(self):
+ self.is_sdxl = True if shared.sd_model_type == "sdxl" else False
+ self.UNET_CONVERSION_MAP = make_unet_conversion_map() if self.is_sdxl else None
+ self.LORA_PREFIX_UNET = "lora_unet_"
+ self.LORA_PREFIX_TEXT_ENCODER = "lora_te_"
+ self.OFT_PREFIX_UNET = "oft_unet_"
+        # SDXL: must start with LORA_PREFIX_TEXT_ENCODER
+ self.LORA_PREFIX_TEXT_ENCODER1 = "lora_te1_"
+ self.LORA_PREFIX_TEXT_ENCODER2 = "lora_te2_"
+
+ def __call__(self, key):
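+        # normalize a raw LoRA key into the model's network_layer_mapping key;
+        # returns (key, module) where module is None if no mapping entry matched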
+ if self.is_sdxl:
+ if "diffusion_model" in key: # Fix NTC Slider naming error
+ key = key.replace("diffusion_model", "lora_unet")
+ map_keys = list(self.UNET_CONVERSION_MAP.keys()) # prefix of U-Net modules
+ map_keys.sort()
+ search_key = key.replace(self.LORA_PREFIX_UNET, "").replace(self.OFT_PREFIX_UNET, "").replace(self.LORA_PREFIX_TEXT_ENCODER1, "").replace(self.LORA_PREFIX_TEXT_ENCODER2, "")
+ position = bisect.bisect_right(map_keys, search_key)
+ map_key = map_keys[position - 1]
+ if search_key.startswith(map_key):
+ key = key.replace(map_key, self.UNET_CONVERSION_MAP[map_key]).replace("oft", "lora") # pylint: disable=unsubscriptable-object
+ if "lycoris" in key and "transformer" in key:
+ key = key.replace("lycoris", "lora_transformer")
+ sd_module = shared.sd_model.network_layer_mapping.get(key, None)
+ if sd_module is None:
+ sd_module = shared.sd_model.network_layer_mapping.get(key.replace("guidance", "timestep"), None) # FLUX1 fix
+ if debug and sd_module is None:
+ raise RuntimeError(f"LoRA key not found in network_layer_mapping: key={key} mapping={shared.sd_model.network_layer_mapping.keys()}")
+ return key, sd_module
+
+
+# Taken from https://github.com/huggingface/diffusers/blob/main/src/diffusers/loaders/lora_conversion_utils.py
+# Modified from 'lora_A' and 'lora_B' to 'lora_down' and 'lora_up'
+# Added early exit
+# The utilities under `_convert_kohya_flux_lora_to_diffusers()`
+# are taken from https://github.com/kohya-ss/sd-scripts/blob/a61cf73a5cb5209c3f4d1a3688dd276a4dfd1ecb/networks/convert_flux_lora.py
+# All credits go to `kohya-ss`.
+def _convert_to_ai_toolkit(sds_sd, ait_sd, sds_key, ait_key):
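+    # move a single kohya-style down/up pair into the diffusers (ai-toolkit) naming scheme,
+    # folding the alpha/rank scale into the weights so no separate alpha key is kept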
+ if sds_key + ".lora_down.weight" not in sds_sd:
+ return
+ down_weight = sds_sd.pop(sds_key + ".lora_down.weight")
+
+ # scale weight by alpha and dim
+ rank = down_weight.shape[0]
+ alpha = sds_sd.pop(sds_key + ".alpha").item() # alpha is scalar
+ scale = alpha / rank # LoRA is scaled by 'alpha / rank' in forward pass, so we need to scale it back here
+
+ # calculate scale_down and scale_up to keep the same value. if scale is 4, scale_down is 2 and scale_up is 2
+ scale_down = scale
+ scale_up = 1.0
+ while scale_down * 2 < scale_up:
+ scale_down *= 2
+ scale_up /= 2
+
+ ait_sd[ait_key + ".lora_down.weight"] = down_weight * scale_down
+ ait_sd[ait_key + ".lora_up.weight"] = sds_sd.pop(sds_key + ".lora_up.weight") * scale_up
+
+def _convert_to_ai_toolkit_cat(sds_sd, ait_sd, sds_key, ait_keys, dims=None):
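+    # split a fused LoRA pair (e.g. qkv) into one down/up pair per target key:
+    # lora_down is duplicated (or chunked when the up weight is block-sparse) and lora_up is split along dim 0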
+ if sds_key + ".lora_down.weight" not in sds_sd:
+ return
+ down_weight = sds_sd.pop(sds_key + ".lora_down.weight")
+ up_weight = sds_sd.pop(sds_key + ".lora_up.weight")
+ sd_lora_rank = down_weight.shape[0]
+
+ # scale weight by alpha and dim
+ alpha = sds_sd.pop(sds_key + ".alpha")
+ scale = alpha / sd_lora_rank
+
+ # calculate scale_down and scale_up
+ scale_down = scale
+ scale_up = 1.0
+ while scale_down * 2 < scale_up:
+ scale_down *= 2
+ scale_up /= 2
+
+ down_weight = down_weight * scale_down
+ up_weight = up_weight * scale_up
+
+ # calculate dims if not provided
+ num_splits = len(ait_keys)
+ if dims is None:
+ dims = [up_weight.shape[0] // num_splits] * num_splits
+ else:
+ assert sum(dims) == up_weight.shape[0]
+
+ # check upweight is sparse or not
+ is_sparse = False
+ if sd_lora_rank % num_splits == 0:
+ ait_rank = sd_lora_rank // num_splits
+ is_sparse = True
+ i = 0
+ for j in range(len(dims)):
+ for k in range(len(dims)):
+ if j == k:
+ continue
+ is_sparse = is_sparse and torch.all(
+ up_weight[i : i + dims[j], k * ait_rank : (k + 1) * ait_rank] == 0
+ )
+ i += dims[j]
+ # if is_sparse:
+ # print(f"weight is sparse: {sds_key}")
+
+ # make ai-toolkit weight
+ ait_down_keys = [k + ".lora_down.weight" for k in ait_keys]
+ ait_up_keys = [k + ".lora_up.weight" for k in ait_keys]
+ if not is_sparse:
+ # down_weight is copied to each split
+ ait_sd.update({k: down_weight for k in ait_down_keys})
+
+ # up_weight is split to each split
+ ait_sd.update({k: v for k, v in zip(ait_up_keys, torch.split(up_weight, dims, dim=0))}) # noqa: C416 # pylint: disable=unnecessary-comprehension
+ else:
+ # down_weight is chunked to each split
+ ait_sd.update({k: v for k, v in zip(ait_down_keys, torch.chunk(down_weight, num_splits, dim=0))}) # noqa: C416 # pylint: disable=unnecessary-comprehension
+
+ # up_weight is sparse: only non-zero values are copied to each split
+ i = 0
+ for j in range(len(dims)):
+ ait_sd[ait_up_keys[j]] = up_weight[i : i + dims[j], j * ait_rank : (j + 1) * ait_rank].contiguous()
+ i += dims[j]
+
+def _convert_text_encoder_lora_key(key, lora_name):
+ """
+ Converts a text encoder LoRA key to a Diffusers compatible key.
+ """
+ if lora_name.startswith(("lora_te_", "lora_te1_")):
+ key_to_replace = "lora_te_" if lora_name.startswith("lora_te_") else "lora_te1_"
+ else:
+ key_to_replace = "lora_te2_"
+
+ diffusers_name = key.replace(key_to_replace, "").replace("_", ".")
+ diffusers_name = diffusers_name.replace("text.model", "text_model")
+ diffusers_name = diffusers_name.replace("self.attn", "self_attn")
+ diffusers_name = diffusers_name.replace("q.proj.lora", "to_q_lora")
+ diffusers_name = diffusers_name.replace("k.proj.lora", "to_k_lora")
+ diffusers_name = diffusers_name.replace("v.proj.lora", "to_v_lora")
+ diffusers_name = diffusers_name.replace("out.proj.lora", "to_out_lora")
+ diffusers_name = diffusers_name.replace("text.projection", "text_projection")
+
+ if "self_attn" in diffusers_name or "text_projection" in diffusers_name:
+ pass
+ elif "mlp" in diffusers_name:
+ # Be aware that this is the new diffusers convention and the rest of the code might
+ # not utilize it yet.
+ diffusers_name = diffusers_name.replace(".lora.", ".lora_linear_layer.")
+ return diffusers_name
+
+def _convert_kohya_flux_lora_to_diffusers(state_dict):
+ def _convert_sd_scripts_to_ai_toolkit(sds_sd):
+ ait_sd = {}
+ for i in range(19):
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_img_attn_proj",
+ f"transformer.transformer_blocks.{i}.attn.to_out.0",
+ )
+ _convert_to_ai_toolkit_cat(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_img_attn_qkv",
+ [
+ f"transformer.transformer_blocks.{i}.attn.to_q",
+ f"transformer.transformer_blocks.{i}.attn.to_k",
+ f"transformer.transformer_blocks.{i}.attn.to_v",
+ ],
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_img_mlp_0",
+ f"transformer.transformer_blocks.{i}.ff.net.0.proj",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_img_mlp_2",
+ f"transformer.transformer_blocks.{i}.ff.net.2",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_img_mod_lin",
+ f"transformer.transformer_blocks.{i}.norm1.linear",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_txt_attn_proj",
+ f"transformer.transformer_blocks.{i}.attn.to_add_out",
+ )
+ _convert_to_ai_toolkit_cat(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_txt_attn_qkv",
+ [
+ f"transformer.transformer_blocks.{i}.attn.add_q_proj",
+ f"transformer.transformer_blocks.{i}.attn.add_k_proj",
+ f"transformer.transformer_blocks.{i}.attn.add_v_proj",
+ ],
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_txt_mlp_0",
+ f"transformer.transformer_blocks.{i}.ff_context.net.0.proj",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_txt_mlp_2",
+ f"transformer.transformer_blocks.{i}.ff_context.net.2",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_double_blocks_{i}_txt_mod_lin",
+ f"transformer.transformer_blocks.{i}.norm1_context.linear",
+ )
+
+ for i in range(38):
+ _convert_to_ai_toolkit_cat(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_single_blocks_{i}_linear1",
+ [
+ f"transformer.single_transformer_blocks.{i}.attn.to_q",
+ f"transformer.single_transformer_blocks.{i}.attn.to_k",
+ f"transformer.single_transformer_blocks.{i}.attn.to_v",
+ f"transformer.single_transformer_blocks.{i}.proj_mlp",
+ ],
+ dims=[3072, 3072, 3072, 12288],
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_single_blocks_{i}_linear2",
+ f"transformer.single_transformer_blocks.{i}.proj_out",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_single_blocks_{i}_modulation_lin",
+ f"transformer.single_transformer_blocks.{i}.norm.linear",
+ )
+
+ if len(sds_sd) > 0:
+ return None
+
+ return ait_sd
+
+ return _convert_sd_scripts_to_ai_toolkit(state_dict)
+
+def _convert_kohya_sd3_lora_to_diffusers(state_dict):
+ def _convert_sd_scripts_to_ai_toolkit(sds_sd):
+ ait_sd = {}
+ for i in range(38):
+ _convert_to_ai_toolkit_cat(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_context_block_attn_qkv",
+ [
+ f"transformer.transformer_blocks.{i}.attn.to_q",
+ f"transformer.transformer_blocks.{i}.attn.to_k",
+ f"transformer.transformer_blocks.{i}.attn.to_v",
+ ],
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_context_block_mlp_fc1",
+ f"transformer.transformer_blocks.{i}.ff_context.net.0.proj",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_context_block_mlp_fc2",
+ f"transformer.transformer_blocks.{i}.ff_context.net.2",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_x_block_mlp_fc1",
+ f"transformer.transformer_blocks.{i}.ff.net.0.proj",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_x_block_mlp_fc2",
+ f"transformer.transformer_blocks.{i}.ff.net.2",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_context_block_adaLN_modulation_1",
+ f"transformer.transformer_blocks.{i}.norm1_context.linear",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_x_block_adaLN_modulation_1",
+ f"transformer.transformer_blocks.{i}.norm1.linear",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_context_block_attn_proj",
+ f"transformer.transformer_blocks.{i}.attn.to_add_out",
+ )
+ _convert_to_ai_toolkit(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_x_block_attn_proj",
+ f"transformer.transformer_blocks.{i}.attn.to_out_0",
+ )
+
+ _convert_to_ai_toolkit_cat(
+ sds_sd,
+ ait_sd,
+ f"lora_unet_joint_blocks_{i}_x_block_attn_qkv",
+ [
+ f"transformer.transformer_blocks.{i}.attn.add_q_proj",
+ f"transformer.transformer_blocks.{i}.attn.add_k_proj",
+ f"transformer.transformer_blocks.{i}.attn.add_v_proj",
+ ],
+ )
+ remaining_keys = list(sds_sd.keys())
+ te_state_dict = {}
+ if remaining_keys:
+ if not all(k.startswith("lora_te1") for k in remaining_keys):
+ raise ValueError(f"Incompatible keys detected: \n\n {', '.join(remaining_keys)}")
+ for key in remaining_keys:
+ if not key.endswith("lora_down.weight"):
+ continue
+
+ lora_name = key.split(".")[0]
+ lora_name_up = f"{lora_name}.lora_up.weight"
+ lora_name_alpha = f"{lora_name}.alpha"
+ diffusers_name = _convert_text_encoder_lora_key(key, lora_name)
+
+ if lora_name.startswith(("lora_te_", "lora_te1_")):
+ down_weight = sds_sd.pop(key)
+ sd_lora_rank = down_weight.shape[0]
+ te_state_dict[diffusers_name] = down_weight
+ te_state_dict[diffusers_name.replace(".down.", ".up.")] = sds_sd.pop(lora_name_up)
+
+ if lora_name_alpha in sds_sd:
+ alpha = sds_sd.pop(lora_name_alpha).item()
+ scale = alpha / sd_lora_rank
+
+ scale_down = scale
+ scale_up = 1.0
+ while scale_down * 2 < scale_up:
+ scale_down *= 2
+ scale_up /= 2
+
+ te_state_dict[diffusers_name] *= scale_down
+ te_state_dict[diffusers_name.replace(".down.", ".up.")] *= scale_up
+
+ if len(sds_sd) > 0:
+ print(f"Unsupported keys for ai-toolkit: {sds_sd.keys()}")
+
+ if te_state_dict:
+ te_state_dict = {f"text_encoder.{module_name}": params for module_name, params in te_state_dict.items()}
+
+ new_state_dict = {**ait_sd, **te_state_dict}
+ return new_state_dict
+
+ return _convert_sd_scripts_to_ai_toolkit(state_dict)
diff --git a/modules/lora/lora_extract.py b/modules/lora/lora_extract.py
new file mode 100644
index 000000000..c2e0a275b
--- /dev/null
+++ b/modules/lora/lora_extract.py
@@ -0,0 +1,271 @@
+import os
+import time
+import json
+import datetime
+import torch
+from safetensors.torch import save_file
+import gradio as gr
+from rich import progress as p
+from modules import shared, devices
+from modules.ui_common import create_refresh_button
+from modules.call_queue import wrap_gradio_gpu_call
+
+
+class SVDHandler:
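+    # decomposes the difference between a module's current weight and its pre-LoRA backup via low-rank SVD
+    # and re-emits it as lora_up / lora_down / alpha tensors for the extracted network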
+ def __init__(self, maxrank=0, rank_ratio=1):
+ self.network_name: str = None
+ self.U: torch.Tensor = None
+ self.S: torch.Tensor = None
+ self.Vh: torch.Tensor = None
+ self.maxrank: int = maxrank
+ self.rank_ratio: float = rank_ratio
+ self.rank: int = 0
+ self.out_size: int = None
+ self.in_size: int = None
+ self.kernel_size: tuple[int, int] = None
+ self.conv2d: bool = False
+
+ def decompose(self, weight, backupweight):
+ self.conv2d = len(weight.size()) == 4
+ self.kernel_size = None if not self.conv2d else weight.size()[2:4]
+ self.out_size, self.in_size = weight.size()[0:2]
+ diffweight = weight.clone().to(devices.device)
+ diffweight -= backupweight.to(devices.device)
+ if self.conv2d:
+ if self.conv2d and self.kernel_size != (1, 1):
+ diffweight = diffweight.flatten(start_dim=1)
+ else:
+ diffweight = diffweight.squeeze()
+ self.U, self.S, self.Vh = torch.svd_lowrank(diffweight.to(device=devices.device, dtype=torch.float), self.maxrank, 2)
+ # del diffweight
+ self.U = self.U.to(device=devices.cpu, dtype=torch.bfloat16)
+ self.S = self.S.to(device=devices.cpu, dtype=torch.bfloat16)
+ self.Vh = self.Vh.t().to(device=devices.cpu, dtype=torch.bfloat16) # svd_lowrank outputs a transposed matrix
+
+ def findrank(self):
+ if self.rank_ratio < 1:
+ S_squared = self.S.pow(2)
+ S_fro_sq = float(torch.sum(S_squared))
+ sum_S_squared = torch.cumsum(S_squared, dim=0) / S_fro_sq
+ index = int(torch.searchsorted(sum_S_squared, self.rank_ratio ** 2)) + 1
+ index = max(1, min(index, len(self.S) - 1))
+ self.rank = index
+ if self.maxrank > 0:
+ self.rank = min(self.rank, self.maxrank)
+ else:
+ self.rank = min(self.in_size, self.out_size, self.maxrank)
+
+ def makeweights(self):
+ self.findrank()
+ up = self.U[:, :self.rank] @ torch.diag(self.S[:self.rank])
+ down = self.Vh[:self.rank, :]
+ if self.conv2d and self.kernel_size is not None:
+ up = up.reshape(self.out_size, self.rank, 1, 1)
+ down = down.reshape(self.rank, self.in_size, self.kernel_size[0], self.kernel_size[1]) # pylint: disable=unsubscriptable-object
+ return_dict = {f'{self.network_name}.lora_up.weight': up.contiguous(),
+ f'{self.network_name}.lora_down.weight': down.contiguous(),
+ f'{self.network_name}.alpha': torch.tensor(down.shape[0]),
+ }
+ return return_dict
+
+
+def loaded_lora():
+ if not shared.sd_loaded:
+ return ""
+ loaded = set()
+ if hasattr(shared.sd_model, 'unet'):
+ for _name, module in shared.sd_model.unet.named_modules():
+ current = getattr(module, "network_current_names", None)
+ if current is not None:
+ current = [item[0] for item in current]
+ loaded.update(current)
+ return list(loaded)
+
+
+def loaded_lora_str():
+ return ", ".join(loaded_lora())
+
+
+def make_meta(fn, maxrank, rank_ratio):
+ meta = {
+ "model_spec.sai_model_spec": "1.0.0",
+ "model_spec.title": os.path.splitext(os.path.basename(fn))[0],
+ "model_spec.author": "SD.Next",
+ "model_spec.implementation": "https://github.com/vladmandic/automatic",
+ "model_spec.date": datetime.datetime.now().astimezone().replace(microsecond=0).isoformat(),
+ "model_spec.base_model": shared.opts.sd_model_checkpoint,
+ "model_spec.dtype": str(devices.dtype),
+ "model_spec.base_lora": json.dumps(loaded_lora()),
+ "model_spec.config": f"maxrank={maxrank} rank_ratio={rank_ratio}",
+ }
+ if shared.sd_model_type == "sdxl":
+ meta["model_spec.architecture"] = "stable-diffusion-xl-v1-base/lora" # sai standard
+ meta["ss_base_model_version"] = "sdxl_base_v1-0" # kohya standard
+ elif shared.sd_model_type == "sd":
+ meta["model_spec.architecture"] = "stable-diffusion-v1/lora"
+ meta["ss_base_model_version"] = "sd_v1"
+ elif shared.sd_model_type == "f1":
+ meta["model_spec.architecture"] = "flux-1-dev/lora"
+ meta["ss_base_model_version"] = "flux1"
+ elif shared.sd_model_type == "sc":
+ meta["model_spec.architecture"] = "stable-cascade-v1-prior/lora"
+ return meta
+
+
+def make_lora(fn, maxrank, auto_rank, rank_ratio, modules, overwrite):
+ if not shared.sd_loaded or not shared.native:
+ msg = "LoRA extract: model not loaded"
+ shared.log.warning(msg)
+ yield msg
+ return
+    if len(loaded_lora()) == 0:
+ msg = "LoRA extract: no LoRA detected"
+ shared.log.warning(msg)
+ yield msg
+ return
+ if not fn:
+ msg = "LoRA extract: target filename required"
+ shared.log.warning(msg)
+ yield msg
+ return
+ t0 = time.time()
+ maxrank = int(maxrank)
+ rank_ratio = 1 if not auto_rank else rank_ratio
+ shared.log.debug(f'LoRA extract: modules={modules} maxrank={maxrank} auto={auto_rank} ratio={rank_ratio} fn="{fn}"')
+ shared.state.begin('LoRA extract')
+
+ with p.Progress(p.TextColumn('[cyan]LoRA extract'), p.BarColumn(), p.TaskProgressColumn(), p.TimeRemainingColumn(), p.TimeElapsedColumn(), p.TextColumn('[cyan]{task.description}'), console=shared.console) as progress:
+
+ if 'te' in modules and getattr(shared.sd_model, 'text_encoder', None) is not None:
+            te1_modules = list(shared.sd_model.text_encoder.named_modules())
+            task = progress.add_task(description="te1 decompose", total=len(te1_modules))
+ for name, module in shared.sd_model.text_encoder.named_modules():
+ progress.update(task, advance=1)
+ weights_backup = getattr(module, "network_weights_backup", None)
+ if weights_backup is None or getattr(module, "network_current_names", None) is None:
+ continue
+ prefix = "lora_te1_" if hasattr(shared.sd_model, 'text_encoder_2') else "lora_te_"
+ module.svdhandler = SVDHandler(maxrank, rank_ratio)
+ module.svdhandler.network_name = prefix + name.replace(".", "_")
+ with devices.inference_context():
+ module.svdhandler.decompose(module.weight, weights_backup)
+ progress.remove_task(task)
+ t1 = time.time()
+
+ if 'te' in modules and getattr(shared.sd_model, 'text_encoder_2', None) is not None:
+            te2_modules = list(shared.sd_model.text_encoder_2.named_modules())
+            task = progress.add_task(description="te2 decompose", total=len(te2_modules))
+ for name, module in shared.sd_model.text_encoder_2.named_modules():
+ progress.update(task, advance=1)
+ weights_backup = getattr(module, "network_weights_backup", None)
+ if weights_backup is None or getattr(module, "network_current_names", None) is None:
+ continue
+ module.svdhandler = SVDHandler(maxrank, rank_ratio)
+ module.svdhandler.network_name = "lora_te2_" + name.replace(".", "_")
+ with devices.inference_context():
+ module.svdhandler.decompose(module.weight, weights_backup)
+ progress.remove_task(task)
+ t2 = time.time()
+
+ if 'unet' in modules and getattr(shared.sd_model, 'unet', None) is not None:
+            unet_modules = list(shared.sd_model.unet.named_modules())
+            task = progress.add_task(description="unet decompose", total=len(unet_modules))
+ for name, module in shared.sd_model.unet.named_modules():
+ progress.update(task, advance=1)
+ weights_backup = getattr(module, "network_weights_backup", None)
+ if weights_backup is None or getattr(module, "network_current_names", None) is None:
+ continue
+ module.svdhandler = SVDHandler(maxrank, rank_ratio)
+ module.svdhandler.network_name = "lora_unet_" + name.replace(".", "_")
+ with devices.inference_context():
+ module.svdhandler.decompose(module.weight, weights_backup)
+ progress.remove_task(task)
+ t3 = time.time()
+
+ # TODO: Handle quant for Flux
+ # if 'te' in modules and getattr(shared.sd_model, 'transformer', None) is not None:
+ # for name, module in shared.sd_model.transformer.named_modules():
+ # if "norm" in name and "linear" not in name:
+ # continue
+ # weights_backup = getattr(module, "network_weights_backup", None)
+ # if weights_backup is None:
+ # continue
+ # module.svdhandler = SVDHandler()
+ # module.svdhandler.network_name = "lora_transformer_" + name.replace(".", "_")
+ # module.svdhandler.decompose(module.weight, weights_backup)
+ # module.svdhandler.findrank(rank, rank_ratio)
+
+ lora_state_dict = {}
+ for sub in ['text_encoder', 'text_encoder_2', 'unet', 'transformer']:
+ submodel = getattr(shared.sd_model, sub, None)
+ if submodel is not None:
+                sub_modules = list(submodel.named_modules())
+                task = progress.add_task(description=f"{sub} extract", total=len(sub_modules))
+ for _name, module in submodel.named_modules():
+ progress.update(task, advance=1)
+ if not hasattr(module, "svdhandler"):
+ continue
+ lora_state_dict.update(module.svdhandler.makeweights())
+ del module.svdhandler
+ progress.remove_task(task)
+ t4 = time.time()
+
+ if not os.path.isabs(fn):
+ fn = os.path.join(shared.cmd_opts.lora_dir, fn)
+ if not fn.endswith('.safetensors'):
+ fn += '.safetensors'
+ if os.path.exists(fn):
+ if overwrite:
+ os.remove(fn)
+ else:
+ msg = f'LoRA extract: fn="{fn}" file exists'
+ shared.log.warning(msg)
+ yield msg
+ return
+
+ shared.state.end()
+ meta = make_meta(fn, maxrank, rank_ratio)
+ shared.log.debug(f'LoRA metadata: {meta}')
+ try:
+ save_file(tensors=lora_state_dict, metadata=meta, filename=fn)
+ except Exception as e:
+ msg = f'LoRA extract error: fn="{fn}" {e}'
+ shared.log.error(msg)
+ yield msg
+ return
+ t5 = time.time()
+ shared.log.debug(f'LoRA extract: time={t5-t0:.2f} te1={t1-t0:.2f} te2={t2-t1:.2f} unet={t3-t2:.2f} save={t5-t4:.2f}')
+ keys = list(lora_state_dict.keys())
+ msg = f'LoRA extract: fn="{fn}" keys={len(keys)}'
+ shared.log.info(msg)
+ yield msg
+
+
+def create_ui():
+ def gr_show(visible=True):
+ return {"visible": visible, "__type__": "update"}
+
+ with gr.Tab(label="Extract LoRA"):
+ with gr.Row():
+ loaded = gr.Textbox(placeholder="Press refresh to query loaded LoRA", label="Loaded LoRA", interactive=False)
+ create_refresh_button(loaded, lambda: None, lambda: {'value': loaded_lora_str()}, "testid")
+ with gr.Group():
+ with gr.Row():
+ modules = gr.CheckboxGroup(label="Modules to extract", value=['unet'], choices=['te', 'unet'])
+ with gr.Row():
+ auto_rank = gr.Checkbox(value=False, label="Automatically determine rank")
+ rank_ratio = gr.Slider(label="Autorank ratio", value=1, minimum=0, maximum=1, step=0.05, visible=False)
+ rank = gr.Slider(label="Maximum rank", value=32, minimum=1, maximum=256)
+ with gr.Row():
+ filename = gr.Textbox(label="LoRA target filename")
+ overwrite = gr.Checkbox(value=False, label="Overwrite existing file")
+ with gr.Row():
+ extract = gr.Button(value="Extract LoRA", variant='primary')
+ status = gr.HTML(value="", show_label=False)
+
+ auto_rank.change(fn=lambda x: gr_show(x), inputs=[auto_rank], outputs=[rank_ratio])
+ extract.click(
+ fn=wrap_gradio_gpu_call(make_lora, extra_outputs=[]),
+ inputs=[filename, rank, auto_rank, rank_ratio, modules, overwrite],
+ outputs=[status]
+ )
diff --git a/modules/lora/lyco_helpers.py b/modules/lora/lyco_helpers.py
new file mode 100644
index 000000000..9a16d25ab
--- /dev/null
+++ b/modules/lora/lyco_helpers.py
@@ -0,0 +1,66 @@
+import torch
+
+
+def make_weight_cp(t, wa, wb):
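+    # rebuild a full (conv) weight from a CP/Tucker decomposition: core tensor t is contracted with factor wb, then wa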
+ temp = torch.einsum('i j k l, j r -> i r k l', t, wb)
+ return torch.einsum('i j k l, i r -> r j k l', temp, wa)
+
+
+def rebuild_conventional(up, down, shape, dyn_dim=None):
+ up = up.reshape(up.size(0), -1)
+ down = down.reshape(down.size(0), -1)
+ if dyn_dim is not None:
+ up = up[:, :dyn_dim]
+ down = down[:dyn_dim, :]
+ return (up @ down).reshape(shape)
+
+
+def rebuild_cp_decomposition(up, down, mid):
+ up = up.reshape(up.size(0), -1)
+ down = down.reshape(down.size(0), -1)
+ return torch.einsum('n m k l, i n, m j -> i j k l', mid, up, down)
+
+
+# copied from https://github.com/KohakuBlueleaf/LyCORIS/blob/dev/lycoris/modules/lokr.py
+def factorization(dimension: int, factor:int=-1) -> tuple[int, int]:
+ """
+    return a tuple of two values that decompose the input dimension, using the number closest to factor;
+    the second value is greater than or equal to the first.
+
+    In LoRA with Kronecker product, the first value is used for the weight scale,
+    the second value for the weight itself.
+
+    Because the Kronecker product is non-commutative, A⊗B ≠ B⊗A, so the meaning of the two matrices differs slightly.
+
+ examples
+ factor
+ -1 2 4 8 16 ...
+ 127 -> 1, 127 127 -> 1, 127 127 -> 1, 127 127 -> 1, 127 127 -> 1, 127
+ 128 -> 8, 16 128 -> 2, 64 128 -> 4, 32 128 -> 8, 16 128 -> 8, 16
+ 250 -> 10, 25 250 -> 2, 125 250 -> 2, 125 250 -> 5, 50 250 -> 10, 25
+ 360 -> 8, 45 360 -> 2, 180 360 -> 4, 90 360 -> 8, 45 360 -> 12, 30
+ 512 -> 16, 32 512 -> 2, 256 512 -> 4, 128 512 -> 8, 64 512 -> 16, 32
+ 1024 -> 32, 32 1024 -> 2, 512 1024 -> 4, 256 1024 -> 8, 128 1024 -> 16, 64
+ """
+
+ if factor > 0 and (dimension % factor) == 0:
+ m = factor
+ n = dimension // factor
+ if m > n:
+ n, m = m, n
+ return m, n
+ if factor < 0:
+ factor = dimension
+ m, n = 1, dimension
+ length = m + n
+    while m < n:
+        new_m = m + 1
+        while dimension % new_m != 0:
+            new_m += 1
+        new_n = dimension // new_m
+        if new_m + new_n > length or new_m > factor:
+ break
+ m, n = new_m, new_n
+ if m > n:
+ n, m = m, n
+ return m, n
diff --git a/modules/lora/network.py b/modules/lora/network.py
new file mode 100644
index 000000000..0785ef9f4
--- /dev/null
+++ b/modules/lora/network.py
@@ -0,0 +1,187 @@
+import os
+from collections import namedtuple
+import enum
+
+from modules import sd_models, hashes, shared
+
+NetworkWeights = namedtuple('NetworkWeights', ['network_key', 'sd_key', 'w', 'sd_module'])
+metadata_tags_order = {"ss_sd_model_name": 1, "ss_resolution": 2, "ss_clip_skip": 3, "ss_num_train_images": 10, "ss_tag_frequency": 20}
+
+
+class SdVersion(enum.Enum):
+ Unknown = 1
+ SD1 = 2
+ SD2 = 3
+    SD3 = 4
+    SDXL = 5
+    SC = 6
+    F1 = 7
+
+
+class NetworkOnDisk:
+ def __init__(self, name, filename):
+ self.shorthash = None
+ self.hash = None
+ self.name = name
+ self.filename = filename
+ if filename.startswith(shared.cmd_opts.lora_dir):
+ self.fullname = os.path.splitext(filename[len(shared.cmd_opts.lora_dir):].strip("/"))[0]
+ else:
+ self.fullname = name
+ self.metadata = {}
+ self.is_safetensors = os.path.splitext(filename)[1].lower() == ".safetensors"
+ if self.is_safetensors:
+ self.metadata = sd_models.read_metadata_from_safetensors(filename)
+ if self.metadata:
+ m = {}
+ for k, v in sorted(self.metadata.items(), key=lambda x: metadata_tags_order.get(x[0], 999)):
+ m[k] = v
+ self.metadata = m
+ self.alias = self.metadata.get('ss_output_name', self.name)
+ sha256 = hashes.sha256_from_cache(self.filename, "lora/" + self.name) or hashes.sha256_from_cache(self.filename, "lora/" + self.name, use_addnet_hash=True) or self.metadata.get('sshs_model_hash')
+ self.set_hash(sha256)
+ self.sd_version = self.detect_version()
+
+ def detect_version(self):
+ base = str(self.metadata.get('ss_base_model_version', "")).lower()
+ arch = str(self.metadata.get('modelspec.architecture', "")).lower()
+ if base.startswith("sd_v1"):
+ return 'sd1'
+ if base.startswith("sdxl"):
+ return 'xl'
+ if base.startswith("stable_cascade"):
+ return 'sc'
+ if base.startswith("sd3"):
+ return 'sd3'
+ if base.startswith("flux"):
+ return 'f1'
+
+ if arch.startswith("stable-diffusion-v1"):
+ return 'sd1'
+ if arch.startswith("stable-diffusion-xl"):
+ return 'xl'
+ if arch.startswith("stable-cascade"):
+ return 'sc'
+ if arch.startswith("flux"):
+ return 'f1'
+
+ if "v1-5" in str(self.metadata.get('ss_sd_model_name', "")):
+ return 'sd1'
+ if str(self.metadata.get('ss_v2', "")) == "True":
+ return 'sd2'
+ if 'flux' in self.name.lower():
+ return 'f1'
+ if 'xl' in self.name.lower():
+ return 'xl'
+
+ return ''
+
+ def set_hash(self, v):
+ self.hash = v or ''
+ self.shorthash = self.hash[0:8]
+
+ def read_hash(self):
+ if not self.hash:
+ self.set_hash(hashes.sha256(self.filename, "lora/" + self.name, use_addnet_hash=self.is_safetensors) or '')
+
+ def get_alias(self):
+ import modules.lora.networks as networks
+ return self.name if shared.opts.lora_preferred_name == "filename" or self.alias.lower() in networks.forbidden_network_aliases else self.alias
+
+
+class Network: # LoraModule
+ def __init__(self, name, network_on_disk: NetworkOnDisk):
+ self.name = name
+ self.network_on_disk = network_on_disk
+ self.te_multiplier = 1.0
+ self.unet_multiplier = [1.0] * 3
+ self.dyn_dim = None
+ self.modules = {}
+ self.bundle_embeddings = {}
+ self.mtime = None
+        self.mentioned_name = None
+        """the text that was used to add the network to prompt - can be either name or an alias"""
+        self.tags = None
+
+
+class ModuleType:
+ def create_module(self, net: Network, weights: NetworkWeights) -> Network | None: # pylint: disable=W0613
+ return None
+
+
+class NetworkModule:
+ def __init__(self, net: Network, weights: NetworkWeights):
+ self.network = net
+ self.network_key = weights.network_key
+ self.sd_key = weights.sd_key
+ self.sd_module = weights.sd_module
+ if hasattr(self.sd_module, 'weight'):
+ self.shape = self.sd_module.weight.shape
+ self.dim = None
+ self.bias = weights.w.get("bias")
+ self.alpha = weights.w["alpha"].item() if "alpha" in weights.w else None
+ self.scale = weights.w["scale"].item() if "scale" in weights.w else None
+ self.dora_scale = weights.w.get("dora_scale", None)
+ self.dora_norm_dims = len(self.shape) - 1
+
+ def multiplier(self):
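+        # pick the per-block UNet multiplier (down/mid/up blocks); keys starting with 'lora_transformer'
+        # use te_multiplier, anything unmatched falls back to the first UNet multiplier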
+ unet_multiplier = 3 * [self.network.unet_multiplier] if not isinstance(self.network.unet_multiplier, list) else self.network.unet_multiplier
+ if 'transformer' in self.sd_key[:20]:
+ return self.network.te_multiplier
+ if "down_blocks" in self.sd_key:
+ return unet_multiplier[0]
+ if "mid_block" in self.sd_key:
+ return unet_multiplier[1]
+ if "up_blocks" in self.sd_key:
+ return unet_multiplier[2]
+ else:
+ return unet_multiplier[0]
+
+ def calc_scale(self):
+ if self.scale is not None:
+ return self.scale
+ if self.dim is not None and self.alpha is not None:
+ return self.alpha / self.dim
+ return 1.0
+
+ def apply_weight_decompose(self, updown, orig_weight):
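+        # DoRA-style weight decomposition: renormalize the merged weight (updown + original) per channel
+        # using the stored dora_scale, then return the difference from the original weight as the update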
+ # Match the device/dtype
+ orig_weight = orig_weight.to(updown.dtype)
+ dora_scale = self.dora_scale.to(device=orig_weight.device, dtype=updown.dtype)
+ updown = updown.to(orig_weight.device)
+
+ merged_scale1 = updown + orig_weight
+ merged_scale1_norm = (
+ merged_scale1.transpose(0, 1)
+ .reshape(merged_scale1.shape[1], -1)
+ .norm(dim=1, keepdim=True)
+ .reshape(merged_scale1.shape[1], *[1] * self.dora_norm_dims)
+ .transpose(0, 1)
+ )
+
+ dora_merged = (
+ merged_scale1 * (dora_scale / merged_scale1_norm)
+ )
+ final_updown = dora_merged - orig_weight
+ return final_updown
+
+ def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None):
+ if self.bias is not None:
+ updown = updown.reshape(self.bias.shape)
+ updown += self.bias.to(orig_weight.device, dtype=orig_weight.dtype)
+ updown = updown.reshape(output_shape)
+ if len(output_shape) == 4:
+ updown = updown.reshape(output_shape)
+ if orig_weight.size().numel() == updown.size().numel():
+ updown = updown.reshape(orig_weight.shape)
+ if ex_bias is not None:
+ ex_bias = ex_bias * self.multiplier()
+ if self.dora_scale is not None:
+ updown = self.apply_weight_decompose(updown, orig_weight)
+ return updown * self.calc_scale() * self.multiplier(), ex_bias
+
+ def calc_updown(self, target):
+ raise NotImplementedError
+
+ def forward(self, x, y):
+ raise NotImplementedError
diff --git a/modules/lora/network_full.py b/modules/lora/network_full.py
new file mode 100644
index 000000000..5eb0b2e4e
--- /dev/null
+++ b/modules/lora/network_full.py
@@ -0,0 +1,26 @@
+import modules.lora.network as network
+
+
+class ModuleTypeFull(network.ModuleType):
+ def create_module(self, net: network.Network, weights: network.NetworkWeights):
+ if all(x in weights.w for x in ["diff"]):
+ return NetworkModuleFull(net, weights)
+ return None
+
+
+class NetworkModuleFull(network.NetworkModule): # pylint: disable=abstract-method
+ def __init__(self, net: network.Network, weights: network.NetworkWeights):
+ super().__init__(net, weights)
+
+ self.weight = weights.w.get("diff")
+ self.ex_bias = weights.w.get("diff_b")
+
+ def calc_updown(self, target):
+ output_shape = self.weight.shape
+ updown = self.weight.to(target.device, dtype=target.dtype)
+ if self.ex_bias is not None:
+ ex_bias = self.ex_bias.to(target.device, dtype=target.dtype)
+ else:
+ ex_bias = None
+
+ return self.finalize_updown(updown, target, output_shape, ex_bias)
diff --git a/modules/lora/network_glora.py b/modules/lora/network_glora.py
new file mode 100644
index 000000000..ffcb25986
--- /dev/null
+++ b/modules/lora/network_glora.py
@@ -0,0 +1,30 @@
+import modules.lora.network as network
+
+
+class ModuleTypeGLora(network.ModuleType):
+ def create_module(self, net: network.Network, weights: network.NetworkWeights):
+ if all(x in weights.w for x in ["a1.weight", "a2.weight", "alpha", "b1.weight", "b2.weight"]):
+ return NetworkModuleGLora(net, weights)
+ return None
+
+# adapted from https://github.com/KohakuBlueleaf/LyCORIS
+class NetworkModuleGLora(network.NetworkModule): # pylint: disable=abstract-method
+ def __init__(self, net: network.Network, weights: network.NetworkWeights):
+ super().__init__(net, weights)
+
+ if hasattr(self.sd_module, 'weight'):
+ self.shape = self.sd_module.weight.shape
+
+ self.w1a = weights.w["a1.weight"]
+ self.w1b = weights.w["b1.weight"]
+ self.w2a = weights.w["a2.weight"]
+ self.w2b = weights.w["b2.weight"]
+
+ def calc_updown(self, target): # pylint: disable=arguments-differ
+ w1a = self.w1a.to(target.device, dtype=target.dtype)
+ w1b = self.w1b.to(target.device, dtype=target.dtype)
+ w2a = self.w2a.to(target.device, dtype=target.dtype)
+ w2b = self.w2b.to(target.device, dtype=target.dtype)
+ output_shape = [w1a.size(0), w1b.size(1)]
+ updown = (w2b @ w1b) + ((target @ w2a) @ w1a)
+ return self.finalize_updown(updown, target, output_shape)
diff --git a/modules/lora/network_hada.py b/modules/lora/network_hada.py
new file mode 100644
index 000000000..6fc142b3b
--- /dev/null
+++ b/modules/lora/network_hada.py
@@ -0,0 +1,46 @@
+import modules.lora.lyco_helpers as lyco_helpers
+import modules.lora.network as network
+
+
+class ModuleTypeHada(network.ModuleType):
+ def create_module(self, net: network.Network, weights: network.NetworkWeights):
+ if all(x in weights.w for x in ["hada_w1_a", "hada_w1_b", "hada_w2_a", "hada_w2_b"]):
+ return NetworkModuleHada(net, weights)
+ return None
+
+
+class NetworkModuleHada(network.NetworkModule): # pylint: disable=abstract-method
+ def __init__(self, net: network.Network, weights: network.NetworkWeights):
+ super().__init__(net, weights)
+ if hasattr(self.sd_module, 'weight'):
+ self.shape = self.sd_module.weight.shape
+ self.w1a = weights.w["hada_w1_a"]
+ self.w1b = weights.w["hada_w1_b"]
+ self.dim = self.w1b.shape[0]
+ self.w2a = weights.w["hada_w2_a"]
+ self.w2b = weights.w["hada_w2_b"]
+ self.t1 = weights.w.get("hada_t1")
+ self.t2 = weights.w.get("hada_t2")
+
+ def calc_updown(self, target):
+ w1a = self.w1a.to(target.device, dtype=target.dtype)
+ w1b = self.w1b.to(target.device, dtype=target.dtype)
+ w2a = self.w2a.to(target.device, dtype=target.dtype)
+ w2b = self.w2b.to(target.device, dtype=target.dtype)
+ output_shape = [w1a.size(0), w1b.size(1)]
+ if self.t1 is not None:
+ output_shape = [w1a.size(1), w1b.size(1)]
+ t1 = self.t1.to(target.device, dtype=target.dtype)
+ updown1 = lyco_helpers.make_weight_cp(t1, w1a, w1b)
+ output_shape += t1.shape[2:]
+ else:
+ if len(w1b.shape) == 4:
+ output_shape += w1b.shape[2:]
+ updown1 = lyco_helpers.rebuild_conventional(w1a, w1b, output_shape)
+ if self.t2 is not None:
+ t2 = self.t2.to(target.device, dtype=target.dtype)
+ updown2 = lyco_helpers.make_weight_cp(t2, w2a, w2b)
+ else:
+ updown2 = lyco_helpers.rebuild_conventional(w2a, w2b, output_shape)
+ updown = updown1 * updown2
+ return self.finalize_updown(updown, target, output_shape)
diff --git a/modules/lora/network_ia3.py b/modules/lora/network_ia3.py
new file mode 100644
index 000000000..479e42526
--- /dev/null
+++ b/modules/lora/network_ia3.py
@@ -0,0 +1,24 @@
+import modules.lora.network as network
+
+class ModuleTypeIa3(network.ModuleType):
+ def create_module(self, net: network.Network, weights: network.NetworkWeights):
+ if all(x in weights.w for x in ["weight"]):
+ return NetworkModuleIa3(net, weights)
+ return None
+
+
+class NetworkModuleIa3(network.NetworkModule): # pylint: disable=abstract-method
+ def __init__(self, net: network.Network, weights: network.NetworkWeights):
+ super().__init__(net, weights)
+ self.w = weights.w["weight"]
+ self.on_input = weights.w["on_input"].item()
+
+ def calc_updown(self, target):
+ w = self.w.to(target.device, dtype=target.dtype)
+ output_shape = [w.size(0), target.size(1)]
+ if self.on_input:
+ output_shape.reverse()
+ else:
+ w = w.reshape(-1, 1)
+ updown = target * w
+ return self.finalize_updown(updown, target, output_shape)
diff --git a/modules/lora/network_lokr.py b/modules/lora/network_lokr.py
new file mode 100644
index 000000000..877d4005b
--- /dev/null
+++ b/modules/lora/network_lokr.py
@@ -0,0 +1,57 @@
+import torch
+import modules.lora.lyco_helpers as lyco_helpers
+import modules.lora.network as network
+
+
+class ModuleTypeLokr(network.ModuleType):
+ def create_module(self, net: network.Network, weights: network.NetworkWeights):
+ has_1 = "lokr_w1" in weights.w or ("lokr_w1_a" in weights.w and "lokr_w1_b" in weights.w)
+ has_2 = "lokr_w2" in weights.w or ("lokr_w2_a" in weights.w and "lokr_w2_b" in weights.w)
+ if has_1 and has_2:
+ return NetworkModuleLokr(net, weights)
+ return None
+
+
+def make_kron(orig_shape, w1, w2):
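+    # LoKr update: the full weight delta is the Kronecker product of the two factors, reshaped to the target shape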
+ if len(w2.shape) == 4:
+ w1 = w1.unsqueeze(2).unsqueeze(2)
+ w2 = w2.contiguous()
+ return torch.kron(w1, w2).reshape(orig_shape)
+
+
+class NetworkModuleLokr(network.NetworkModule): # pylint: disable=abstract-method
+ def __init__(self, net: network.Network, weights: network.NetworkWeights):
+ super().__init__(net, weights)
+ self.w1 = weights.w.get("lokr_w1")
+ self.w1a = weights.w.get("lokr_w1_a")
+ self.w1b = weights.w.get("lokr_w1_b")
+ self.dim = self.w1b.shape[0] if self.w1b is not None else self.dim
+ self.w2 = weights.w.get("lokr_w2")
+ self.w2a = weights.w.get("lokr_w2_a")
+ self.w2b = weights.w.get("lokr_w2_b")
+ self.dim = self.w2b.shape[0] if self.w2b is not None else self.dim
+ self.t2 = weights.w.get("lokr_t2")
+
+ def calc_updown(self, target):
+ if self.w1 is not None:
+ w1 = self.w1.to(target.device, dtype=target.dtype)
+ else:
+ w1a = self.w1a.to(target.device, dtype=target.dtype)
+ w1b = self.w1b.to(target.device, dtype=target.dtype)
+ w1 = w1a @ w1b
+ if self.w2 is not None:
+ w2 = self.w2.to(target.device, dtype=target.dtype)
+ elif self.t2 is None:
+ w2a = self.w2a.to(target.device, dtype=target.dtype)
+ w2b = self.w2b.to(target.device, dtype=target.dtype)
+ w2 = w2a @ w2b
+ else:
+ t2 = self.t2.to(target.device, dtype=target.dtype)
+ w2a = self.w2a.to(target.device, dtype=target.dtype)
+ w2b = self.w2b.to(target.device, dtype=target.dtype)
+ w2 = lyco_helpers.make_weight_cp(t2, w2a, w2b)
+ output_shape = [w1.size(0) * w2.size(0), w1.size(1) * w2.size(1)]
+ if len(target.shape) == 4:
+ output_shape = target.shape
+ updown = make_kron(output_shape, w1, w2)
+ return self.finalize_updown(updown, target, output_shape)
diff --git a/modules/lora/network_lora.py b/modules/lora/network_lora.py
new file mode 100644
index 000000000..6c1d7ea3f
--- /dev/null
+++ b/modules/lora/network_lora.py
@@ -0,0 +1,78 @@
+import torch
+import diffusers.models.lora as diffusers_lora
+import modules.lora.lyco_helpers as lyco_helpers
+import modules.lora.network as network
+from modules import devices
+
+
+class ModuleTypeLora(network.ModuleType):
+ def create_module(self, net: network.Network, weights: network.NetworkWeights):
+ if all(x in weights.w for x in ["lora_up.weight", "lora_down.weight"]):
+ return NetworkModuleLora(net, weights)
+ return None
+
+
+class NetworkModuleLora(network.NetworkModule):
+
+ def __init__(self, net: network.Network, weights: network.NetworkWeights):
+ super().__init__(net, weights)
+ self.up_model = self.create_module(weights.w, "lora_up.weight")
+ self.down_model = self.create_module(weights.w, "lora_down.weight")
+ self.mid_model = self.create_module(weights.w, "lora_mid.weight", none_ok=True)
+ self.dim = weights.w["lora_down.weight"].shape[0]
+
+ def create_module(self, weights, key, none_ok=False):
+ from modules.shared import opts
+ weight = weights.get(key)
+ if weight is None and none_ok:
+ return None
+ linear_modules = [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear, torch.nn.MultiheadAttention, diffusers_lora.LoRACompatibleLinear]
+ is_linear = type(self.sd_module) in linear_modules or self.sd_module.__class__.__name__ in {"NNCFLinear", "QLinear", "Linear4bit"}
+ is_conv = type(self.sd_module) in [torch.nn.Conv2d, diffusers_lora.LoRACompatibleConv] or self.sd_module.__class__.__name__ in {"NNCFConv2d", "QConv2d"}
+ if is_linear:
+ weight = weight.reshape(weight.shape[0], -1)
+ module = torch.nn.Linear(weight.shape[1], weight.shape[0], bias=False)
+ elif is_conv and key == "lora_down.weight" or key == "dyn_up":
+ if len(weight.shape) == 2:
+ weight = weight.reshape(weight.shape[0], -1, 1, 1)
+ if weight.shape[2] != 1 or weight.shape[3] != 1:
+ module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], self.sd_module.kernel_size, self.sd_module.stride, self.sd_module.padding, bias=False)
+ else:
+ module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], (1, 1), bias=False)
+ elif is_conv and key == "lora_mid.weight":
+ module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], self.sd_module.kernel_size, self.sd_module.stride, self.sd_module.padding, bias=False)
+ elif is_conv and key == "lora_up.weight" or key == "dyn_down":
+ module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], (1, 1), bias=False)
+ else:
+ raise AssertionError(f'Lora unsupported: layer={self.network_key} type={type(self.sd_module).__name__}')
+ with torch.no_grad():
+ if weight.shape != module.weight.shape:
+ weight = weight.reshape(module.weight.shape)
+ module.weight.copy_(weight)
+ if opts.lora_load_gpu:
+ module = module.to(device=devices.device, dtype=devices.dtype)
+ module.weight.requires_grad_(False)
+ return module
+
+ def calc_updown(self, target): # pylint: disable=W0237
+ target_dtype = target.dtype if target.dtype != torch.uint8 else self.up_model.weight.dtype
+ up = self.up_model.weight.to(target.device, dtype=target_dtype)
+ down = self.down_model.weight.to(target.device, dtype=target_dtype)
+ output_shape = [up.size(0), down.size(1)]
+ if self.mid_model is not None:
+ # cp-decomposition
+ mid = self.mid_model.weight.to(target.device, dtype=target_dtype)
+ updown = lyco_helpers.rebuild_cp_decomposition(up, down, mid)
+ output_shape += mid.shape[2:]
+ else:
+ if len(down.shape) == 4:
+ output_shape += down.shape[2:]
+ updown = lyco_helpers.rebuild_conventional(up, down, output_shape, self.network.dyn_dim)
+ return self.finalize_updown(updown, target, output_shape)
+
+ def forward(self, x, y):
+ self.up_model.to(device=devices.device)
+ self.down_model.to(device=devices.device)
+ if hasattr(y, "scale"):
+ return y(scale=1) + self.up_model(self.down_model(x)) * self.multiplier() * self.calc_scale()
+ return y + self.up_model(self.down_model(x)) * self.multiplier() * self.calc_scale()
diff --git a/modules/lora/network_norm.py b/modules/lora/network_norm.py
new file mode 100644
index 000000000..e8f1740e3
--- /dev/null
+++ b/modules/lora/network_norm.py
@@ -0,0 +1,23 @@
+import modules.lora.network as network
+
+class ModuleTypeNorm(network.ModuleType):
+ def create_module(self, net: network.Network, weights: network.NetworkWeights):
+ if all(x in weights.w for x in ["w_norm", "b_norm"]):
+ return NetworkModuleNorm(net, weights)
+ return None
+
+
+class NetworkModuleNorm(network.NetworkModule): # pylint: disable=abstract-method
+ def __init__(self, net: network.Network, weights: network.NetworkWeights):
+ super().__init__(net, weights)
+ self.w_norm = weights.w.get("w_norm")
+ self.b_norm = weights.w.get("b_norm")
+
+ def calc_updown(self, target):
+ output_shape = self.w_norm.shape
+ updown = self.w_norm.to(target.device, dtype=target.dtype)
+ if self.b_norm is not None:
+ ex_bias = self.b_norm.to(target.device, dtype=target.dtype)
+ else:
+ ex_bias = None
+ return self.finalize_updown(updown, target, output_shape, ex_bias)
diff --git a/modules/lora/network_oft.py b/modules/lora/network_oft.py
new file mode 100644
index 000000000..808286066
--- /dev/null
+++ b/modules/lora/network_oft.py
@@ -0,0 +1,81 @@
+import torch
+import modules.lora.network as network
+from modules.lora.lyco_helpers import factorization
+from einops import rearrange
+
+
+class ModuleTypeOFT(network.ModuleType):
+ def create_module(self, net: network.Network, weights: network.NetworkWeights):
+ if all(x in weights.w for x in ["oft_blocks"]) or all(x in weights.w for x in ["oft_diag"]):
+ return NetworkModuleOFT(net, weights)
+ return None
+
+# Supports both kohya-ss' implementation of COFT https://github.com/kohya-ss/sd-scripts/blob/main/networks/oft.py
+# and KohakuBlueleaf's implementation of OFT/COFT https://github.com/KohakuBlueleaf/LyCORIS/blob/dev/lycoris/modules/diag_oft.py
+class NetworkModuleOFT(network.NetworkModule): # pylint: disable=abstract-method
+ def __init__(self, net: network.Network, weights: network.NetworkWeights):
+ super().__init__(net, weights)
+ self.lin_module = None
+        self.org_module: list[torch.nn.Module] = [self.sd_module]
+ self.scale = 1.0
+
+ # kohya-ss
+ if "oft_blocks" in weights.w.keys():
+ self.is_kohya = True
+ self.oft_blocks = weights.w["oft_blocks"] # (num_blocks, block_size, block_size)
+ self.alpha = weights.w["alpha"] # alpha is constraint
+ self.dim = self.oft_blocks.shape[0] # lora dim
+ # LyCORIS
+ elif "oft_diag" in weights.w.keys():
+ self.is_kohya = False
+ self.oft_blocks = weights.w["oft_diag"]
+ # self.alpha is unused
+ self.dim = self.oft_blocks.shape[1] # (num_blocks, block_size, block_size)
+
+ is_linear = type(self.sd_module) in [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear]
+ is_conv = type(self.sd_module) in [torch.nn.Conv2d]
+ is_other_linear = type(self.sd_module) in [torch.nn.MultiheadAttention] # unsupported
+
+ if is_linear:
+ self.out_dim = self.sd_module.out_features
+ elif is_conv:
+ self.out_dim = self.sd_module.out_channels
+ elif is_other_linear:
+ self.out_dim = self.sd_module.embed_dim
+
+ if self.is_kohya:
+ self.constraint = self.alpha * self.out_dim
+ self.num_blocks = self.dim
+ self.block_size = self.out_dim // self.dim
+ else:
+ self.constraint = None
+ self.block_size, self.num_blocks = factorization(self.out_dim, self.dim)
+
+ def calc_updown(self, target):
+ oft_blocks = self.oft_blocks.to(target.device, dtype=target.dtype)
+ eye = torch.eye(self.block_size, device=target.device)
+
+        if self.is_kohya:
+            constraint = self.constraint.to(target.device)  # constraint only exists for the kohya-ss variant
+ block_Q = oft_blocks - oft_blocks.transpose(1, 2) # ensure skew-symmetric orthogonal matrix
+ norm_Q = torch.norm(block_Q.flatten()).to(target.device)
+ new_norm_Q = torch.clamp(norm_Q, max=constraint)
+ block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8))
+ mat1 = eye + block_Q
+ mat2 = (eye - block_Q).float().inverse()
+ oft_blocks = torch.matmul(mat1, mat2)
+
+ R = oft_blocks.to(target.device, dtype=target.dtype)
+
+ # This errors out for MultiheadAttention, might need to be handled up-stream
+ merged_weight = rearrange(target, '(k n) ... -> k n ...', k=self.num_blocks, n=self.block_size)
+ merged_weight = torch.einsum(
+ 'k n m, k n ... -> k m ...',
+ R,
+ merged_weight
+ )
+ merged_weight = rearrange(merged_weight, 'k m ... -> (k m) ...')
+
+ updown = merged_weight.to(target.device, dtype=target.dtype) - target
+ output_shape = target.shape
+ return self.finalize_updown(updown, target, output_shape)
diff --git a/modules/lora/network_overrides.py b/modules/lora/network_overrides.py
new file mode 100644
index 000000000..5334f3c1b
--- /dev/null
+++ b/modules/lora/network_overrides.py
@@ -0,0 +1,49 @@
+from modules import shared
+
+
+maybe_diffusers = [ # forced if lora_maybe_diffusers is enabled
+ 'aaebf6360f7d', # sd15-lcm
+ '3d18b05e4f56', # sdxl-lcm
+ 'b71dcb732467', # sdxl-tcd
+ '813ea5fb1c67', # sdxl-turbo
+ # not really needed, but just in case
+ '5a48ac366664', # hyper-sd15-1step
+ 'ee0ff23dcc42', # hyper-sd15-2step
+ 'e476eb1da5df', # hyper-sd15-4step
+ 'ecb844c3f3b0', # hyper-sd15-8step
+ '1ab289133ebb', # hyper-sd15-8step-cfg
+ '4f494295edb1', # hyper-sdxl-8step
+ 'ca14a8c621f8', # hyper-sdxl-8step-cfg
+ '1c88f7295856', # hyper-sdxl-4step
+ 'fdd5dcd1d88a', # hyper-sdxl-2step
+ '8cca3706050b', # hyper-sdxl-1step
+]
+
+force_diffusers = [ # forced always
+ '816d0eed49fd', # flash-sdxl
+ 'c2ec22757b46', # flash-sd15
+]
+
+force_models = [ # forced always
+ 'sc',
+ # 'sd3',
+ 'kandinsky',
+ 'hunyuandit',
+ 'auraflow',
+]
+
+force_classes = [ # forced always
+]
+
+
+def check_override(shorthash=''):
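+    # decide whether to force the diffusers LoRA loader instead of the native one,
+    # based on model type/class and known LoRA shorthashes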
+ force = False
+ force = force or (shared.sd_model_type in force_models)
+ force = force or (shared.sd_model.__class__.__name__ in force_classes)
+ if len(shorthash) < 4:
+ return force
+ force = force or (any(x.startswith(shorthash) for x in maybe_diffusers) if shared.opts.lora_maybe_diffusers else False)
+ force = force or any(x.startswith(shorthash) for x in force_diffusers)
+ if force and shared.opts.lora_maybe_diffusers:
+ shared.log.debug('LoRA override: force diffusers')
+ return force
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
new file mode 100644
index 000000000..762705b67
--- /dev/null
+++ b/modules/lora/networks.py
@@ -0,0 +1,453 @@
+from typing import Union, List
+import os
+import re
+import time
+import concurrent
+import modules.lora.network as network
+import modules.lora.network_lora as network_lora
+import modules.lora.network_hada as network_hada
+import modules.lora.network_ia3 as network_ia3
+import modules.lora.network_oft as network_oft
+import modules.lora.network_lokr as network_lokr
+import modules.lora.network_full as network_full
+import modules.lora.network_norm as network_norm
+import modules.lora.network_glora as network_glora
+import modules.lora.network_overrides as network_overrides
+import modules.lora.lora_convert as lora_convert
+import torch
+import diffusers.models.lora
+from modules import shared, devices, sd_models, sd_models_compile, errors, scripts, files_cache, model_quant
+
+
+debug = os.environ.get('SD_LORA_DEBUG', None) is not None
+extra_network_lora = None
+available_networks = {}
+available_network_aliases = {}
+loaded_networks: List[network.Network] = []
+timer = { 'load': 0, 'apply': 0, 'restore': 0, 'deactivate': 0 }
+lora_cache = {}
+diffuser_loaded = []
+diffuser_scales = []
+available_network_hash_lookup = {}
+forbidden_network_aliases = {}
+re_network_name = re.compile(r"(.*)\s*\([0-9a-fA-F]+\)")
+module_types = [
+ network_lora.ModuleTypeLora(),
+ network_hada.ModuleTypeHada(),
+ network_ia3.ModuleTypeIa3(),
+ network_oft.ModuleTypeOFT(),
+ network_lokr.ModuleTypeLokr(),
+ network_full.ModuleTypeFull(),
+ network_norm.ModuleTypeNorm(),
+ network_glora.ModuleTypeGLora(),
+]
+
+
+def assign_network_names_to_compvis_modules(sd_model):
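+    # build shared.sd_model.network_layer_mapping: maps lora_te*/lora_unet/lora_transformer style names to live modules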
+ if sd_model is None:
+ return
+    sd_model = getattr(shared.sd_model, "pipe", shared.sd_model)  # wrapped model compatibility
+ network_layer_mapping = {}
+ if hasattr(sd_model, 'text_encoder') and sd_model.text_encoder is not None:
+ for name, module in sd_model.text_encoder.named_modules():
+ prefix = "lora_te1_" if hasattr(sd_model, 'text_encoder_2') else "lora_te_"
+ network_name = prefix + name.replace(".", "_")
+ network_layer_mapping[network_name] = module
+ module.network_layer_name = network_name
+ if hasattr(sd_model, 'text_encoder_2'):
+ for name, module in sd_model.text_encoder_2.named_modules():
+ network_name = "lora_te2_" + name.replace(".", "_")
+ network_layer_mapping[network_name] = module
+ module.network_layer_name = network_name
+ if hasattr(sd_model, 'unet'):
+ for name, module in sd_model.unet.named_modules():
+ network_name = "lora_unet_" + name.replace(".", "_")
+ network_layer_mapping[network_name] = module
+ module.network_layer_name = network_name
+ if hasattr(sd_model, 'transformer'):
+ for name, module in sd_model.transformer.named_modules():
+ network_name = "lora_transformer_" + name.replace(".", "_")
+ network_layer_mapping[network_name] = module
+ if "norm" in network_name and "linear" not in network_name and shared.sd_model_type != "sd3":
+ continue
+ module.network_layer_name = network_name
+ shared.sd_model.network_layer_mapping = network_layer_mapping
+
+
+def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_default_multiplier) -> network.Network | None:
+ name = name.replace(".", "_")
+ shared.log.debug(f'Load network: type=LoRA name="{name}" file="{network_on_disk.filename}" detected={network_on_disk.sd_version} method=diffusers scale={lora_scale} fuse={shared.opts.lora_fuse_diffusers}')
+ if not shared.native:
+ return None
+ if not hasattr(shared.sd_model, 'load_lora_weights'):
+ shared.log.error(f'Load network: type=LoRA class={shared.sd_model.__class__} does not implement load lora')
+ return None
+ try:
+ shared.sd_model.load_lora_weights(network_on_disk.filename, adapter_name=name)
+ except Exception as e:
+ if 'already in use' in str(e):
+ pass
+ else:
+ if 'The following keys have not been correctly renamed' in str(e):
+ shared.log.error(f'Load network: type=LoRA name="{name}" diffusers unsupported format')
+ else:
+ shared.log.error(f'Load network: type=LoRA name="{name}" {e}')
+ if debug:
+ errors.display(e, "LoRA")
+ return None
+ if name not in diffuser_loaded:
+ diffuser_loaded.append(name)
+ diffuser_scales.append(lora_scale)
+ net = network.Network(name, network_on_disk)
+ net.mtime = os.path.getmtime(network_on_disk.filename)
+ return net
+
+
+def load_network(name, network_on_disk) -> network.Network | None:
+ if not shared.sd_loaded:
+ return None
+
+ cached = lora_cache.get(name, None)
+ if debug:
+        shared.log.debug(f'Load network: type=LoRA name="{name}" file="{network_on_disk.filename}" {"cached" if cached else ""}')
+ if cached is not None:
+ return cached
+ net = network.Network(name, network_on_disk)
+ net.mtime = os.path.getmtime(network_on_disk.filename)
+ sd = sd_models.read_state_dict(network_on_disk.filename, what='network')
+ if shared.sd_model_type == 'f1': # if kohya flux lora, convert state_dict
+ sd = lora_convert._convert_kohya_flux_lora_to_diffusers(sd) or sd # pylint: disable=protected-access
+    if shared.sd_model_type == 'sd3':  # if kohya sd3 lora, convert state_dict
+ try:
+ sd = lora_convert._convert_kohya_sd3_lora_to_diffusers(sd) or sd # pylint: disable=protected-access
+ except ValueError: # EAFP for diffusers PEFT keys
+ pass
+ assign_network_names_to_compvis_modules(shared.sd_model)
+ keys_failed_to_match = {}
+ matched_networks = {}
+ bundle_embeddings = {}
+ convert = lora_convert.KeyConvert()
+ for key_network, weight in sd.items():
+ parts = key_network.split('.')
+ if parts[0] == "bundle_emb":
+ emb_name, vec_name = parts[1], key_network.split(".", 2)[-1]
+ emb_dict = bundle_embeddings.get(emb_name, {})
+ emb_dict[vec_name] = weight
+ bundle_embeddings[emb_name] = emb_dict
+ continue
+ if len(parts) > 5: # messy handler for diffusers peft lora
+ key_network_without_network_parts = '_'.join(parts[:-2])
+ if not key_network_without_network_parts.startswith('lora_'):
+ key_network_without_network_parts = 'lora_' + key_network_without_network_parts
+ network_part = '.'.join(parts[-2:]).replace('lora_A', 'lora_down').replace('lora_B', 'lora_up')
+ else:
+ key_network_without_network_parts, network_part = key_network.split(".", 1)
+ key, sd_module = convert(key_network_without_network_parts)
+ if sd_module is None:
+ keys_failed_to_match[key_network] = key
+ continue
+ if key not in matched_networks:
+ matched_networks[key] = network.NetworkWeights(network_key=key_network, sd_key=key, w={}, sd_module=sd_module)
+ matched_networks[key].w[network_part] = weight
+ network_types = []
+ for key, weights in matched_networks.items():
+ net_module = None
+ for nettype in module_types:
+ net_module = nettype.create_module(net, weights)
+ if net_module is not None:
+ network_types.append(nettype.__class__.__name__)
+ break
+ if net_module is None:
+ shared.log.error(f'LoRA unhandled: name={name} key={key} weights={weights.w.keys()}')
+ else:
+ net.modules[key] = net_module
+ if len(keys_failed_to_match) > 0:
+ shared.log.warning(f'LoRA name="{name}" type={set(network_types)} unmatched={len(keys_failed_to_match)} matched={len(matched_networks)}')
+ if debug:
+ shared.log.debug(f'LoRA name="{name}" unmatched={keys_failed_to_match}')
+ else:
+ shared.log.debug(f'LoRA name="{name}" type={set(network_types)} keys={len(matched_networks)}')
+ if len(matched_networks) == 0:
+ return None
+ lora_cache[name] = net
+ net.bundle_embeddings = bundle_embeddings
+ return net
+
+def maybe_recompile_model(names, te_multipliers):
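+    # when the model is torch-compiled, compare the requested LoRA set against the one recorded at
+    # compile time; if it differs, reload clean weights with compilation temporarily disabled so the
+    # new networks can be applied, and report back so the caller can recompile once they are loaded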
+ recompile_model = False
+ if shared.compiled_model_state is not None and shared.compiled_model_state.is_compiled:
+ if len(names) == len(shared.compiled_model_state.lora_model):
+ for i, name in enumerate(names):
+                if shared.compiled_model_state.lora_model[i] != f"{name}:{te_multipliers[i] if te_multipliers else shared.opts.extra_networks_default_multiplier}":
+ recompile_model = True
+ shared.compiled_model_state.lora_model = []
+ break
+ if not recompile_model:
+ if len(loaded_networks) > 0 and debug:
+                    shared.log.debug('Model Compile: Skipping LoRA loading')
+ return
+ else:
+ recompile_model = True
+ shared.compiled_model_state.lora_model = []
+ if recompile_model:
+ backup_cuda_compile = shared.opts.cuda_compile
+ sd_models.unload_model_weights(op='model')
+ shared.opts.cuda_compile = []
+ sd_models.reload_model_weights(op='model')
+ shared.opts.cuda_compile = backup_cuda_compile
+ return recompile_model
+
+
+def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
+ networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names]
+ if any(x is None for x in networks_on_disk):
+ list_available_networks()
+ networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names]
+ failed_to_load_networks = []
+ recompile_model = maybe_recompile_model(names, te_multipliers)
+
+ loaded_networks.clear()
+ diffuser_loaded.clear()
+ diffuser_scales.clear()
+ timer['load'] = 0
+ t0 = time.time()
+
+ for i, (network_on_disk, name) in enumerate(zip(networks_on_disk, names)):
+ net = None
+ if network_on_disk is not None:
+ shorthash = getattr(network_on_disk, 'shorthash', '').lower()
+ if debug:
+ shared.log.debug(f'Load network: type=LoRA name="{name}" file="{network_on_disk.filename}" hash="{shorthash}"')
+ try:
+ if recompile_model:
+ shared.compiled_model_state.lora_model.append(f"{name}:{te_multipliers[i] if te_multipliers else shared.opts.extra_networks_default_multiplier}")
+                if shared.opts.lora_force_diffusers or network_overrides.check_override(shorthash): # OpenVINO only works with Diffusers LoRA loading
+ net = load_diffusers(name, network_on_disk, lora_scale=te_multipliers[i] if te_multipliers else shared.opts.extra_networks_default_multiplier)
+ else:
+ net = load_network(name, network_on_disk)
+ if net is not None:
+ net.mentioned_name = name
+ network_on_disk.read_hash()
+ except Exception as e:
+ shared.log.error(f'Load network: type=LoRA file="{network_on_disk.filename}" {e}')
+ if debug:
+ errors.display(e, 'LoRA')
+ continue
+ if net is None:
+ failed_to_load_networks.append(name)
+ shared.log.error(f'Load network: type=LoRA name="{name}" detected={network_on_disk.sd_version if network_on_disk is not None else None} failed')
+ continue
+ shared.sd_model.embedding_db.load_diffusers_embedding(None, net.bundle_embeddings)
+ net.te_multiplier = te_multipliers[i] if te_multipliers else shared.opts.extra_networks_default_multiplier
+ net.unet_multiplier = unet_multipliers[i] if unet_multipliers else shared.opts.extra_networks_default_multiplier
+ net.dyn_dim = dyn_dims[i] if dyn_dims else shared.opts.extra_networks_default_multiplier
+ loaded_networks.append(net)
+
+ while len(lora_cache) > shared.opts.lora_in_memory_limit:
+ name = next(iter(lora_cache))
+ lora_cache.pop(name, None)
+
+ if len(diffuser_loaded) > 0:
+ shared.log.debug(f'Load network: type=LoRA loaded={diffuser_loaded} available={shared.sd_model.get_list_adapters()} active={shared.sd_model.get_active_adapters()} scales={diffuser_scales}')
+ try:
+ shared.sd_model.set_adapters(adapter_names=diffuser_loaded, adapter_weights=diffuser_scales)
+ if shared.opts.lora_fuse_diffusers:
+ shared.sd_model.fuse_lora(adapter_names=diffuser_loaded, lora_scale=1.0, fuse_unet=True, fuse_text_encoder=True) # fuse uses fixed scale since later apply does the scaling
+ shared.sd_model.unload_lora_weights()
+ except Exception as e:
+ shared.log.error(f'Load network: type=LoRA {e}')
+ if debug:
+ errors.display(e, 'LoRA')
+
+ if len(loaded_networks) > 0 and debug:
+ shared.log.debug(f'Load network: type=LoRA loaded={len(loaded_networks)} cache={list(lora_cache)}')
+
+ devices.torch_gc()
+
+ if recompile_model:
+ shared.log.info("Load network: type=LoRA recompiling model")
+ backup_lora_model = shared.compiled_model_state.lora_model
+ if 'Model' in shared.opts.cuda_compile:
+ shared.sd_model = sd_models_compile.compile_diffusers(shared.sd_model)
+
+ shared.compiled_model_state.lora_model = backup_lora_model
+ if shared.opts.diffusers_offload_mode == "balanced":
+ sd_models.apply_balanced_offload(shared.sd_model)
+ t1 = time.time()
+ timer['load'] += t1 - t0
+
+def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias):
+ weights_backup = getattr(self, "network_weights_backup", None)
+ bias_backup = getattr(self, "network_bias_backup", None)
+ if weights_backup is None and bias_backup is None:
+ return
+ device = self.weight.device
+ with devices.inference_context():
+ if weights_backup is not None:
+ if updown is not None:
+ if len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9:
+ # inpainting model. zero pad updown to make channel[1] 4 to 9
+ updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
+ weights_backup = weights_backup.clone().to(device)
+ weights_backup += updown.to(weights_backup)
+ if getattr(self, "quant_type", None) in ['nf4', 'fp4']:
+ bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
+ if bnb is not None:
+ self.weight = bnb.nn.Params4bit(weights_backup, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
+ else:
+ self.weight.copy_(weights_backup, non_blocking=True)
+ else:
+ self.weight.copy_(weights_backup, non_blocking=True)
+ if hasattr(self, "qweight") and hasattr(self, "freeze"):
+ self.freeze()
+ if bias_backup is not None:
+ if ex_bias is not None:
+ bias_backup = bias_backup.clone() + ex_bias.to(weights_backup)
+ self.bias.copy_(bias_backup)
+ else:
+ self.bias = None
+ self.to(device)
+
+
+def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], wanted_names): # pylint: disable=W0613
+ weights_backup = getattr(self, "network_weights_backup", None)
+ if weights_backup is None and wanted_names != (): # pylint: disable=C1803
+ if getattr(self.weight, "quant_type", None) in ['nf4', 'fp4']:
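+            # for bitsandbytes 4-bit layers the backup is stored dequantized so LoRA deltas can be added in full precision and re-quantized by set_weights()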
+ bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
+ if bnb is not None:
+ with devices.inference_context():
+ weights_backup = bnb.functional.dequantize_4bit(self.weight, quant_state=self.weight.quant_state, quant_type=self.weight.quant_type, blocksize=self.weight.blocksize,)
+ self.quant_state = self.weight.quant_state
+ self.quant_type = self.weight.quant_type
+ self.blocksize = self.weight.blocksize
+ else:
+ weights_backup = self.weight.clone()
+ else:
+ weights_backup = self.weight.clone()
+ if shared.opts.lora_offload_backup and weights_backup is not None:
+ weights_backup = weights_backup.to(devices.cpu)
+ self.network_weights_backup = weights_backup
+ bias_backup = getattr(self, "network_bias_backup", None)
+ if bias_backup is None:
+ if getattr(self, 'bias', None) is not None:
+ bias_backup = self.bias.clone()
+ else:
+ bias_backup = None
+ if shared.opts.lora_offload_backup and bias_backup is not None:
+ bias_backup = bias_backup.to(devices.cpu)
+ self.network_bias_backup = bias_backup
+
+
+def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv]):
+ """
+ Applies the currently selected set of networks to the weights of torch layer self.
+ If weights already have this particular set of networks applied, does nothing.
+    If not, restores original weights from backup and alters weights according to networks.
+ """
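+    # rough sketch of the mechanics below, assuming a plain LoRA module: calc_updown() returns a delta
+    # roughly equal to scale * (lora_up @ lora_down) shaped like the layer weight, and set_weights()
+    # adds that delta onto the backed-up original weight (or restores the backup when nothing is loaded)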
+ network_layer_name = getattr(self, 'network_layer_name', None)
+ if network_layer_name is None:
+ return
+ t0 = time.time()
+ current_names = getattr(self, "network_current_names", ())
+ wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks)
+ if any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419 # pylint: disable=R1729
+ maybe_backup_weights(self, wanted_names)
+ if current_names != wanted_names:
+ for net in loaded_networks:
+ # default workflow where module is known and has weights
+ module = net.modules.get(network_layer_name, None)
+ if module is not None and hasattr(self, 'weight'):
+ try:
+ with devices.inference_context():
+ weight = self.weight # calculate quant weights once
+ updown, ex_bias = module.calc_updown(weight)
+ set_weights(self, updown, ex_bias)
+ except RuntimeError as e:
+ extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
+ if debug:
+ module_name = net.modules.get(network_layer_name, None)
+ shared.log.error(f'LoRA apply weight name="{net.name}" module="{module_name}" layer="{network_layer_name}" {e}')
+ errors.display(e, 'LoRA')
+ raise RuntimeError('LoRA apply weight') from e
+ continue
+ if module is None:
+ continue
+ shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
+ extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
+ if not loaded_networks: # restore from backup
+ set_weights(self, None, None)
+ self.network_current_names = wanted_names
+ t1 = time.time()
+ timer['apply'] += t1 - t0
+
+def network_load():
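+    # walk all known pipeline components and apply (or restore) LoRA weights module-by-module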
+    sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
+    for component_name in ['text_encoder', 'text_encoder_2', 'unet', 'transformer']:
+ component = getattr(sd_model, component_name, None)
+ if component is not None:
+ for _, module in component.named_modules():
+ network_apply_weights(module)
+
+
+def list_available_networks():
+ t0 = time.time()
+ available_networks.clear()
+ available_network_aliases.clear()
+ forbidden_network_aliases.clear()
+ available_network_hash_lookup.clear()
+ forbidden_network_aliases.update({"none": 1, "Addams": 1})
+ if not os.path.exists(shared.cmd_opts.lora_dir):
+ shared.log.warning(f'LoRA directory not found: path="{shared.cmd_opts.lora_dir}"')
+
+ def add_network(filename):
+ if not os.path.isfile(filename):
+ return
+ name = os.path.splitext(os.path.basename(filename))[0]
+ name = name.replace('.', '_')
+ try:
+ entry = network.NetworkOnDisk(name, filename)
+ available_networks[entry.name] = entry
+ if entry.alias in available_network_aliases:
+ forbidden_network_aliases[entry.alias.lower()] = 1
+ if shared.opts.lora_preferred_name == 'filename':
+ available_network_aliases[entry.name] = entry
+ else:
+ available_network_aliases[entry.alias] = entry
+ if entry.shorthash:
+ available_network_hash_lookup[entry.shorthash] = entry
+ except OSError as e: # should catch FileNotFoundError and PermissionError etc.
+ shared.log.error(f'LoRA: filename="{filename}" {e}')
+
+ candidates = list(files_cache.list_files(shared.cmd_opts.lora_dir, ext_filter=[".pt", ".ckpt", ".safetensors"]))
+ with concurrent.futures.ThreadPoolExecutor(max_workers=shared.max_workers) as executor:
+ for fn in candidates:
+ executor.submit(add_network, fn)
+ t1 = time.time()
+ shared.log.info(f'Available LoRAs: path="{shared.cmd_opts.lora_dir}" items={len(available_networks)} folders={len(forbidden_network_aliases)} time={t1 - t0:.2f}')
+
+
+def infotext_pasted(infotext, params): # pylint: disable=W0613
+ if "AddNet Module 1" in [x[1] for x in scripts.scripts_txt2img.infotext_fields]:
+ return # if the other extension is active, it will handle those fields, no need to do anything
+ added = []
+ for k in params:
+ if not k.startswith("AddNet Model "):
+ continue
+ num = k[13:]
+ if params.get("AddNet Module " + num) != "LoRA":
+ continue
+ name = params.get("AddNet Model " + num)
+ if name is None:
+ continue
+ m = re_network_name.match(name)
+ if m:
+ name = m.group(1)
+ multiplier = params.get("AddNet Weight A " + num, "1.0")
+        added.append(f"<lora:{name}:{multiplier}>")
+ if added:
+ params["Prompt"] += "\n" + "".join(added)
+
+
+list_available_networks()
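+# minimal usage sketch (hypothetical caller, not part of this module):
+#   list_available_networks()                                             # scan lora_dir and build the registries
+#   load_networks(['my_lora'], te_multipliers=[0.8], unet_multipliers=[0.8])
+#   network_load()                                                        # apply weights to all pipeline components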
diff --git a/modules/lora/ui_extra_networks_lora.py b/modules/lora/ui_extra_networks_lora.py
new file mode 100644
index 000000000..73cce47a3
--- /dev/null
+++ b/modules/lora/ui_extra_networks_lora.py
@@ -0,0 +1,123 @@
+import os
+import json
+import concurrent
+import modules.lora.networks as networks
+from modules import shared, ui_extra_networks
+
+
+debug = os.environ.get('SD_LORA_DEBUG', None) is not None
+
+
+class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage):
+ def __init__(self):
+ super().__init__('Lora')
+ self.list_time = 0
+
+ def refresh(self):
+ networks.list_available_networks()
+
+ @staticmethod
+ def get_tags(l, info):
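+        # merge tag frequencies from the embedded model metadata with trigger words and tags from the model info json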
+ tags = {}
+ try:
+ if l.metadata is not None:
+ modelspec_tags = l.metadata.get('modelspec.tags', {})
+                possible_tags = l.metadata.get('ss_tag_frequency', {}) # tags from model metadata
+ if isinstance(possible_tags, str):
+ possible_tags = {}
+ if isinstance(modelspec_tags, str):
+ modelspec_tags = {}
+ if len(list(modelspec_tags)) > 0:
+ possible_tags.update(modelspec_tags)
+ for k, v in possible_tags.items():
+ words = k.split('_', 1) if '_' in k else [v, k]
+ words = [str(w).replace('.json', '') for w in words]
+ if words[0] == '{}':
+ words[0] = 0
+ tag = ' '.join(words[1:]).lower()
+ tags[tag] = words[0]
+
+ def find_version():
+ found_versions = []
+ current_hash = l.hash[:8].upper()
+ all_versions = info.get('modelVersions', [])
+                for v in all_versions:
+ for f in v.get('files', []):
+ if any(h.startswith(current_hash) for h in f.get('hashes', {}).values()):
+ found_versions.append(v)
+ if len(found_versions) == 0:
+ found_versions = all_versions
+ return found_versions
+
+ for v in find_version(): # trigger words from info json
+ possible_tags = v.get('trainedWords', [])
+ if isinstance(possible_tags, list):
+ for tag_str in possible_tags:
+ for tag in tag_str.split(','):
+ tag = tag.strip().lower()
+ if tag not in tags:
+ tags[tag] = 0
+
+ possible_tags = info.get('tags', []) # tags from info json
+ if not isinstance(possible_tags, list):
+ possible_tags = list(possible_tags.values())
+ for tag in possible_tags:
+ tag = tag.strip().lower()
+ if tag not in tags:
+ tags[tag] = 0
+ except Exception:
+ pass
+ bad_chars = [';', ':', '<', ">", "*", '?', '\'', '\"', '(', ')', '[', ']', '{', '}', '\\', '/']
+ clean_tags = {}
+ for k, v in tags.items():
+ tag = ''.join(i for i in k if i not in bad_chars).strip()
+ clean_tags[tag] = v
+
+ clean_tags.pop('img', None)
+ clean_tags.pop('dataset', None)
+ return clean_tags
+
+ def create_item(self, name):
+ l = networks.available_networks.get(name)
+ if l is None:
+ shared.log.warning(f'Networks: type=lora registered={len(list(networks.available_networks))} file="{name}" not registered')
+ return None
+ try:
+ # path, _ext = os.path.splitext(l.filename)
+ name = os.path.splitext(os.path.relpath(l.filename, shared.cmd_opts.lora_dir))[0]
+ item = {
+ "type": 'Lora',
+ "name": name,
+ "filename": l.filename,
+ "hash": l.shorthash,
+                "prompt": json.dumps(f" <lora:{name}:{shared.opts.extra_networks_default_multiplier}>"),
+ "metadata": json.dumps(l.metadata, indent=4) if l.metadata else None,
+ "mtime": os.path.getmtime(l.filename),
+ "size": os.path.getsize(l.filename),
+ "version": l.sd_version,
+ }
+ info = self.find_info(l.filename)
+ item["info"] = info
+ item["description"] = self.find_description(l.filename, info) # use existing info instead of double-read
+ item["tags"] = self.get_tags(l, info)
+ return item
+ except Exception as e:
+ shared.log.error(f'Networks: type=lora file="{name}" {e}')
+ if debug:
+ from modules import errors
+ errors.display(e, 'Lora')
+ return None
+
+ def list_items(self):
+ items = []
+ with concurrent.futures.ThreadPoolExecutor(max_workers=shared.max_workers) as executor:
+ future_items = {executor.submit(self.create_item, net): net for net in networks.available_networks}
+ for future in concurrent.futures.as_completed(future_items):
+ item = future.result()
+ if item is not None:
+ items.append(item)
+ self.update_all_previews(items)
+ return items
+
+ def allowed_directories_for_previews(self):
+ return [shared.cmd_opts.lora_dir, shared.cmd_opts.lyco_dir]
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 2164134b1..83d3b1b69 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -8,6 +8,8 @@
from modules.processing_helpers import resize_hires, calculate_base_steps, calculate_hires_steps, calculate_refiner_steps, save_intermediate, update_sampler, is_txt2img, is_refiner_enabled
from modules.processing_args import set_pipeline_args
from modules.onnx_impl import preprocess_pipeline as preprocess_onnx_pipeline, check_parameters_changed as olive_check_parameters_changed
+from modules.lora.networks import network_load
+from modules.lora.networks import timer as network_timer
debug = shared.log.trace if os.environ.get('SD_DIFFUSERS_DEBUG', None) is not None else lambda *args, **kwargs: None
@@ -424,6 +426,9 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
p.prompts = p.all_prompts[p.iteration * p.batch_size:(p.iteration+1) * p.batch_size]
if p.negative_prompts is None or len(p.negative_prompts) == 0:
p.negative_prompts = p.all_negative_prompts[p.iteration * p.batch_size:(p.iteration+1) * p.batch_size]
+ network_timer['apply'] = 0
+ network_timer['restore'] = 0
+ network_load()
sd_models.move_model(shared.sd_model, devices.device)
sd_models_compile.openvino_recompile_model(p, hires=False, refiner=False) # recompile if a parameter changes
diff --git a/modules/shared.py b/modules/shared.py
index a89cbbc95..a5af83f5e 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -908,6 +908,7 @@ def get_default_modes():
"lora_in_memory_limit": OptionInfo(0, "LoRA memory cache", gr.Slider, {"minimum": 0, "maximum": 24, "step": 1}),
"lora_quant": OptionInfo("NF4","LoRA precision in quantized models", gr.Radio, {"choices": ["NF4", "FP4"]}),
"lora_load_gpu": OptionInfo(True if not cmd_opts.lowvram else False, "Load LoRA directly to GPU"),
+ "lora_offload_backup": OptionInfo(True, "Offload LoRA Backup Weights"),
}))
options_templates.update(options_section((None, "Internal options"), {
diff --git a/scripts/lora_script.py b/scripts/lora_script.py
new file mode 100644
index 000000000..a153a2caa
--- /dev/null
+++ b/scripts/lora_script.py
@@ -0,0 +1,62 @@
+import re
+import modules.lora.networks as networks
+from modules.lora.lora_extract import create_ui
+from modules.lora.network import NetworkOnDisk
+from modules.lora.ui_extra_networks_lora import ExtraNetworksPageLora
+from modules.lora.extra_networks_lora import ExtraNetworkLora
+from modules import script_callbacks, extra_networks, ui_extra_networks, ui_models, shared # pylint: disable=unused-import
+
+
+re_lora = re.compile("<lora:([^:]+):")
Date: Sun, 24 Nov 2024 13:02:45 -0500
Subject: [PATCH 008/162] correct lora assignment
Signed-off-by: Vladimir Mandic
---
extensions-builtin/Lora/networks.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py
index db617ee5b..fd6287c62 100644
--- a/extensions-builtin/Lora/networks.py
+++ b/extensions-builtin/Lora/networks.py
@@ -88,7 +88,7 @@ def assign_network_names_to_compvis_modules(sd_model):
network_name = name.replace(".", "_")
network_layer_mapping[network_name] = module
module.network_layer_name = network_name
- shared.sd_model.network_layer_mapping = network_layer_mapping
+ sd_model.network_layer_mapping = network_layer_mapping
def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_default_multiplier) -> network.Network:
@@ -141,7 +141,7 @@ def load_network(name, network_on_disk) -> network.Network:
sd = sd_models.read_state_dict(network_on_disk.filename, what='network')
if shared.sd_model_type == 'f1': # if kohya flux lora, convert state_dict
sd = lora_convert._convert_kohya_flux_lora_to_diffusers(sd) or sd # pylint: disable=protected-access
- assign_network_names_to_compvis_modules(shared.sd_model) # this should not be needed but is here as an emergency fix for an unknown error people are experiencing in 1.2.0
+ assign_network_names_to_compvis_modules(shared.sd_model)
keys_failed_to_match = {}
matched_networks = {}
bundle_embeddings = {}
From fdb8cb509ef50722fa6c9cb61c90b831927412fd Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 24 Nov 2024 13:49:27 -0500
Subject: [PATCH 009/162] force move te when using xhinker
Signed-off-by: Vladimir Mandic
---
modules/prompt_parser_diffusers.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py
index 234272907..bfecfade6 100644
--- a/modules/prompt_parser_diffusers.py
+++ b/modules/prompt_parser_diffusers.py
@@ -583,13 +583,13 @@ def get_xhinker_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", cl
te1_device, te2_device, te3_device = None, None, None
if hasattr(pipe, "text_encoder") and pipe.text_encoder.device != devices.device:
te1_device = pipe.text_encoder.device
- sd_models.move_model(pipe.text_encoder, devices.device)
+ sd_models.move_model(pipe.text_encoder, devices.device, force=True)
if hasattr(pipe, "text_encoder_2") and pipe.text_encoder_2.device != devices.device:
te2_device = pipe.text_encoder_2.device
- sd_models.move_model(pipe.text_encoder_2, devices.device)
+ sd_models.move_model(pipe.text_encoder_2, devices.device, force=True)
if hasattr(pipe, "text_encoder_3") and pipe.text_encoder_3.device != devices.device:
te3_device = pipe.text_encoder_3.device
- sd_models.move_model(pipe.text_encoder_3, devices.device)
+ sd_models.move_model(pipe.text_encoder_3, devices.device, force=True)
if is_sd3:
prompt_embed, negative_embed, positive_pooled, negative_pooled = get_weighted_text_embeddings_sd3(pipe=pipe, prompt=prompt, neg_prompt=neg_prompt, use_t5_encoder=bool(pipe.text_encoder_3))
@@ -601,10 +601,10 @@ def get_xhinker_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", cl
prompt_embed, negative_embed = get_weighted_text_embeddings_sd15(pipe=pipe, prompt=prompt, neg_prompt=neg_prompt, clip_skip=clip_skip)
if te1_device is not None:
- sd_models.move_model(pipe.text_encoder, te1_device)
+ sd_models.move_model(pipe.text_encoder, te1_device, force=True)
if te2_device is not None:
- sd_models.move_model(pipe.text_encoder_2, te1_device)
+ sd_models.move_model(pipe.text_encoder_2, te1_device, force=True)
if te3_device is not None:
- sd_models.move_model(pipe.text_encoder_3, te1_device)
+ sd_models.move_model(pipe.text_encoder_3, te1_device, force=True)
return prompt_embed, positive_pooled, negative_embed, negative_pooled
From e4038feff1ad6548def3553fa543e17b0fe5e34a Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 24 Nov 2024 15:05:58 -0500
Subject: [PATCH 010/162] minor updates
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 2 +-
modules/prompt_parser_diffusers.py | 4 ++--
modules/sd_models.py | 7 +++++--
modules/sd_samplers_diffusers.py | 14 +++++++-------
wiki | 2 +-
5 files changed, 16 insertions(+), 13 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ad77bbbe5..cbd29840d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-11-23
+## Update for 2024-11-24
- [Flux Tools](https://blackforestlabs.ai/flux-1-tools/):
**Redux** is actually a tool, **Fill** is inpaint/outpaint optimized version of *Flux-dev*
diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py
index bfecfade6..2edef4bf5 100644
--- a/modules/prompt_parser_diffusers.py
+++ b/modules/prompt_parser_diffusers.py
@@ -162,7 +162,7 @@ def extend_embeds(self, batchidx, idx): # Extends scheduled prompt via index
def encode(self, pipe, positive_prompt, negative_prompt, batchidx):
self.attention = shared.opts.prompt_attention
- if self.attention == "xhinker" or 'Flux' in pipe.__class__.__name__:
+ if self.attention == "xhinker":
prompt_embed, positive_pooled, negative_embed, negative_pooled = get_xhinker_text_embeddings(pipe, positive_prompt, negative_prompt, self.clip_skip)
else:
prompt_embed, positive_pooled, negative_embed, negative_pooled = get_weighted_text_embeddings(pipe, positive_prompt, negative_prompt, self.clip_skip)
@@ -591,7 +591,7 @@ def get_xhinker_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", cl
te3_device = pipe.text_encoder_3.device
sd_models.move_model(pipe.text_encoder_3, devices.device, force=True)
- if is_sd3:
+ if 'StableDiffusion3' in pipe.__class__.__name__:
prompt_embed, negative_embed, positive_pooled, negative_pooled = get_weighted_text_embeddings_sd3(pipe=pipe, prompt=prompt, neg_prompt=neg_prompt, use_t5_encoder=bool(pipe.text_encoder_3))
elif 'Flux' in pipe.__class__.__name__:
prompt_embed, positive_pooled = get_weighted_text_embeddings_flux1(pipe=pipe, prompt=prompt, prompt2=prompt_2, device=devices.device)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 2ad204b46..aab35af18 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -16,7 +16,7 @@
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config
from modules import paths, shared, shared_state, modelloader, devices, script_callbacks, sd_vae, sd_unet, errors, sd_models_config, sd_models_compile, sd_hijack_accelerate, sd_detect
-from modules.timer import Timer
+from modules.timer import Timer, process as process_timer
from modules.memstats import memory_stats
from modules.modeldata import model_data
from modules.sd_checkpoint import CheckpointInfo, select_checkpoint, list_models, checkpoints_list, checkpoint_titles, get_closet_checkpoint_match, model_hash, update_model_hashes, setup_model, write_metadata, read_metadata_from_safetensors # pylint: disable=unused-import
@@ -512,7 +512,10 @@ def move_model(model, device=None, force=False):
except Exception as e1:
t1 = time.time()
shared.log.error(f'Model move: device={device} {e1}')
- if os.environ.get('SD_MOVE_DEBUG', None) or (t1-t0) > 0.1:
+ if 'move' not in process_timer.records:
+ process_timer.records['move'] = 0
+ process_timer.records['move'] += t1 - t0
+ if os.environ.get('SD_MOVE_DEBUG', None) or (t1-t0) > 1:
shared.log.debug(f'Model move: device={device} class={model.__class__.__name__} accelerate={getattr(model, "has_accelerate", False)} fn={fn} time={t1-t0:.2f}') # pylint: disable=protected-access
devices.torch_gc()
diff --git a/modules/sd_samplers_diffusers.py b/modules/sd_samplers_diffusers.py
index 60c75b64e..9f24d5a91 100644
--- a/modules/sd_samplers_diffusers.py
+++ b/modules/sd_samplers_diffusers.py
@@ -80,13 +80,13 @@
'DPM++ Cosine': { 'solver_order': 2, 'sigma_schedule': "exponential", 'prediction_type': "v-prediction" },
'DPM SDE': { 'use_karras_sigmas': False, 'use_exponential_sigmas': False, 'use_beta_sigmas': False, 'noise_sampler_seed': None, 'timestep_spacing': 'linspace', 'steps_offset': 0, },
- 'DPM2 FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver2', 'use_noise_sampler': True },
- 'DPM2a FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver2A', 'use_noise_sampler': True },
- 'DPM2++ 2M FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++2M', 'use_noise_sampler': True },
- 'DPM2++ 2S FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++2S', 'use_noise_sampler': True },
- 'DPM2++ SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++sde', 'use_noise_sampler': True },
- 'DPM2++ 2M SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++2Msde', 'use_noise_sampler': True },
- 'DPM2++ 3M SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 3, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++3Msde', 'use_noise_sampler': True },
+ 'DPM2 FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver2', 'use_noise_sampler': True, 'beta_start': 0.00085, 'beta_end': 0.012 },
+ 'DPM2a FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver2A', 'use_noise_sampler': True, 'beta_start': 0.00085, 'beta_end': 0.012 },
+ 'DPM2++ 2M FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++2M', 'use_noise_sampler': True, 'beta_start': 0.00085, 'beta_end': 0.012 },
+ 'DPM2++ 2S FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++2S', 'use_noise_sampler': True, 'beta_start': 0.00085, 'beta_end': 0.012 },
+ 'DPM2++ SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++sde', 'use_noise_sampler': True, 'beta_start': 0.00085, 'beta_end': 0.012 },
+ 'DPM2++ 2M SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++2Msde', 'use_noise_sampler': True, 'beta_start': 0.00085, 'beta_end': 0.012 },
+ 'DPM2++ 3M SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 3, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++3Msde', 'use_noise_sampler': True, 'beta_start': 0.00085, 'beta_end': 0.012 },
'Heun': { 'use_beta_sigmas': False, 'use_karras_sigmas': False, 'use_exponential_sigmas': False, 'timestep_spacing': 'linspace' },
'Heun FlowMatch': { 'timestep_spacing': "linspace", 'shift': 1 },
diff --git a/wiki b/wiki
index 313a6b911..ba7d78b55 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 313a6b911bd239b4fa8092ed89b936428214342e
+Subproject commit ba7d78b55eb95afe8509bd0069b8ec345b259f21
From fccd1ed364e35721c01881684caaad8166294ceb Mon Sep 17 00:00:00 2001
From: AI-Casanova <54461896+AI-Casanova@users.noreply.github.com>
Date: Sun, 24 Nov 2024 14:16:33 -0600
Subject: [PATCH 011/162] Enable stepwise LoRA (untested)
---
modules/processing_callbacks.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/modules/processing_callbacks.py b/modules/processing_callbacks.py
index 52ea3e575..f6e3c0672 100644
--- a/modules/processing_callbacks.py
+++ b/modules/processing_callbacks.py
@@ -4,6 +4,7 @@
import torch
import numpy as np
from modules import shared, processing_correction, extra_networks, timer, prompt_parser_diffusers
+from modules.lora.networks import network_load
p = None
debug_callback = shared.log.trace if os.environ.get('SD_CALLBACK_DEBUG', None) is not None else lambda *args, **kwargs: None
@@ -63,6 +64,7 @@ def diffusers_callback(pipe, step: int = 0, timestep: int = 0, kwargs: dict = {}
time.sleep(0.1)
if hasattr(p, "stepwise_lora"):
extra_networks.activate(p, p.extra_network_data, step=step)
+ network_load()
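+            # re-applying weights here lets step-wise LoRA strength changes take effect on the next step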
if latents is None:
return kwargs
elif shared.opts.nan_skip:
From bf170ea08c07aa88d99695a01a43f6cfb45e1f22 Mon Sep 17 00:00:00 2001
From: AI-Casanova <54461896+AI-Casanova@users.noreply.github.com>
Date: Sun, 24 Nov 2024 23:22:57 -0600
Subject: [PATCH 012/162] Fix multiple LoRA
---
modules/lora/networks.py | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 762705b67..c6fde3e04 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -354,6 +354,8 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
if any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419 # pylint: disable=R1729
maybe_backup_weights(self, wanted_names)
if current_names != wanted_names:
+ batch_updown = None
+ batch_ex_bias = None
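+                # accumulate deltas from every loaded network and apply them with a single set_weights() call so multiple LoRAs stack correctly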
for net in loaded_networks:
# default workflow where module is known and has weights
module = net.modules.get(network_layer_name, None)
@@ -362,7 +364,14 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
with devices.inference_context():
weight = self.weight # calculate quant weights once
updown, ex_bias = module.calc_updown(weight)
- set_weights(self, updown, ex_bias)
+ if batch_updown is not None and updown is not None:
+ batch_updown += updown
+ else:
+ batch_updown = updown
+ if batch_ex_bias is not None and ex_bias is not None:
+ batch_ex_bias += ex_bias
+ else:
+ batch_ex_bias = ex_bias
except RuntimeError as e:
extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
if debug:
@@ -375,9 +384,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
continue
shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
- if not loaded_networks: # restore from backup
- t5 = time.time()
- set_weights(self, None, None)
+ set_weights(self, batch_updown, batch_ex_bias) # Set or restore weights from backup
self.network_current_names = wanted_names
t1 = time.time()
timer['apply'] += t1 - t0
From d76365e227fc8734982e5b1d685cf2b18890b0b5 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 25 Nov 2024 08:01:02 -0500
Subject: [PATCH 013/162] xyz improvements
Signed-off-by: Vladimir Mandic
---
modules/ui_extra_networks.py | 2 ++
scripts/xyz_grid_classes.py | 4 ++--
scripts/xyz_grid_shared.py | 40 ++++++++++++++++--------------------
3 files changed, 22 insertions(+), 24 deletions(-)
diff --git a/modules/ui_extra_networks.py b/modules/ui_extra_networks.py
index f6e6cee97..e152dc74b 100644
--- a/modules/ui_extra_networks.py
+++ b/modules/ui_extra_networks.py
@@ -135,6 +135,7 @@ def patch(self, text: str, tabname: str):
return text.replace('~tabname', tabname)
def create_xyz_grid(self):
+ """
xyz_grid = [x for x in scripts.scripts_data if x.script_class.__module__ == "xyz_grid.py"][0].module
def add_prompt(p, opt, x):
@@ -150,6 +151,7 @@ def add_prompt(p, opt, x):
opt = xyz_grid.AxisOption(f"[Network] {self.title}", str, add_prompt, choices=lambda: [x["name"] for x in self.items])
if opt not in xyz_grid.axis_options:
xyz_grid.axis_options.append(opt)
+ """
def link_preview(self, filename):
quoted_filename = urllib.parse.quote(filename.replace('\\', '/'))
diff --git a/scripts/xyz_grid_classes.py b/scripts/xyz_grid_classes.py
index b80b9f13c..06772856c 100644
--- a/scripts/xyz_grid_classes.py
+++ b/scripts/xyz_grid_classes.py
@@ -1,4 +1,4 @@
-from scripts.xyz_grid_shared import apply_field, apply_task_args, apply_setting, apply_prompt, apply_order, apply_sampler, apply_hr_sampler_name, confirm_samplers, apply_checkpoint, apply_refiner, apply_unet, apply_dict, apply_clip_skip, apply_vae, list_lora, apply_lora, apply_te, apply_styles, apply_upscaler, apply_context, apply_detailer, apply_override, apply_processing, apply_options, apply_seed, format_value_add_label, format_value, format_value_join_list, do_nothing, format_nothing, str_permutations # pylint: disable=no-name-in-module, unused-import
+from scripts.xyz_grid_shared import apply_field, apply_task_args, apply_setting, apply_prompt, apply_order, apply_sampler, apply_hr_sampler_name, confirm_samplers, apply_checkpoint, apply_refiner, apply_unet, apply_dict, apply_clip_skip, apply_vae, list_lora, apply_lora, apply_lora_strength, apply_te, apply_styles, apply_upscaler, apply_context, apply_detailer, apply_override, apply_processing, apply_options, apply_seed, format_value_add_label, format_value, format_value_join_list, do_nothing, format_nothing, str_permutations # pylint: disable=no-name-in-module, unused-import
from modules import shared, shared_items, sd_samplers, ipadapter, sd_models, sd_vae, sd_unet
@@ -97,7 +97,7 @@ def __exit__(self, exc_type, exc_value, tb):
AxisOption("[Prompt] Prompt order", str_permutations, apply_order, fmt=format_value_join_list),
AxisOption("[Prompt] Prompt parser", str, apply_setting("prompt_attention"), choices=lambda: ["native", "compel", "xhinker", "a1111", "fixed"]),
AxisOption("[Network] LoRA", str, apply_lora, cost=0.5, choices=list_lora),
- AxisOption("[Network] LoRA strength", float, apply_setting('extra_networks_default_multiplier')),
+ AxisOption("[Network] LoRA strength", float, apply_lora_strength),
AxisOption("[Network] Styles", str, apply_styles, choices=lambda: [s.name for s in shared.prompt_styles.styles.values()]),
AxisOption("[Param] Width", int, apply_field("width")),
AxisOption("[Param] Height", int, apply_field("height")),
diff --git a/scripts/xyz_grid_shared.py b/scripts/xyz_grid_shared.py
index d3ee0a864..82387fab8 100644
--- a/scripts/xyz_grid_shared.py
+++ b/scripts/xyz_grid_shared.py
@@ -63,28 +63,15 @@ def apply_seed(p, x, xs):
def apply_prompt(p, x, xs):
- if not hasattr(p, 'orig_prompt'):
- p.orig_prompt = p.prompt
- p.orig_negative = p.negative_prompt
- if xs[0] not in p.orig_prompt and xs[0] not in p.orig_negative:
- shared.log.warning(f'XYZ grid: prompt S/R string="{xs[0]}" not found')
- else:
- p.prompt = p.orig_prompt.replace(xs[0], x)
- p.negative_prompt = p.orig_negative.replace(xs[0], x)
- p.all_prompts = None
- p.all_negative_prompts = None
- """
- if p.all_prompts is not None:
- for i in range(len(p.all_prompts)):
- for j in range(len(xs)):
- p.all_prompts[i] = p.all_prompts[i].replace(xs[j], x)
- p.negative_prompt = p.negative_prompt.replace(xs[0], x)
- if p.all_negative_prompts is not None:
- for i in range(len(p.all_negative_prompts)):
- for j in range(len(xs)):
- p.all_negative_prompts[i] = p.all_negative_prompts[i].replace(xs[j], x)
- """
- shared.log.debug(f'XYZ grid apply prompt: "{xs[0]}"="{x}"')
+ for s in xs:
+ if s in p.prompt:
+ shared.log.debug(f'XYZ grid apply prompt: "{s}"="{x}"')
+ p.prompt = p.prompt.replace(s, x)
+ if s in p.negative_prompt:
+ shared.log.debug(f'XYZ grid apply negative: "{s}"="{x}"')
+ p.negative_prompt = p.negative_prompt.replace(s, x)
+ p.all_prompts = None
+ p.all_negative_prompts = None
def apply_order(p, x, xs):
@@ -220,6 +207,15 @@ def apply_lora(p, x, xs):
shared.log.debug(f'XYZ grid apply LoRA: "{x}"')
+def apply_lora_strength(p, x, xs):
+ shared.log.debug(f'XYZ grid apply LoRA strength: "{x}"')
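+    # strip explicit strength suffixes from lora tags in the prompt so the updated default multiplier takes effect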
+ p.prompt = p.prompt.replace(':1.0>', '>')
+ p.prompt = p.prompt.replace(f':{shared.opts.extra_networks_default_multiplier}>', '>')
+ p.all_prompts = None
+ p.all_negative_prompts = None
+ shared.opts.data['extra_networks_default_multiplier'] = x
+
+
def apply_te(p, x, xs):
shared.opts.data["sd_text_encoder"] = x
sd_models.reload_text_encoder()
From 172108127da14ab07ac2077b102137bda143deed Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 25 Nov 2024 08:11:40 -0500
Subject: [PATCH 014/162] xyz improvements
Signed-off-by: Vladimir Mandic
---
scripts/xyz_grid_classes.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/xyz_grid_classes.py b/scripts/xyz_grid_classes.py
index 06772856c..cc70d68f8 100644
--- a/scripts/xyz_grid_classes.py
+++ b/scripts/xyz_grid_classes.py
@@ -97,7 +97,7 @@ def __exit__(self, exc_type, exc_value, tb):
AxisOption("[Prompt] Prompt order", str_permutations, apply_order, fmt=format_value_join_list),
AxisOption("[Prompt] Prompt parser", str, apply_setting("prompt_attention"), choices=lambda: ["native", "compel", "xhinker", "a1111", "fixed"]),
AxisOption("[Network] LoRA", str, apply_lora, cost=0.5, choices=list_lora),
- AxisOption("[Network] LoRA strength", float, apply_lora_strength),
+ AxisOption("[Network] LoRA strength", float, apply_lora_strength, cost=0.6),
AxisOption("[Network] Styles", str, apply_styles, choices=lambda: [s.name for s in shared.prompt_styles.styles.values()]),
AxisOption("[Param] Width", int, apply_field("width")),
AxisOption("[Param] Height", int, apply_field("height")),
From 9e56a8f0a742cfe70bc9ef44f47edb546285b661 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 25 Nov 2024 08:49:02 -0500
Subject: [PATCH 015/162] avoid live-preview during vae-decode
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 7 ++++--
javascript/logger.js | 18 ++++++--------
javascript/notification.js | 50 ++++++++++++++++++++------------------
javascript/progressBar.js | 12 ++++++---
modules/shared_state.py | 5 ++++
5 files changed, 53 insertions(+), 39 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cbd29840d..9733dfa2b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-11-24
+## Update for 2024-11-26
- [Flux Tools](https://blackforestlabs.ai/flux-1-tools/):
**Redux** is actually a tool, **Fill** is inpaint/outpaint optimized version of *Flux-dev*
@@ -29,7 +29,7 @@
- Sampler improvements
- update DPM FlowMatch samplers
- UI:
- - browser->server logging
+ - browser->server logging framework
- Fixes:
- update `diffusers`
- fix README links
@@ -37,6 +37,9 @@
- relax settings validator
- improve js progress calls resiliency
- fix text-to-video pipeline
+ - avoid live-preview if vae-decode is running
+ - allow xyz-grid with multi-axis s&r
+ - fix xyz-grid with lora
## Update for 2024-11-21
diff --git a/javascript/logger.js b/javascript/logger.js
index 5aa8face3..1677fa537 100644
--- a/javascript/logger.js
+++ b/javascript/logger.js
@@ -1,5 +1,3 @@
-const serverTimeout = 5000;
-
const log = async (...msg) => {
const dt = new Date();
const ts = `${dt.getHours().toString().padStart(2, '0')}:${dt.getMinutes().toString().padStart(2, '0')}:${dt.getSeconds().toString().padStart(2, '0')}.${dt.getMilliseconds().toString().padStart(3, '0')}`;
@@ -19,15 +17,15 @@ const error = async (...msg) => {
const ts = `${dt.getHours().toString().padStart(2, '0')}:${dt.getMinutes().toString().padStart(2, '0')}:${dt.getSeconds().toString().padStart(2, '0')}.${dt.getMilliseconds().toString().padStart(3, '0')}`;
if (window.logger) window.logger.innerHTML += window.logPrettyPrint(...msg);
console.error(ts, ...msg); // eslint-disable-line no-console
- const txt = msg.join(' ');
- if (!txt.includes('asctime') && !txt.includes('xhr.')) xhrPost('/sdapi/v1/log', { error: txt }); // eslint-disable-line no-use-before-define
+ // const txt = msg.join(' ');
+ // if (!txt.includes('asctime') && !txt.includes('xhr.')) xhrPost('/sdapi/v1/log', { error: txt }); // eslint-disable-line no-use-before-define
};
-const xhrInternal = (xhrObj, data, handler = undefined, errorHandler = undefined, ignore = false) => {
+const xhrInternal = (xhrObj, data, handler = undefined, errorHandler = undefined, ignore = false, serverTimeout = 5000) => {
const err = (msg) => {
if (!ignore) {
error(`${msg}: state=${xhrObj.readyState} status=${xhrObj.status} response=${xhrObj.responseText}`);
- if (errorHandler) errorHandler();
+ if (errorHandler) errorHandler(xhrObj);
}
};
@@ -54,15 +52,15 @@ const xhrInternal = (xhrObj, data, handler = undefined, errorHandler = undefined
xhrObj.send(req);
};
-const xhrGet = (url, data, handler = undefined, errorHandler = undefined, ignore = false) => {
+const xhrGet = (url, data, handler = undefined, errorHandler = undefined, ignore = false, serverTimeout = 5000) => {
const xhr = new XMLHttpRequest();
const args = Object.keys(data).map((k) => `${encodeURIComponent(k)}=${encodeURIComponent(data[k])}`).join('&');
xhr.open('GET', `${url}?${args}`, true);
- xhrInternal(xhr, data, handler, errorHandler, ignore);
+ xhrInternal(xhr, data, handler, errorHandler, ignore, serverTimeout);
};
-function xhrPost(url, data, handler = undefined, errorHandler = undefined, ignore = false) {
+function xhrPost(url, data, handler = undefined, errorHandler = undefined, ignore = false, serverTimeout = 5000) {
const xhr = new XMLHttpRequest();
xhr.open('POST', url, true);
- xhrInternal(xhr, data, handler, errorHandler, ignore);
+ xhrInternal(xhr, data, handler, errorHandler, ignore, serverTimeout);
}
diff --git a/javascript/notification.js b/javascript/notification.js
index 33e8d1c55..c702c90e7 100644
--- a/javascript/notification.js
+++ b/javascript/notification.js
@@ -4,28 +4,32 @@ let lastHeadImg = null;
let notificationButton = null;
async function sendNotification() {
- if (!notificationButton) {
- notificationButton = gradioApp().getElementById('request_notifications');
- if (notificationButton) notificationButton.addEventListener('click', (evt) => Notification.requestPermission(), true);
+ try {
+ if (!notificationButton) {
+ notificationButton = gradioApp().getElementById('request_notifications');
+ if (notificationButton) notificationButton.addEventListener('click', (evt) => Notification.requestPermission(), true);
+ }
+ if (document.hasFocus()) return; // window is in focus so don't send notifications
+ let galleryPreviews = gradioApp().querySelectorAll('div[id^="tab_"][style*="display: block"] div[id$="_results"] .thumbnail-item > img');
+ if (!galleryPreviews || galleryPreviews.length === 0) galleryPreviews = gradioApp().querySelectorAll('.thumbnail-item > img');
+ if (!galleryPreviews || galleryPreviews.length === 0) return;
+ const headImg = galleryPreviews[0]?.src;
+ if (!headImg || headImg === lastHeadImg || headImg.includes('logo-bg-')) return;
+ const audioNotification = gradioApp().querySelector('#audio_notification audio');
+ if (audioNotification) audioNotification.play();
+ lastHeadImg = headImg;
+ const imgs = new Set(Array.from(galleryPreviews).map((img) => img.src)); // Multiple copies of the images are in the DOM when one is selected
+ const notification = new Notification('SD.Next', {
+ body: `Generated ${imgs.size > 1 ? imgs.size - opts.return_grid : 1} image${imgs.size > 1 ? 's' : ''}`,
+ icon: headImg,
+ image: headImg,
+ });
+ notification.onclick = () => {
+ parent.focus();
+ this.close();
+ };
+ log('sendNotifications');
+ } catch (e) {
+ error(`sendNotification: ${e}`);
}
- if (document.hasFocus()) return; // window is in focus so don't send notifications
- let galleryPreviews = gradioApp().querySelectorAll('div[id^="tab_"][style*="display: block"] div[id$="_results"] .thumbnail-item > img');
- if (!galleryPreviews || galleryPreviews.length === 0) galleryPreviews = gradioApp().querySelectorAll('.thumbnail-item > img');
- if (!galleryPreviews || galleryPreviews.length === 0) return;
- const headImg = galleryPreviews[0]?.src;
- if (!headImg || headImg === lastHeadImg || headImg.includes('logo-bg-')) return;
- const audioNotification = gradioApp().querySelector('#audio_notification audio');
- if (audioNotification) audioNotification.play();
- lastHeadImg = headImg;
- const imgs = new Set(Array.from(galleryPreviews).map((img) => img.src)); // Multiple copies of the images are in the DOM when one is selected
- const notification = new Notification('SD.Next', {
- body: `Generated ${imgs.size > 1 ? imgs.size - opts.return_grid : 1} image${imgs.size > 1 ? 's' : ''}`,
- icon: headImg,
- image: headImg,
- });
- notification.onclick = () => {
- parent.focus();
- this.close();
- };
- log('sendNotifications');
}
diff --git a/javascript/progressBar.js b/javascript/progressBar.js
index 9d897bc87..52f666b3a 100644
--- a/javascript/progressBar.js
+++ b/javascript/progressBar.js
@@ -12,8 +12,10 @@ function formatTime(secs) {
function checkPaused(state) {
lastState.paused = state ? !state : !lastState.paused;
- document.getElementById('txt2img_pause').innerText = lastState.paused ? 'Resume' : 'Pause';
- document.getElementById('img2img_pause').innerText = lastState.paused ? 'Resume' : 'Pause';
+ const t_el = document.getElementById('txt2img_pause');
+ const i_el = document.getElementById('img2img_pause');
+ if (t_el) t_el.innerText = lastState.paused ? 'Resume' : 'Pause';
+ if (i_el) i_el.innerText = lastState.paused ? 'Resume' : 'Pause';
}
function setProgress(res) {
@@ -87,7 +89,9 @@ function requestProgress(id_task, progressEl, galleryEl, atEnd = null, onProgres
debug('taskEnd:', id_task);
localStorage.removeItem('task');
setProgress();
- if (parentGallery && livePreview) parentGallery.removeChild(livePreview);
+ try {
+ if (parentGallery && livePreview) parentGallery.removeChild(livePreview);
+ } catch { /* ignore */ }
checkPaused(true);
sendNotification();
if (atEnd) atEnd();
@@ -118,7 +122,7 @@ function requestProgress(id_task, progressEl, galleryEl, atEnd = null, onProgres
done();
};
- xhrPost('./internal/progress', { id_task, id_live_preview }, onProgressHandler, onProgressErrorHandler);
+ xhrPost('./internal/progress', { id_task, id_live_preview }, onProgressHandler, onProgressErrorHandler, false, 5000);
};
start(id_task, 0);
}
diff --git a/modules/shared_state.py b/modules/shared_state.py
index 7def42b8c..51d33f9ed 100644
--- a/modules/shared_state.py
+++ b/modules/shared_state.py
@@ -28,6 +28,9 @@ class State:
oom = False
debug_output = os.environ.get('SD_STATE_DEBUG', None)
+ def __str__(self) -> str:
+ return f'State: job={self.job} {self.job_no}/{self.job_count} step={self.sampling_step}/{self.sampling_steps} skipped={self.skipped} interrupted={self.interrupted} paused={self.paused} info={self.textinfo}'
+
def skip(self):
log.debug('Requested skip')
self.skipped = True
@@ -135,6 +138,8 @@ def end(self, api=None):
modules.devices.torch_gc()
def set_current_image(self):
+ if self.job == 'VAE': # avoid generating preview while vae is running
+ return
from modules.shared import opts, cmd_opts
"""sets self.current_image from self.current_latent if enough sampling steps have been made after the last call to this"""
if cmd_opts.lowvram or self.api:
From 8204f7f8cd49df67e183ef768cda480920ad2777 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 25 Nov 2024 08:49:25 -0500
Subject: [PATCH 016/162] update ui
Signed-off-by: Vladimir Mandic
---
extensions-builtin/sdnext-modernui | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index b31453f9d..a3f8a0ec4 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit b31453f9d109456819673e8574162edb70fef73c
+Subproject commit a3f8a0ec45cdc991689ee61ee79626f1b69e7c21
From 23b63098000a444297f52bd20b0eca9b3610e930 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 25 Nov 2024 14:37:25 -0500
Subject: [PATCH 017/162] ui updates
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 4 ++++
javascript/base.css | 1 -
javascript/black-teal.css | 3 ++-
javascript/extraNetworks.js | 16 ++++++++++++++
javascript/progressBar.js | 38 ++++++++++++++++++--------------
javascript/sdnext.css | 18 +++++++--------
modules/call_queue.py | 11 ++++-----
modules/processing.py | 9 +++-----
modules/processing_callbacks.py | 14 ++++++------
modules/processing_correction.py | 13 +++++++++--
modules/processing_diffusers.py | 1 -
modules/shared_state.py | 4 ++++
modules/timer.py | 6 +++++
modules/txt2img.py | 2 +-
modules/ui_common.py | 13 ++++++++---
modules/ui_extra_networks.py | 2 +-
modules/ui_img2img.py | 8 +++----
17 files changed, 106 insertions(+), 57 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9733dfa2b..170c780a5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,10 @@
- Sampler improvements
- update DPM FlowMatch samplers
- UI:
+ - improved stats on generate completion
+ - improved live preview display and performance
+ - improved accordion behavior
+ - auto-size networks height for sidebar
- browser->server logging framework
- Fixes:
- update `diffusers`
diff --git a/javascript/base.css b/javascript/base.css
index 7daa8b2bd..6c18ad7c5 100644
--- a/javascript/base.css
+++ b/javascript/base.css
@@ -25,7 +25,6 @@
.progressDiv .progress { width: 0%; height: 20px; background: #0060df; color: white; font-weight: bold; line-height: 20px; padding: 0 8px 0 0; text-align: right; overflow: visible; white-space: nowrap; padding: 0 0.5em; }
.livePreview { position: absolute; z-index: 50; background-color: transparent; width: -moz-available; width: -webkit-fill-available; }
.livePreview img { position: absolute; object-fit: contain; width: 100%; height: 100%; }
-.dark .livePreview { background-color: rgb(17 24 39 / var(--tw-bg-opacity)); }
.popup-metadata { color: white; background: #0000; display: inline-block; white-space: pre-wrap; font-size: 0.75em; }
/* fullpage image viewer */
diff --git a/javascript/black-teal.css b/javascript/black-teal.css
index c6f266c54..b73f9fdc7 100644
--- a/javascript/black-teal.css
+++ b/javascript/black-teal.css
@@ -108,6 +108,7 @@ fieldset .gr-block.gr-box, label.block span { padding: 0; margin-top: -4px; }
.eta-bar { display: none !important }
.gradio-slider { max-width: 200px; }
.gradio-slider input[type="number"] { background: var(--neutral-950); margin-top: 2px; }
+.gradio-image { height: unset !important; }
svg.feather.feather-image, .feather .feather-image { display: none }
.gap-2 { padding-top: 8px; }
.gr-box > div > div > input.gr-text-input { right: 0; width: 4em; padding: 0; top: -12px; border: none; max-height: 20px; }
@@ -134,7 +135,7 @@ svg.feather.feather-image, .feather .feather-image { display: none }
.gallery-item { box-shadow: none !important; }
.performance { color: #888; }
.extra-networks { border-left: 2px solid var(--highlight-color) !important; padding-left: 4px; }
-.image-buttons { gap: 10px !important; justify-content: center; }
+.image-buttons { justify-content: center; gap: 0 !important; }
.image-buttons > button { max-width: 160px; }
.tooltip { background: var(--primary-300); color: black; border: none; border-radius: var(--radius-lg) }
#system_row > button, #settings_row > button, #config_row > button { max-width: 10em; }
diff --git a/javascript/extraNetworks.js b/javascript/extraNetworks.js
index 622e40faf..1d1bcfb24 100644
--- a/javascript/extraNetworks.js
+++ b/javascript/extraNetworks.js
@@ -434,6 +434,22 @@ function setupExtraNetworksForTab(tabname) {
};
}
+ // auto-resize networks sidebar
+ const resizeObserver = new ResizeObserver((entries) => {
+ for (const entry of entries) {
+ for (const el of Array.from(gradioApp().getElementById(`${tabname}_extra_tabs`).querySelectorAll('.extra-networks-page'))) {
+ const h = Math.trunc(entry.contentRect.height);
+ if (h <= 0) return;
+ if (window.opts.extra_networks_card_cover === 'sidebar' && window.opts.theme_type === 'Standard') el.style.height = `max(55vh, ${h - 90}px)`;
+ // log(`${tabname} height: ${entry.target.id}=${h} ${el.id}=${el.clientHeight}`);
+ }
+ }
+ });
+ const settingsEl = gradioApp().getElementById(`${tabname}_settings`);
+ const interfaceEl = gradioApp().getElementById(`${tabname}_interface`);
+ if (settingsEl) resizeObserver.observe(settingsEl);
+ if (interfaceEl) resizeObserver.observe(interfaceEl);
+
// en style
if (!en) return;
let lastView;
diff --git a/javascript/progressBar.js b/javascript/progressBar.js
index 52f666b3a..ff9be4666 100644
--- a/javascript/progressBar.js
+++ b/javascript/progressBar.js
@@ -68,29 +68,33 @@ function requestProgress(id_task, progressEl, galleryEl, atEnd = null, onProgres
let img;
const initLivePreview = () => {
+ if (!parentGallery) return;
+ const footers = Array.from(gradioApp().querySelectorAll('.gallery_footer'));
+ for (const footer of footers) footer.style.display = 'none'; // remove all footers
+
+ livePreview = document.createElement('div');
+ livePreview.className = 'livePreview';
+ parentGallery.insertBefore(livePreview, galleryEl);
img = new Image();
- if (parentGallery) {
- livePreview = document.createElement('div');
- livePreview.className = 'livePreview';
- parentGallery.insertBefore(livePreview, galleryEl);
- const rect = galleryEl.getBoundingClientRect();
- if (rect.width) {
- livePreview.style.width = `${rect.width}px`;
- livePreview.style.height = `${rect.height}px`;
- }
- img.onload = () => {
- livePreview.appendChild(img);
- if (livePreview.childElementCount > 2) livePreview.removeChild(livePreview.firstElementChild);
- };
- }
+ img.id = 'livePreviewImage';
+ livePreview.appendChild(img);
+ img.onload = () => {
+ img.style.width = `min(100%, max(${img.naturalWidth}px, 512px))`;
+ parentGallery.style.minHeight = `${img.height}px`;
+ };
};
const done = () => {
debug('taskEnd:', id_task);
localStorage.removeItem('task');
setProgress();
+ const footers = Array.from(gradioApp().querySelectorAll('.gallery_footer'));
+ for (const footer of footers) footer.style.display = 'flex'; // remove all footers
try {
- if (parentGallery && livePreview) parentGallery.removeChild(livePreview);
+ if (parentGallery && livePreview) {
+ parentGallery.removeChild(livePreview);
+ parentGallery.style.minHeight = 'unset';
+ }
} catch { /* ignore */ }
checkPaused(true);
sendNotification();
@@ -112,7 +116,9 @@ function requestProgress(id_task, progressEl, galleryEl, atEnd = null, onProgres
}
setProgress(res);
if (res.live_preview && !livePreview) initLivePreview();
- if (res.live_preview && galleryEl) img.src = res.live_preview;
+ if (res.live_preview && galleryEl) {
+ if (img.src !== res.live_preview) img.src = res.live_preview;
+ }
if (onProgress) onProgress(res);
setTimeout(() => start(id_task, id_live_preview), opts.live_preview_refresh_period || 500);
};
diff --git a/javascript/sdnext.css b/javascript/sdnext.css
index 08fae2eb8..240b7492f 100644
--- a/javascript/sdnext.css
+++ b/javascript/sdnext.css
@@ -16,7 +16,7 @@ tr { border-bottom: none !important; padding: 0 0.5em !important; }
td > div > span { overflow-y: auto; max-height: 3em; overflow-x: hidden; }
textarea { overflow-y: auto !important; }
span { font-size: var(--text-md) !important; }
-button { font-size: var(--text-lg) !important; }
+button { font-size: var(--text-lg) !important; min-width: unset !important; }
input[type='color'] { width: 64px; height: 32px; }
input::-webkit-outer-spin-button, input::-webkit-inner-spin-button { margin-left: 4px; }
@@ -83,13 +83,14 @@ button.custom-button { border-radius: var(--button-large-radius); padding: var(-
.block.token-counter div{ display: inline; }
.block.token-counter span{ padding: 0.1em 0.75em; }
.performance { font-size: var(--text-xs); color: #444; }
-.performance p { display: inline-block; color: var(--body-text-color-subdued) !important }
+.performance p { display: inline-block; color: var(--primary-500) !important }
.performance .time { margin-right: 0; }
.thumbnails { background: var(--body-background-fill); }
-.control-image { height: calc(100vw/3) !important; }
.prompt textarea { resize: vertical; }
+.image-container { height: unset !important; }
+.control-image { height: unset !important; }
+.grid-wrap { overflow-y: auto !important; }
#control_results { margin: 0; padding: 0; }
-#control_gallery { height: calc(100vw/3 + 60px); }
#txt2img_gallery, #img2img_gallery { height: 50vh; }
#control-result { background: var(--button-secondary-background-fill); padding: 0.2em; }
#control-inputs { margin-top: 1em; }
@@ -122,7 +123,7 @@ div#extras_scale_to_tab div.form { flex-direction: row; }
#img2img_sketch, #img2maskimg, #inpaint_sketch { overflow: overlay !important; resize: auto; background: var(--panel-background-fill); z-index: 5; }
.image-buttons button { min-width: auto; }
.infotext { overflow-wrap: break-word; line-height: 1.5em; font-size: 0.95em !important; }
-.infotext > p { padding-left: 1em; text-indent: -1em; white-space: pre-wrap; color: var(--block-info-text-color) !important; }
+.infotext > p { white-space: pre-wrap; color: var(--block-info-text-color) !important; }
.tooltip { display: block; position: fixed; top: 1em; right: 1em; padding: 0.5em; background: var(--input-background-fill); color: var(--body-text-color); border: 1pt solid var(--button-primary-border-color);
width: 22em; min-height: 1.3em; font-size: var(--text-xs); transition: opacity 0.2s ease-in; pointer-events: none; opacity: 0; z-index: 999; }
.tooltip-show { opacity: 0.9; }
@@ -158,11 +159,10 @@ div#extras_scale_to_tab div.form { flex-direction: row; }
.progressDiv { position: relative; height: 20px; background: #b4c0cc; margin-bottom: -3px; }
.dark .progressDiv { background: #424c5b; }
.progressDiv .progress { width: 0%; height: 20px; background: #0060df; color: white; font-weight: bold; line-height: 20px; padding: 0 8px 0 0; text-align: right; overflow: visible; white-space: nowrap; padding: 0 0.5em; }
-.livePreview { position: absolute; z-index: 50; background-color: transparent; width: -moz-available; width: -webkit-fill-available; }
-.livePreview img { position: absolute; object-fit: contain; width: 100%; height: 100%; }
-.dark .livePreview { background-color: rgb(17 24 39 / var(--tw-bg-opacity)); }
+.livePreview { position: absolute; z-index: 50; width: -moz-available; width: -webkit-fill-available; height: 100%; background-color: var(--background-color); }
+.livePreview img { object-fit: contain; width: 100%; justify-self: center; }
.popup-metadata { color: white; background: #0000; display: inline-block; white-space: pre-wrap; font-size: var(--text-xxs); }
-
+.generating { animation: unset !important; border: unset !important; }
/* fullpage image viewer */
#lightboxModal { display: none; position: fixed; z-index: 1001; left: 0; top: 0; width: 100%; height: 100%; overflow: hidden; background-color: rgba(20, 20, 20, 0.75); backdrop-filter: blur(6px);
user-select: none; -webkit-user-select: none; flex-direction: row; font-family: 'NotoSans';}
diff --git a/modules/call_queue.py b/modules/call_queue.py
index 4065d13d9..cdc2fe1f7 100644
--- a/modules/call_queue.py
+++ b/modules/call_queue.py
@@ -2,7 +2,7 @@
import threading
import time
import cProfile
-from modules import shared, progress, errors
+from modules import shared, progress, errors, timer
queue_lock = threading.Lock()
@@ -73,15 +73,16 @@ def f(*args, extra_outputs_array=extra_outputs, **kwargs):
elapsed_m = int(elapsed // 60)
elapsed_s = elapsed % 60
elapsed_text = f"{elapsed_m}m {elapsed_s:.2f}s" if elapsed_m > 0 else f"{elapsed_s:.2f}s"
+ summary = timer.process.summary(min_time=0.1, total=False).replace('=', ' ')
vram_html = ''
if not shared.mem_mon.disabled:
vram = {k: -(v//-(1024*1024)) for k, v in shared.mem_mon.read().items()}
+ used = round(100 * vram['used'] / (vram['total'] + 0.001))
if vram.get('active_peak', 0) > 0:
- vram_html = " | "
- vram_html += f"GPU active {max(vram['active_peak'], vram['reserved_peak'])} MB reserved {vram['reserved']} | used {vram['used']} MB free {vram['free']} MB total {vram['total']} MB"
+ vram_html = " | "
+ vram_html += f"GPU {max(vram['active_peak'], vram['reserved_peak'])} MB {used}%"
vram_html += f" | retries {vram['retries']} oom {vram['oom']}" if vram.get('retries', 0) > 0 or vram.get('oom', 0) > 0 else ''
-            vram_html += ""
if isinstance(res, list):
- res[-1] += f""
+ res[-1] += f""
return tuple(res)
return f
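Note: the byte-to-MB conversion and the new used-percentage figure in the hunk above rely on two compact idioms; a minimal standalone sketch (with made-up example values, not part of the patch) of what they compute:

```python
# Illustrative sketch of the VRAM math used above (example byte values are made up).
# -(v // -n) is ceiling division, so byte counts round up to whole MB;
# the +0.001 in the denominator guards against division by a zero total.
vram_bytes = {'used': 5_368_709_120, 'total': 8_589_934_592}       # example readings
vram = {k: -(v // -(1024 * 1024)) for k, v in vram_bytes.items()}  # ceil to MB -> {'used': 5120, 'total': 8192}
used = round(100 * vram['used'] / (vram['total'] + 0.001))         # -> 62
print(f"GPU {vram['used']} MB {used}%")                            # -> GPU 5120 MB 62%
```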
diff --git a/modules/processing.py b/modules/processing.py
index 0d557e64e..16e7a9213 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -323,7 +323,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
processed = p.scripts.process_images(p)
if processed is not None:
samples = processed.images
- infotexts = processed.infotexts
+ infotexts += processed.infotexts
if samples is None:
if not shared.native:
from modules.processing_original import process_original
@@ -393,11 +393,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
if shared.opts.mask_apply_overlay:
image = apply_overlay(image, p.paste_to, i, p.overlay_images)
- if len(infotexts) > i:
- info = infotexts[i]
- else:
- info = create_infotext(p, p.prompts, p.seeds, p.subseeds, index=i, all_negative_prompts=p.negative_prompts)
- infotexts.append(info)
+ info = create_infotext(p, p.prompts, p.seeds, p.subseeds, index=i, all_negative_prompts=p.negative_prompts)
+ infotexts.append(info)
image.info["parameters"] = info
output_images.append(image)
if shared.opts.samples_save and not p.do_not_save_samples and p.outpath_samples is not None:
diff --git a/modules/processing_callbacks.py b/modules/processing_callbacks.py
index 52ea3e575..7d6c8ec04 100644
--- a/modules/processing_callbacks.py
+++ b/modules/processing_callbacks.py
@@ -6,7 +6,8 @@
from modules import shared, processing_correction, extra_networks, timer, prompt_parser_diffusers
p = None
-debug_callback = shared.log.trace if os.environ.get('SD_CALLBACK_DEBUG', None) is not None else lambda *args, **kwargs: None
+debug = os.environ.get('SD_CALLBACK_DEBUG', None) is not None
+debug_callback = shared.log.trace if debug else lambda *args, **kwargs: None
def set_callbacks_p(processing):
@@ -50,7 +51,8 @@ def diffusers_callback(pipe, step: int = 0, timestep: int = 0, kwargs: dict = {}
if p is None:
return kwargs
latents = kwargs.get('latents', None)
- debug_callback(f'Callback: step={step} timestep={timestep} latents={latents.shape if latents is not None else None} kwargs={list(kwargs)}')
+ if debug:
+ debug_callback(f'Callback: step={step} timestep={timestep} latents={latents.shape if latents is not None else None} kwargs={list(kwargs)}')
order = getattr(pipe.scheduler, "order", 1) if hasattr(pipe, 'scheduler') else 1
shared.state.sampling_step = step // order
if shared.state.interrupted or shared.state.skipped:
@@ -67,7 +69,7 @@ def diffusers_callback(pipe, step: int = 0, timestep: int = 0, kwargs: dict = {}
return kwargs
elif shared.opts.nan_skip:
assert not torch.isnan(latents[..., 0, 0]).all(), f'NaN detected at step {step}: Skipping...'
- if len(getattr(p, 'ip_adapter_names', [])) > 0:
+ if len(getattr(p, 'ip_adapter_names', [])) > 0 and p.ip_adapter_names[0] != 'None':
ip_adapter_scales = list(p.ip_adapter_scales)
ip_adapter_starts = list(p.ip_adapter_starts)
ip_adapter_ends = list(p.ip_adapter_ends)
@@ -78,7 +80,7 @@ def diffusers_callback(pipe, step: int = 0, timestep: int = 0, kwargs: dict = {}
debug_callback(f"Callback: IP Adapter scales={ip_adapter_scales}")
pipe.set_ip_adapter_scale(ip_adapter_scales)
if step != getattr(pipe, 'num_timesteps', 0):
- kwargs = processing_correction.correction_callback(p, timestep, kwargs)
+ kwargs = processing_correction.correction_callback(p, timestep, kwargs, initial=step == 0)
kwargs = prompt_callback(step, kwargs) # monkey patch for diffusers callback issues
if step == int(getattr(pipe, 'num_timesteps', 100) * p.cfg_end) and 'prompt_embeds' in kwargs and 'negative_prompt_embeds' in kwargs:
if "PAG" in shared.sd_model.__class__.__name__:
@@ -105,7 +107,5 @@ def diffusers_callback(pipe, step: int = 0, timestep: int = 0, kwargs: dict = {}
if shared.cmd_opts.profile and shared.profiler is not None:
shared.profiler.step()
t1 = time.time()
- if 'callback' not in timer.process.records:
- timer.process.records['callback'] = 0
- timer.process.records['callback'] += t1 - t0
+ timer.process.add('callback', t1 - t0)
return kwargs
diff --git a/modules/processing_correction.py b/modules/processing_correction.py
index e715d8c49..050fae889 100644
--- a/modules/processing_correction.py
+++ b/modules/processing_correction.py
@@ -7,9 +7,11 @@
import torch
from modules import shared, sd_vae_taesd, devices
+
debug_enabled = os.environ.get('SD_HDR_DEBUG', None) is not None
debug = shared.log.trace if debug_enabled else lambda *args, **kwargs: None
debug('Trace: HDR')
+skip_correction = False
def sharpen_tensor(tensor, ratio=0):
@@ -116,8 +118,15 @@ def correction(p, timestep, latent):
return latent
-def correction_callback(p, timestep, kwargs):
- if not any([p.hdr_clamp, p.hdr_mode, p.hdr_maximize, p.hdr_sharpen, p.hdr_color, p.hdr_brightness, p.hdr_tint_ratio]):
+def correction_callback(p, timestep, kwargs, initial: bool = False):
+ global skip_correction # pylint: disable=global-statement
+ if initial:
+ if not any([p.hdr_clamp, p.hdr_mode, p.hdr_maximize, p.hdr_sharpen, p.hdr_color, p.hdr_brightness, p.hdr_tint_ratio]):
+ skip_correction = True
+ return kwargs
+ else:
+ skip_correction = False
+ elif skip_correction:
return kwargs
latents = kwargs["latents"]
if debug_enabled:
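Note: the gist of the `skip_correction` change above is to evaluate the HDR flags once on the initial callback step and short-circuit every later step. A minimal sketch of that caching pattern, with simplified names rather than the project's exact code:

```python
# Minimal sketch of the pattern introduced above (names simplified, illustrative only).
_skip = False  # module-level cache, mirrors skip_correction

def correction_callback(p, timestep, kwargs, initial=False):
    global _skip
    if initial:  # first step: decide once whether any HDR correction is requested
        _skip = not any([p.hdr_clamp, p.hdr_mode, p.hdr_maximize, p.hdr_sharpen,
                         p.hdr_color, p.hdr_brightness, p.hdr_tint_ratio])
    if _skip:    # later steps reuse the cached decision instead of re-checking the flags
        return kwargs
    # ... apply corrections to kwargs["latents"] here ...
    return kwargs
```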
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 44dff811b..7b91fcd42 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -75,7 +75,6 @@ def process_base(p: processing.StableDiffusionProcessing):
clip_skip=p.clip_skip,
desc='Base',
)
- timer.process.record('args')
shared.state.sampling_steps = base_args.get('prior_num_inference_steps', None) or p.steps or base_args.get('num_inference_steps', None)
if shared.opts.scheduler_eta is not None and shared.opts.scheduler_eta > 0 and shared.opts.scheduler_eta < 1:
p.extra_generation_params["Sampler Eta"] = shared.opts.scheduler_eta
diff --git a/modules/shared_state.py b/modules/shared_state.py
index 51d33f9ed..0173b540c 100644
--- a/modules/shared_state.py
+++ b/modules/shared_state.py
@@ -2,6 +2,7 @@
import time
import datetime
from modules.errors import log
+from modules import timer
class State:
@@ -150,6 +151,7 @@ def set_current_image(self):
def do_set_current_image(self):
if self.current_latent is None:
return
+ t0 = time.time()
from modules.shared import opts
import modules.sd_samplers # pylint: disable=W0621
try:
@@ -159,6 +161,8 @@ def do_set_current_image(self):
except Exception:
# log.error(f'Error setting current image: step={self.sampling_step} {e}')
pass
+ t1 = time.time()
+ timer.process.add('preview', t1 - t0)
def assign_current_image(self, image):
self.current_image = image
diff --git a/modules/timer.py b/modules/timer.py
index 8a5db726d..7657ac8e8 100644
--- a/modules/timer.py
+++ b/modules/timer.py
@@ -15,6 +15,12 @@ def elapsed(self, reset=True):
self.start = end
return res
+ def add(self, name, t):
+ if name not in self.records:
+ self.records[name] = t
+ else:
+ self.records[name] += t
+
def record(self, category=None, extra_time=0, reset=True):
e = self.elapsed(reset)
if category is None:
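Note: the new `Timer.add` helper lets callers accumulate time into a named record without the previous init-or-increment boilerplate (see the callback and preview changes above). A rough usage sketch, assuming the shared `timer.process` instance:

```python
# Rough usage sketch of Timer.add (assumes the shared `timer.process` instance).
import time
from modules import timer

t0 = time.time()
# ... some repeated per-step work, e.g. decoding a live preview ...
timer.process.add('preview', time.time() - t0)            # creates or increments the 'preview' record
print(timer.process.summary(min_time=0.1, total=False))   # aggregated totals shown in the stats line
```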
diff --git a/modules/txt2img.py b/modules/txt2img.py
index 2f0e2f4b3..e82c744a2 100644
--- a/modules/txt2img.py
+++ b/modules/txt2img.py
@@ -88,7 +88,7 @@ def txt2img(id_task, state,
p.scripts = scripts.scripts_txt2img
p.script_args = args
p.state = state
- processed = scripts.scripts_txt2img.run(p, *args)
+ processed: processing.Processed = scripts.scripts_txt2img.run(p, *args)
if processed is None:
processed = processing.process_images(p)
processed = scripts.scripts_txt2img.after(p, processed, *args)
diff --git a/modules/ui_common.py b/modules/ui_common.py
index 9c4bb5cdc..e21033718 100644
--- a/modules/ui_common.py
+++ b/modules/ui_common.py
@@ -245,10 +245,17 @@ def create_output_panel(tabname, preview=True, prompt=None, height=None):
gr.HTML(value="", elem_id="main_info", visible=False, elem_classes=["main-info"])
# columns are for <576px, <768px, <992px, <1200px, <1400px, >1400px
result_gallery = gr.Gallery(value=[],
- label='Output', show_label=False, show_download_button=True, allow_preview=True, container=False, preview=preview,
- columns=4, object_fit='scale-down', height=height,
+ label='Output',
+ show_label=False,
+ show_download_button=True,
+ allow_preview=True,
+ container=False,
+ preview=preview,
+ columns=4,
+ object_fit='scale-down',
+ height=height,
elem_id=f"{tabname}_gallery",
- )
+ )
if prompt is not None:
interrogate_clip_btn, interrogate_booru_btn = ui_sections.create_interrogate_buttons('control')
interrogate_clip_btn.click(fn=interrogate_clip, inputs=[result_gallery], outputs=[prompt])
diff --git a/modules/ui_extra_networks.py b/modules/ui_extra_networks.py
index e152dc74b..c326219df 100644
--- a/modules/ui_extra_networks.py
+++ b/modules/ui_extra_networks.py
@@ -16,7 +16,7 @@
import gradio as gr
from PIL import Image
from starlette.responses import FileResponse, JSONResponse
-from modules import paths, shared, scripts, files_cache, errors, infotext
+from modules import paths, shared, files_cache, errors, infotext
from modules.ui_components import ToolButton
import modules.ui_symbols as symbols
diff --git a/modules/ui_img2img.py b/modules/ui_img2img.py
index 22c89dac8..046c181ce 100644
--- a/modules/ui_img2img.py
+++ b/modules/ui_img2img.py
@@ -68,20 +68,20 @@ def add_copy_image_controls(tab_name, elem):
img2img_selected_tab = gr.State(0) # pylint: disable=abstract-class-instantiated
state = gr.Textbox(value='', visible=False)
with gr.TabItem('Image', id='img2img', elem_id="img2img_img2img_tab") as tab_img2img:
- init_img = gr.Image(label="Image for img2img", elem_id="img2img_image", show_label=False, source="upload", interactive=True, type="pil", tool="editor", image_mode="RGBA", height=512)
+ init_img = gr.Image(label="Image for img2img", elem_id="img2img_image", show_label=False, source="upload", interactive=True, type="pil", tool="editor", image_mode="RGBA")
interrogate_clip, interrogate_booru = ui_sections.create_interrogate_buttons('img2img')
add_copy_image_controls('img2img', init_img)
with gr.TabItem('Sketch', id='img2img_sketch', elem_id="img2img_img2img_sketch_tab") as tab_sketch:
- sketch = gr.Image(label="Image for img2img", elem_id="img2img_sketch", show_label=False, source="upload", interactive=True, type="pil", tool="color-sketch", image_mode="RGBA", height=512)
+ sketch = gr.Image(label="Image for img2img", elem_id="img2img_sketch", show_label=False, source="upload", interactive=True, type="pil", tool="color-sketch", image_mode="RGBA")
add_copy_image_controls('sketch', sketch)
with gr.TabItem('Inpaint', id='inpaint', elem_id="img2img_inpaint_tab") as tab_inpaint:
- init_img_with_mask = gr.Image(label="Image for inpainting with mask", show_label=False, elem_id="img2maskimg", source="upload", interactive=True, type="pil", tool="sketch", image_mode="RGBA", height=512)
+ init_img_with_mask = gr.Image(label="Image for inpainting with mask", show_label=False, elem_id="img2maskimg", source="upload", interactive=True, type="pil", tool="sketch", image_mode="RGBA")
add_copy_image_controls('inpaint', init_img_with_mask)
with gr.TabItem('Composite', id='inpaint_sketch', elem_id="img2img_inpaint_sketch_tab") as tab_inpaint_color:
- inpaint_color_sketch = gr.Image(label="Color sketch inpainting", show_label=False, elem_id="inpaint_sketch", source="upload", interactive=True, type="pil", tool="color-sketch", image_mode="RGBA", height=512)
+ inpaint_color_sketch = gr.Image(label="Color sketch inpainting", show_label=False, elem_id="inpaint_sketch", source="upload", interactive=True, type="pil", tool="color-sketch", image_mode="RGBA")
inpaint_color_sketch_orig = gr.State(None) # pylint: disable=abstract-class-instantiated
add_copy_image_controls('inpaint_sketch', inpaint_color_sketch)
From 58146c0fc7851e44a15f66fda4a381b914454442 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 25 Nov 2024 14:42:46 -0500
Subject: [PATCH 018/162] correct preview stats
Signed-off-by: Vladimir Mandic
---
modules/sd_samplers_common.py | 6 +++++-
modules/shared_state.py | 4 ----
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index a487fe9b7..f6f6c18d5 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -1,9 +1,10 @@
+import time
import threading
from collections import namedtuple
import torch
import torchvision.transforms as T
from PIL import Image
-from modules import shared, devices, processing, images, sd_vae_approx, sd_vae_taesd, sd_vae_stablecascade, sd_samplers
+from modules import shared, devices, processing, images, sd_vae_approx, sd_vae_taesd, sd_vae_stablecascade, sd_samplers, timer
SamplerData = namedtuple('SamplerData', ['name', 'constructor', 'aliases', 'options'])
@@ -33,6 +34,7 @@ def setup_img2img_steps(p, steps=None):
def single_sample_to_image(sample, approximation=None):
with queue_lock:
+ t0 = time.time()
sd_cascade = False
if approximation is None:
approximation = approximation_indexes.get(shared.opts.show_progress_type, None)
@@ -84,6 +86,8 @@ def single_sample_to_image(sample, approximation=None):
except Exception as e:
warn_once(f'Preview: {e}')
image = Image.new(mode="RGB", size=(512, 512))
+ t1 = time.time()
+ timer.process.add('preview', t1 - t0)
return image
diff --git a/modules/shared_state.py b/modules/shared_state.py
index 0173b540c..51d33f9ed 100644
--- a/modules/shared_state.py
+++ b/modules/shared_state.py
@@ -2,7 +2,6 @@
import time
import datetime
from modules.errors import log
-from modules import timer
class State:
@@ -151,7 +150,6 @@ def set_current_image(self):
def do_set_current_image(self):
if self.current_latent is None:
return
- t0 = time.time()
from modules.shared import opts
import modules.sd_samplers # pylint: disable=W0621
try:
@@ -161,8 +159,6 @@ def do_set_current_image(self):
except Exception:
# log.error(f'Error setting current image: step={self.sampling_step} {e}')
pass
- t1 = time.time()
- timer.process.add('preview', t1 - t0)
def assign_current_image(self, image):
self.current_image = image
From 4468144031d7c28c35c609c19889ca433a673c47 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 25 Nov 2024 19:41:07 -0500
Subject: [PATCH 019/162] update modernui
Signed-off-by: Vladimir Mandic
---
extensions-builtin/sdnext-modernui | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index a3f8a0ec4..ea7062d27 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit a3f8a0ec45cdc991689ee61ee79626f1b69e7c21
+Subproject commit ea7062d27d11ee50a0f22a34753f81a9c9e9d57c
From 14c8414025aeafe65d1ff579589ddf9a45b5fb52 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 25 Nov 2024 21:02:46 -0500
Subject: [PATCH 020/162] update stats and previews
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 3 ++
extensions-builtin/sdnext-modernui | 2 +-
javascript/progressBar.js | 6 +++-
modules/control/run.py | 7 ++--
modules/ui_common.py | 1 +
modules/ui_control.py | 57 ++++++++++++++++++++++--------
6 files changed, 54 insertions(+), 22 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 170c780a5..3ff4f0944 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -33,6 +33,9 @@
- improved live preview display and performance
- improved accordion behavior
- auto-size networks height for sidebar
+ - control: hide preview column by default
+ - control: option to hide input column
+ - control: add stats
- browser->server logging framework
- Fixes:
- update `diffusers`
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index ea7062d27..3008cee4b 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit ea7062d27d11ee50a0f22a34753f81a9c9e9d57c
+Subproject commit 3008cee4b67bb00f8f1a4fe4510ec27ba92aa418
diff --git a/javascript/progressBar.js b/javascript/progressBar.js
index ff9be4666..c385fe5db 100644
--- a/javascript/progressBar.js
+++ b/javascript/progressBar.js
@@ -71,6 +71,8 @@ function requestProgress(id_task, progressEl, galleryEl, atEnd = null, onProgres
if (!parentGallery) return;
const footers = Array.from(gradioApp().querySelectorAll('.gallery_footer'));
for (const footer of footers) footer.style.display = 'none'; // remove all footers
+ const galleries = Array.from(gradioApp().querySelectorAll('.gallery_main'));
+      for (const gallery of galleries) gallery.style.display = 'none'; // hide all galleries
livePreview = document.createElement('div');
livePreview.className = 'livePreview';
@@ -89,7 +91,9 @@ function requestProgress(id_task, progressEl, galleryEl, atEnd = null, onProgres
localStorage.removeItem('task');
setProgress();
const footers = Array.from(gradioApp().querySelectorAll('.gallery_footer'));
- for (const footer of footers) footer.style.display = 'flex'; // remove all footers
+ for (const footer of footers) footer.style.display = 'flex'; // restore all footers
+ const galleries = Array.from(gradioApp().querySelectorAll('.gallery_main'));
+      for (const gallery of galleries) gallery.style.display = 'flex'; // restore all galleries
try {
if (parentGallery && livePreview) {
parentGallery.removeChild(livePreview);
diff --git a/modules/control/run.py b/modules/control/run.py
index 5d6343c98..88dddc213 100644
--- a/modules/control/run.py
+++ b/modules/control/run.py
@@ -717,14 +717,11 @@ def set_pipe():
shared.log.error(f'Control pipeline failed: type={unit_type} units={len(active_model)} error={e}')
errors.display(e, 'Control')
- t_end = time.time()
-
if len(output_images) == 0:
output_images = None
image_txt = '| Images None'
else:
- image_str = [f'{image.width}x{image.height}' for image in output_images]
- image_txt = f'| Time {t_end-t_start:.2f}s | Images {len(output_images)} | Size {" ".join(image_str)}'
+ image_txt = ''
p.init_images = output_images # may be used for hires
if video_type != 'None' and isinstance(output_images, list):
@@ -738,7 +735,7 @@ def set_pipe():
restore_pipeline()
debug(f'Ready: {image_txt}')
-    html_txt = f'Ready {image_txt}'
+    html_txt = f'Ready {image_txt}' if image_txt != '' else ''
if len(info_txt) > 0:
html_txt = html_txt + infotext_to_html(info_txt[0])
if is_generator:
diff --git a/modules/ui_common.py b/modules/ui_common.py
index e21033718..3e7c68bec 100644
--- a/modules/ui_common.py
+++ b/modules/ui_common.py
@@ -255,6 +255,7 @@ def create_output_panel(tabname, preview=True, prompt=None, height=None):
object_fit='scale-down',
height=height,
elem_id=f"{tabname}_gallery",
+ elem_classes=["gallery_main"],
)
if prompt is not None:
interrogate_clip_btn, interrogate_booru_btn = ui_sections.create_interrogate_buttons('control')
diff --git a/modules/ui_control.py b/modules/ui_control.py
index 0bf070036..f4329663a 100644
--- a/modules/ui_control.py
+++ b/modules/ui_control.py
@@ -9,7 +9,7 @@
from modules.control.units import lite # vislearn ControlNet-XS
from modules.control.units import t2iadapter # TencentARC T2I-Adapter
from modules.control.units import reference # reference pipeline
-from modules import errors, shared, progress, ui_components, ui_symbols, ui_common, ui_sections, generation_parameters_copypaste, call_queue, scripts, masking, images, processing_vae # pylint: disable=ungrouped-imports
+from modules import errors, shared, progress, ui_components, ui_symbols, ui_common, ui_sections, generation_parameters_copypaste, call_queue, scripts, masking, images, processing_vae, timer # pylint: disable=ungrouped-imports
from modules import ui_control_helpers as helpers
@@ -21,13 +21,36 @@
debug('Trace: CONTROL')
-def return_controls(res):
+def return_stats(t: float = None):
+ if t is None:
+ elapsed_text = ''
+ else:
+ elapsed = time.perf_counter() - t
+ elapsed_m = int(elapsed // 60)
+ elapsed_s = elapsed % 60
+ elapsed_text = f"Time: {elapsed_m}m {elapsed_s:.2f}s |" if elapsed_m > 0 else f"Time: {elapsed_s:.2f}s |"
+ summary = timer.process.summary(min_time=0.1, total=False).replace('=', ' ')
+ vram_html = ''
+ if not shared.mem_mon.disabled:
+ vram = {k: -(v//-(1024*1024)) for k, v in shared.mem_mon.read().items()}
+ used = round(100 * vram['used'] / (vram['total'] + 0.001))
+ if vram.get('active_peak', 0) > 0:
+ vram_html += f"| GPU {max(vram['active_peak'], vram['reserved_peak'])} MB {used}%"
+ vram_html += f" | retries {vram['retries']} oom {vram['oom']}" if vram.get('retries', 0) > 0 or vram.get('oom', 0) > 0 else ''
+ return f""
+
+
+def return_controls(res, t: float = None):
# return preview, image, video, gallery, text
debug(f'Control received: type={type(res)} {res}')
+ if t is None:
+ perf = ''
+ else:
+ perf = return_stats(t)
if res is None: # no response
- return [None, None, None, None, '']
+ return [None, None, None, None, '', perf]
elif isinstance(res, str): # error response
- return [None, None, None, None, res]
+ return [None, None, None, None, res, perf]
elif isinstance(res, tuple): # standard response received as tuple via control_run->yield(output_images, process_image, result_txt)
preview_image = res[1] # may be None
output_image = res[0][0] if isinstance(res[0], list) else res[0] # may be image or list of images
@@ -37,9 +60,9 @@ def return_controls(res):
output_gallery = [res[0]] if res[0] is not None else [] # must return list, but can receive single image
result_txt = res[2] if len(res) > 2 else '' # do we have a message
output_video = res[3] if len(res) > 3 else None # do we have a video filename
- return [preview_image, output_image, output_video, output_gallery, result_txt]
+ return [preview_image, output_image, output_video, output_gallery, result_txt, perf]
else: # unexpected
- return [None, None, None, None, f'Control: Unexpected response: {type(res)}']
+ return [None, None, None, None, f'Control: Unexpected response: {type(res)}', perf]
def get_units(*values):
@@ -67,17 +90,18 @@ def generate_click(job_id: str, state: str, active_tab: str, *args):
shared.state.begin('Generate')
progress.add_task_to_queue(job_id)
with call_queue.queue_lock:
- yield [None, None, None, None, 'Control: starting']
+ yield [None, None, None, None, 'Control: starting', '']
shared.mem_mon.reset()
progress.start_task(job_id)
try:
+ t = time.perf_counter()
for results in control_run(state, units, helpers.input_source, helpers.input_init, helpers.input_mask, active_tab, True, *args):
progress.record_results(job_id, results)
- yield return_controls(results)
+ yield return_controls(results, t)
except Exception as e:
shared.log.error(f"Control exception: {e}")
errors.display(e, 'Control')
- yield [None, None, None, None, f'Control: Exception: {e}']
+ yield [None, None, None, None, f'Control: Exception: {e}', '']
progress.finish_task(job_id)
shared.state.end()
@@ -106,7 +130,8 @@ def create_ui(_blocks: gr.Blocks=None):
with gr.Accordion(open=False, label="Input", elem_id="control_input", elem_classes=["small-accordion"]):
with gr.Row():
- show_preview = gr.Checkbox(label="Show preview", value=True, elem_id="control_show_preview")
+ show_input = gr.Checkbox(label="Show input", value=True, elem_id="control_show_input")
+ show_preview = gr.Checkbox(label="Show preview", value=False, elem_id="control_show_preview")
with gr.Row():
input_type = gr.Radio(label="Input type", choices=['Control only', 'Init image same as control', 'Separate init image'], value='Control only', type='index', elem_id='control_input_type')
with gr.Row():
@@ -153,13 +178,13 @@ def create_ui(_blocks: gr.Blocks=None):
override_settings = ui_common.create_override_inputs('control')
with gr.Row(variant='compact', elem_id="control_extra_networks", elem_classes=["extra_networks_root"], visible=False) as extra_networks_ui:
- from modules import timer, ui_extra_networks
+ from modules import ui_extra_networks
extra_networks_ui = ui_extra_networks.create_ui(extra_networks_ui, btn_extra, 'control', skip_indexing=shared.opts.extra_network_skip_indexing)
timer.startup.record('ui-networks')
with gr.Row(elem_id='control-inputs'):
- with gr.Column(scale=9, elem_id='control-input-column', visible=True) as _column_input:
- gr.HTML('Control input
')
+ with gr.Column(scale=9, elem_id='control-input-column', visible=True) as column_input:
+ gr.HTML('Input')
with gr.Tabs(elem_classes=['control-tabs'], elem_id='control-tab-input'):
with gr.Tab('Image', id='in-image') as tab_image:
input_mode = gr.Label(value='select', visible=False)
@@ -190,12 +215,12 @@ def create_ui(_blocks: gr.Blocks=None):
gr.HTML('Output')
with gr.Tabs(elem_classes=['control-tabs'], elem_id='control-tab-output') as output_tabs:
with gr.Tab('Gallery', id='out-gallery'):
- output_gallery, _output_gen_info, _output_html_info, _output_html_info_formatted, _output_html_log = ui_common.create_output_panel("control", preview=True, prompt=prompt, height=gr_height)
+ output_gallery, _output_gen_info, _output_html_info, _output_html_info_formatted, output_html_log = ui_common.create_output_panel("control", preview=True, prompt=prompt, height=gr_height)
with gr.Tab('Image', id='out-image'):
output_image = gr.Image(label="Output", show_label=False, type="pil", interactive=False, tool="editor", height=gr_height, elem_id='control_output_image', elem_classes=['control-image'])
with gr.Tab('Video', id='out-video'):
output_video = gr.Video(label="Output", show_label=False, height=gr_height, elem_id='control_output_video', elem_classes=['control-image'])
- with gr.Column(scale=9, elem_id='control-preview-column', visible=True) as column_preview:
+ with gr.Column(scale=9, elem_id='control-preview-column', visible=False) as column_preview:
gr.HTML('Preview')
with gr.Tabs(elem_classes=['control-tabs'], elem_id='control-tab-preview'):
with gr.Tab('Preview', id='preview-image') as _tab_preview:
@@ -498,6 +523,7 @@ def create_ui(_blocks: gr.Blocks=None):
btn_update = gr.Button('Update', interactive=True, visible=False, elem_id='control_update')
btn_update.click(fn=get_units, inputs=controls, outputs=[], show_progress=True, queue=False)
+ show_input.change(fn=lambda x: gr.update(visible=x), inputs=[show_input], outputs=[column_input])
show_preview.change(fn=lambda x: gr.update(visible=x), inputs=[show_preview], outputs=[column_preview])
input_type.change(fn=lambda x: gr.update(visible=x == 2), inputs=[input_type], outputs=[column_init])
btn_prompt_counter.click(fn=call_queue.wrap_queued_call(ui_common.update_token_counter), inputs=[prompt, steps], outputs=[prompt_counter])
@@ -550,6 +576,7 @@ def create_ui(_blocks: gr.Blocks=None):
output_video,
output_gallery,
result_txt,
+ output_html_log,
]
control_dict = dict(
fn=generate_click,
From 49e6c1564c6821808e5ba49c93dd06feadd23979 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 26 Nov 2024 13:13:04 -0500
Subject: [PATCH 021/162] add style aligned
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 47 +++--
modules/processing_class.py | 4 +-
modules/processing_helpers.py | 4 +-
modules/sd_samplers.py | 2 +
modules/style_aligned/inversion.py | 124 ++++++++++++
modules/style_aligned/sa_handler.py | 281 ++++++++++++++++++++++++++++
scripts/style_aligned.py | 117 ++++++++++++
7 files changed, 559 insertions(+), 20 deletions(-)
create mode 100644 modules/style_aligned/inversion.py
create mode 100644 modules/style_aligned/sa_handler.py
create mode 100644 scripts/style_aligned.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3ff4f0944..8183fbbc8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,11 +2,13 @@
## Update for 2024-11-26
-- [Flux Tools](https://blackforestlabs.ai/flux-1-tools/):
+### New models and integrations
+
+- [Flux Tools](https://blackforestlabs.ai/flux-1-tools/)
**Redux** is actually a tool, **Fill** is inpaint/outpaint optimized version of *Flux-dev*
**Canny** & **Depth** are optimized versions of *Flux-dev* for their respective tasks: they are *not* ControlNets that work on top of a model
- To use, go to image or control interface and select *Flux Tools* in scripts
- All models are auto-downloaded on first use
+ to use, go to image or control interface and select *Flux Tools* in scripts
+ all models are auto-downloaded on first use
*note*: All models are [gated](https://github.com/vladmandic/automatic/wiki/Gated) and require acceptance of terms and conditions via web page
*recommended*: Enable on-the-fly [quantization](https://github.com/vladmandic/automatic/wiki/Quantization) or [compression](https://github.com/vladmandic/automatic/wiki/NNCF-Compression) to reduce resource usage
*todo*: support for Canny/Depth LoRAs
@@ -19,16 +21,23 @@
*recommended*: guidance scale 30
- [Depth](https://huggingface.co/black-forest-labs/FLUX.1-Depth-dev): ~23.8GB, replaces currently loaded model
*recommended*: guidance scale 10
-- Model loader improvements:
+- [Style Aligned Image Generation](https://style-aligned-gen.github.io/)
+ enable in scripts, compatible with sd-xl
+  enter multiple prompts in the prompt field, separated by newlines
+  style-aligned applies selected attention layers uniformly to all images to achieve consistency
+  can be used with or without an input image, in which case the first prompt establishes the baseline
+  *note:* all prompts are processed as a single batch, so vram is the limiting factor
+
+### UI and workflow improvements
+
+- **Model loader** improvements:
- detect model components on model load fail
- Flux, SD35: force unload model
- Flux: apply `bnb` quant when loading *unet/transformer*
- Flux: all-in-one safetensors
example:
- Flux: do not recast quants
-- Sampler improvements
- - update DPM FlowMatch samplers
-- UI:
+- **UI**:
- improved stats on generate completion
- improved live preview display and performance
- improved accordion behavior
@@ -37,16 +46,20 @@
- control: option to hide input column
- control: add stats
- browser->server logging framework
-- Fixes:
- - update `diffusers`
- - fix README links
- - fix sdxl controlnet single-file loader
- - relax settings validator
- - improve js progress calls resiliency
- - fix text-to-video pipeline
- - avoid live-preview if vae-decode is running
- - allow xyz-grid with multi-axis s&r
- - fix xyz-grid with lora
+- **Sampler** improvements
+ - update DPM FlowMatch samplers
+
+### Fixes:
+
+- update `diffusers`
+- fix README links
+- fix sdxl controlnet single-file loader
+- relax settings validator
+- improve js progress calls resiliency
+- fix text-to-video pipeline
+- avoid live-preview if vae-decode is running
+- allow xyz-grid with multi-axis s&r
+- fix xyz-grid with lora
## Update for 2024-11-21
diff --git a/modules/processing_class.py b/modules/processing_class.py
index 79f51576f..21e86c1b0 100644
--- a/modules/processing_class.py
+++ b/modules/processing_class.py
@@ -31,8 +31,8 @@ def __init__(self,
n_iter: int = 1,
steps: int = 50,
clip_skip: int = 1,
- width: int = 512,
- height: int = 512,
+ width: int = 1024,
+ height: int = 1024,
# samplers
sampler_index: int = None, # pylint: disable=unused-argument # used only to set sampler_name
sampler_name: str = None,
diff --git a/modules/processing_helpers.py b/modules/processing_helpers.py
index ec7fbf048..22acf296c 100644
--- a/modules/processing_helpers.py
+++ b/modules/processing_helpers.py
@@ -561,7 +561,9 @@ def save_intermediate(p, latents, suffix):
def update_sampler(p, sd_model, second_pass=False):
sampler_selection = p.hr_sampler_name if second_pass else p.sampler_name
if hasattr(sd_model, 'scheduler'):
- if sampler_selection is None or sampler_selection == 'None':
+ if sampler_selection == 'None':
+ return
+ if sampler_selection is None:
sampler = sd_samplers.all_samplers_map.get("UniPC")
else:
sampler = sd_samplers.all_samplers_map.get(sampler_selection, None)
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index e560744dd..d8416e5d9 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -47,6 +47,8 @@ def visible_sampler_names():
def create_sampler(name, model):
+ if name is None or name == 'None':
+ return model.scheduler
try:
current = model.scheduler.__class__.__name__
except Exception:
diff --git a/modules/style_aligned/inversion.py b/modules/style_aligned/inversion.py
new file mode 100644
index 000000000..8c91cc02a
--- /dev/null
+++ b/modules/style_aligned/inversion.py
@@ -0,0 +1,124 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from __future__ import annotations
+from typing import Callable, TYPE_CHECKING
+from diffusers import StableDiffusionXLPipeline
+import torch
+from tqdm import tqdm
+if TYPE_CHECKING:
+ import numpy as np
+
+
+T = torch.Tensor
+TN = T
+InversionCallback = Callable[[StableDiffusionXLPipeline, int, T, dict[str, T]], dict[str, T]]
+
+
+def _get_text_embeddings(prompt: str, tokenizer, text_encoder, device):
+ # Tokenize text and get embeddings
+ text_inputs = tokenizer(prompt, padding='max_length', max_length=tokenizer.model_max_length, truncation=True, return_tensors='pt')
+ text_input_ids = text_inputs.input_ids
+
+ with torch.no_grad():
+ prompt_embeds = text_encoder(
+ text_input_ids.to(device),
+ output_hidden_states=True,
+ )
+
+ pooled_prompt_embeds = prompt_embeds[0]
+ prompt_embeds = prompt_embeds.hidden_states[-2]
+ if prompt == '':
+ negative_prompt_embeds = torch.zeros_like(prompt_embeds)
+ negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)
+ return negative_prompt_embeds, negative_pooled_prompt_embeds
+ return prompt_embeds, pooled_prompt_embeds
+
+
+def _encode_text_sdxl(model: StableDiffusionXLPipeline, prompt: str) -> tuple[dict[str, T], T]:
+ device = model._execution_device # pylint: disable=protected-access
+ prompt_embeds, pooled_prompt_embeds, = _get_text_embeddings(prompt, model.tokenizer, model.text_encoder, device) # pylint: disable=unused-variable
+ prompt_embeds_2, pooled_prompt_embeds2, = _get_text_embeddings( prompt, model.tokenizer_2, model.text_encoder_2, device)
+ prompt_embeds = torch.cat((prompt_embeds, prompt_embeds_2), dim=-1)
+ text_encoder_projection_dim = model.text_encoder_2.config.projection_dim
+ add_time_ids = model._get_add_time_ids((1024, 1024), (0, 0), (1024, 1024), model.text_encoder.dtype, # pylint: disable=protected-access
+ text_encoder_projection_dim).to(device)
+ added_cond_kwargs = {"text_embeds": pooled_prompt_embeds2, "time_ids": add_time_ids}
+ return added_cond_kwargs, prompt_embeds
+
+
+def _encode_text_sdxl_with_negative(model: StableDiffusionXLPipeline, prompt: str) -> tuple[dict[str, T], T]:
+ added_cond_kwargs, prompt_embeds = _encode_text_sdxl(model, prompt)
+ added_cond_kwargs_uncond, prompt_embeds_uncond = _encode_text_sdxl(model, "")
+ prompt_embeds = torch.cat((prompt_embeds_uncond, prompt_embeds, ))
+ added_cond_kwargs = {"text_embeds": torch.cat((added_cond_kwargs_uncond["text_embeds"], added_cond_kwargs["text_embeds"])),
+ "time_ids": torch.cat((added_cond_kwargs_uncond["time_ids"], added_cond_kwargs["time_ids"])),}
+ return added_cond_kwargs, prompt_embeds
+
+
+def _encode_image(model: StableDiffusionXLPipeline, image: np.ndarray) -> T:
+ image = torch.from_numpy(image).float() / 255.
+ image = (image * 2 - 1).permute(2, 0, 1).unsqueeze(0)
+ latent = model.vae.encode(image.to(model.vae.device, model.vae.dtype))['latent_dist'].mean * model.vae.config.scaling_factor
+ return latent
+
+
+def _next_step(model: StableDiffusionXLPipeline, model_output: T, timestep: int, sample: T) -> T:
+ timestep, next_timestep = min(timestep - model.scheduler.config.num_train_timesteps // model.scheduler.num_inference_steps, 999), timestep
+ alpha_prod_t = model.scheduler.alphas_cumprod[int(timestep)] if timestep >= 0 else model.scheduler.final_alpha_cumprod
+ alpha_prod_t_next = model.scheduler.alphas_cumprod[int(next_timestep)]
+ beta_prod_t = 1 - alpha_prod_t
+ next_original_sample = (sample - beta_prod_t ** 0.5 * model_output) / alpha_prod_t ** 0.5
+ next_sample_direction = (1 - alpha_prod_t_next) ** 0.5 * model_output
+ next_sample = alpha_prod_t_next ** 0.5 * next_original_sample + next_sample_direction
+ return next_sample
+
+
+def _get_noise_pred(model: StableDiffusionXLPipeline, latent: T, t: T, context: T, guidance_scale: float, added_cond_kwargs: dict[str, T]):
+ latents_input = torch.cat([latent] * 2)
+ noise_pred = model.unet(latents_input, t, encoder_hidden_states=context, added_cond_kwargs=added_cond_kwargs)["sample"]
+ noise_pred_uncond, noise_prediction_text = noise_pred.chunk(2)
+ noise_pred = noise_pred_uncond + guidance_scale * (noise_prediction_text - noise_pred_uncond)
+ # latents = next_step(model, noise_pred, t, latent)
+ return noise_pred
+
+
+def _ddim_loop(model: StableDiffusionXLPipeline, z0, prompt, guidance_scale) -> T:
+ all_latent = [z0]
+ added_cond_kwargs, text_embedding = _encode_text_sdxl_with_negative(model, prompt)
+ latent = z0.clone().detach().to(model.text_encoder.dtype)
+ for i in tqdm(range(model.scheduler.num_inference_steps)):
+ t = model.scheduler.timesteps[len(model.scheduler.timesteps) - i - 1]
+ noise_pred = _get_noise_pred(model, latent, t, text_embedding, guidance_scale, added_cond_kwargs)
+ latent = _next_step(model, noise_pred, t, latent)
+ all_latent.append(latent)
+ return torch.cat(all_latent).flip(0)
+
+
+def make_inversion_callback(zts, offset: int = 0):
+
+ def callback_on_step_end(pipeline: StableDiffusionXLPipeline, i: int, t: T, callback_kwargs: dict[str, T]) -> dict[str, T]: # pylint: disable=unused-argument
+ latents = callback_kwargs['latents']
+ latents[0] = zts[max(offset + 1, i + 1)].to(latents.device, latents.dtype)
+ return {'latents': latents}
+ return zts[offset], callback_on_step_end
+
+
+@torch.no_grad()
+def ddim_inversion(model: StableDiffusionXLPipeline, x0: np.ndarray, prompt: str, num_inference_steps: int, guidance_scale,) -> T:
+ z0 = _encode_image(model, x0)
+ model.scheduler.set_timesteps(num_inference_steps, device=z0.device)
+ zs = _ddim_loop(model, z0, prompt, guidance_scale)
+ return zs
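Note: a hypothetical usage sketch of the inversion helpers above; the `pipe` and `x0` variables are assumptions for illustration, not part of the patch. Invert a reference image into a latent trajectory, then replay it for the first batch item through the per-step callback:

```python
# Hypothetical usage of ddim_inversion / make_inversion_callback (illustrative only).
from modules.style_aligned import inversion

# assumed inputs: `pipe` is a loaded StableDiffusionXLPipeline, `x0` an HxWx3 uint8 numpy image
zts = inversion.ddim_inversion(pipe, x0, 'reference style description', num_inference_steps=50, guidance_scale=2.0)
zT, callback = inversion.make_inversion_callback(zts, offset=0)
# seed the first latent from zT and pass callback_on_step_end=callback to the pipeline call,
# so image 0 replays the inverted trajectory while the remaining prompts align to its style
```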
diff --git a/modules/style_aligned/sa_handler.py b/modules/style_aligned/sa_handler.py
new file mode 100644
index 000000000..ee4b1ca79
--- /dev/null
+++ b/modules/style_aligned/sa_handler.py
@@ -0,0 +1,281 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from __future__ import annotations
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+ from diffusers import StableDiffusionXLPipeline
+from dataclasses import dataclass
+import torch
+import torch.nn as nn
+from torch.nn import functional as nnf
+from diffusers.models import attention_processor # pylint: disable=ungrouped-imports
+import einops
+
+T = torch.Tensor
+
+
+@dataclass(frozen=True)
+class StyleAlignedArgs:
+ share_group_norm: bool = True
+ share_layer_norm: bool = True
+ share_attention: bool = True
+ adain_queries: bool = True
+ adain_keys: bool = True
+ adain_values: bool = False
+ full_attention_share: bool = False
+ shared_score_scale: float = 1.
+ shared_score_shift: float = 0.
+ only_self_level: float = 0.
+
+
+def expand_first(feat: T, scale=1.,) -> T:
+ b = feat.shape[0]
+ feat_style = torch.stack((feat[0], feat[b // 2])).unsqueeze(1)
+ if scale == 1:
+ feat_style = feat_style.expand(2, b // 2, *feat.shape[1:])
+ else:
+ feat_style = feat_style.repeat(1, b // 2, 1, 1, 1)
+ feat_style = torch.cat([feat_style[:, :1], scale * feat_style[:, 1:]], dim=1)
+ return feat_style.reshape(*feat.shape)
+
+
+def concat_first(feat: T, dim=2, scale=1.) -> T:
+ feat_style = expand_first(feat, scale=scale)
+ return torch.cat((feat, feat_style), dim=dim)
+
+
+def calc_mean_std(feat, eps: float = 1e-5) -> tuple[T, T]:
+ feat_std = (feat.var(dim=-2, keepdims=True) + eps).sqrt()
+ feat_mean = feat.mean(dim=-2, keepdims=True)
+ return feat_mean, feat_std
+
+
+def adain(feat: T) -> T:
+ feat_mean, feat_std = calc_mean_std(feat)
+ feat_style_mean = expand_first(feat_mean)
+ feat_style_std = expand_first(feat_std)
+ feat = (feat - feat_mean) / feat_std
+ feat = feat * feat_style_std + feat_style_mean
+ return feat
+
+
+class DefaultAttentionProcessor(nn.Module):
+
+ def __init__(self):
+ super().__init__()
+ self.processor = attention_processor.AttnProcessor2_0()
+
+ def __call__(self, attn: attention_processor.Attention, hidden_states, encoder_hidden_states=None,
+ attention_mask=None, **kwargs):
+ return self.processor(attn, hidden_states, encoder_hidden_states, attention_mask)
+
+
+class SharedAttentionProcessor(DefaultAttentionProcessor):
+
+ def shifted_scaled_dot_product_attention(self, attn: attention_processor.Attention, query: T, key: T, value: T) -> T:
+ logits = torch.einsum('bhqd,bhkd->bhqk', query, key) * attn.scale
+ logits[:, :, :, query.shape[2]:] += self.shared_score_shift
+ probs = logits.softmax(-1)
+ return torch.einsum('bhqk,bhkd->bhqd', probs, value)
+
+ def shared_call( # pylint: disable=unused-argument
+ self,
+ attn: attention_processor.Attention,
+ hidden_states,
+ encoder_hidden_states=None,
+ attention_mask=None,
+ **kwargs
+ ):
+
+ residual = hidden_states
+ input_ndim = hidden_states.ndim
+ if input_ndim == 4:
+ batch_size, channel, height, width = hidden_states.shape
+ hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2)
+ batch_size, sequence_length, _ = (
+ hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape
+ )
+
+ if attention_mask is not None:
+ attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size)
+ # scaled_dot_product_attention expects attention_mask shape to be
+ # (batch, heads, source_length, target_length)
+ attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1])
+
+ if attn.group_norm is not None:
+ hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)
+
+ query = attn.to_q(hidden_states)
+ key = attn.to_k(hidden_states)
+ value = attn.to_v(hidden_states)
+ inner_dim = key.shape[-1]
+ head_dim = inner_dim // attn.heads
+
+ query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
+ key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
+ value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
+ # if self.step >= self.start_inject:
+ if self.adain_queries:
+ query = adain(query)
+ if self.adain_keys:
+ key = adain(key)
+ if self.adain_values:
+ value = adain(value)
+ if self.share_attention:
+ key = concat_first(key, -2, scale=self.shared_score_scale)
+ value = concat_first(value, -2)
+ if self.shared_score_shift != 0:
+ hidden_states = self.shifted_scaled_dot_product_attention(attn, query, key, value,)
+ else:
+ hidden_states = nnf.scaled_dot_product_attention(
+ query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False
+ )
+ else:
+ hidden_states = nnf.scaled_dot_product_attention(
+ query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False
+ )
+ # hidden_states = adain(hidden_states)
+ hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
+ hidden_states = hidden_states.to(query.dtype)
+
+ # linear proj
+ hidden_states = attn.to_out[0](hidden_states)
+ # dropout
+ hidden_states = attn.to_out[1](hidden_states)
+
+ if input_ndim == 4:
+ hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width)
+
+ if attn.residual_connection:
+ hidden_states = hidden_states + residual
+
+ hidden_states = hidden_states / attn.rescale_output_factor
+ return hidden_states
+
+ def __call__(self, attn: attention_processor.Attention, hidden_states, encoder_hidden_states=None,
+ attention_mask=None, **kwargs):
+ if self.full_attention_share:
+ _b, n, _d = hidden_states.shape
+ hidden_states = einops.rearrange(hidden_states, '(k b) n d -> k (b n) d', k=2)
+ hidden_states = super().__call__(attn, hidden_states, encoder_hidden_states=encoder_hidden_states,
+ attention_mask=attention_mask, **kwargs)
+ hidden_states = einops.rearrange(hidden_states, 'k (b n) d -> (k b) n d', n=n)
+ else:
+ hidden_states = self.shared_call(attn, hidden_states, hidden_states, attention_mask, **kwargs)
+
+ return hidden_states
+
+ def __init__(self, style_aligned_args: StyleAlignedArgs):
+ super().__init__()
+ self.share_attention = style_aligned_args.share_attention
+ self.adain_queries = style_aligned_args.adain_queries
+ self.adain_keys = style_aligned_args.adain_keys
+ self.adain_values = style_aligned_args.adain_values
+ self.full_attention_share = style_aligned_args.full_attention_share
+ self.shared_score_scale = style_aligned_args.shared_score_scale
+ self.shared_score_shift = style_aligned_args.shared_score_shift
+
+
+def _get_switch_vec(total_num_layers, level):
+ if level <= 0:
+ return torch.zeros(total_num_layers, dtype=torch.bool)
+ if level >= 1:
+ return torch.ones(total_num_layers, dtype=torch.bool)
+ to_flip = level > .5
+ if to_flip:
+ level = 1 - level
+ num_switch = int(level * total_num_layers)
+ vec = torch.arange(total_num_layers)
+ vec = vec % (total_num_layers // num_switch)
+ vec = vec == 0
+ if to_flip:
+ vec = ~vec
+ return vec
+
+
+def init_attention_processors(pipeline: StableDiffusionXLPipeline, style_aligned_args: StyleAlignedArgs | None = None):
+ attn_procs = {}
+ unet = pipeline.unet
+ number_of_self, number_of_cross = 0, 0
+ num_self_layers = len([name for name in unet.attn_processors.keys() if 'attn1' in name])
+ if style_aligned_args is None:
+ only_self_vec = _get_switch_vec(num_self_layers, 1)
+ else:
+ only_self_vec = _get_switch_vec(num_self_layers, style_aligned_args.only_self_level)
+ for i, name in enumerate(unet.attn_processors.keys()):
+ is_self_attention = 'attn1' in name
+ if is_self_attention:
+ number_of_self += 1
+ if style_aligned_args is None or only_self_vec[i // 2]:
+ attn_procs[name] = DefaultAttentionProcessor()
+ else:
+ attn_procs[name] = SharedAttentionProcessor(style_aligned_args)
+ else:
+ number_of_cross += 1
+ attn_procs[name] = DefaultAttentionProcessor()
+
+ unet.set_attn_processor(attn_procs)
+
+
+def register_shared_norm(pipeline: StableDiffusionXLPipeline,
+ share_group_norm: bool = True,
+ share_layer_norm: bool = True,
+ ):
+ def register_norm_forward(norm_layer: nn.GroupNorm | nn.LayerNorm) -> nn.GroupNorm | nn.LayerNorm:
+ if not hasattr(norm_layer, 'orig_forward'):
+ setattr(norm_layer, 'orig_forward', norm_layer.forward) # noqa
+ orig_forward = norm_layer.orig_forward
+
+ def forward_(hidden_states: T) -> T:
+ n = hidden_states.shape[-2]
+ hidden_states = concat_first(hidden_states, dim=-2)
+ hidden_states = orig_forward(hidden_states)
+ return hidden_states[..., :n, :]
+
+ norm_layer.forward = forward_
+ return norm_layer
+
+ def get_norm_layers(pipeline_, norm_layers_: dict[str, list[nn.GroupNorm | nn.LayerNorm]]):
+ if isinstance(pipeline_, nn.LayerNorm) and share_layer_norm:
+ norm_layers_['layer'].append(pipeline_)
+ if isinstance(pipeline_, nn.GroupNorm) and share_group_norm:
+ norm_layers_['group'].append(pipeline_)
+ else:
+ for layer in pipeline_.children():
+ get_norm_layers(layer, norm_layers_)
+
+ norm_layers = {'group': [], 'layer': []}
+ get_norm_layers(pipeline.unet, norm_layers)
+ return [register_norm_forward(layer) for layer in norm_layers['group']] + [register_norm_forward(layer) for layer in
+ norm_layers['layer']]
+
+
+class Handler:
+
+ def register(self, style_aligned_args: StyleAlignedArgs):
+ self.norm_layers = register_shared_norm(self.pipeline, style_aligned_args.share_group_norm,
+ style_aligned_args.share_layer_norm)
+ init_attention_processors(self.pipeline, style_aligned_args)
+
+ def remove(self):
+ for layer in self.norm_layers:
+ layer.forward = layer.orig_forward
+ self.norm_layers = []
+ init_attention_processors(self.pipeline, None)
+
+ def __init__(self, pipeline: StableDiffusionXLPipeline):
+ self.pipeline = pipeline
+ self.norm_layers = []
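Note: a brief usage sketch of the `Handler` class above; the `pipe` variable is an assumption here, and `scripts/style_aligned.py` below wires this up for real:

```python
# Illustrative only: register shared-attention processors before generation, remove them after.
from modules.style_aligned import sa_handler

handler = sa_handler.Handler(pipe)  # pipe: a loaded StableDiffusionXLPipeline (assumed)
sa_args = sa_handler.StyleAlignedArgs(share_attention=True, adain_queries=True, adain_keys=True)
handler.register(sa_args)           # patches norm layers and attention processors
# ... run a batched generation; the first batch item acts as the style reference ...
handler.remove()                    # restores the original processors and norm forwards
```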
diff --git a/scripts/style_aligned.py b/scripts/style_aligned.py
new file mode 100644
index 000000000..25feb49bc
--- /dev/null
+++ b/scripts/style_aligned.py
@@ -0,0 +1,117 @@
+import gradio as gr
+import torch
+import numpy as np
+import diffusers
+from modules import scripts, processing, shared, devices
+
+
+handler = None
+zts = None
+supported_model_list = ['sdxl']
+orig_prompt_attention = None
+
+
+class Script(scripts.Script):
+ def title(self):
+ return 'Style Aligned Image Generation'
+
+ def show(self, is_img2img):
+ return shared.native
+
+ def reset(self):
+ global handler, zts # pylint: disable=global-statement
+ handler = None
+ zts = None
+ shared.log.info('SA: image upload')
+
+ def preset(self, preset):
+ if preset == 'text':
+ return [['attention', 'adain_queries', 'adain_keys'], 1.0, 0, 0.0]
+ if preset == 'image':
+ return [['group_norm', 'layer_norm', 'attention', 'adain_queries', 'adain_keys'], 1.0, 2, 0.0]
+ if preset == 'all':
+ return [['group_norm', 'layer_norm', 'attention', 'adain_queries', 'adain_keys', 'adain_values', 'full_attention_share'], 1.0, 1, 0.5]
+
+ def ui(self, _is_img2img): # ui elements
+ with gr.Row():
+            gr.HTML('  Style Aligned Image Generation')
+ with gr.Row():
+ preset = gr.Dropdown(label="Preset", choices=['text', 'image', 'all'], value='text')
+ scheduler = gr.Checkbox(label="Override scheduler", value=False)
+ with gr.Row():
+ shared_opts = gr.Dropdown(label="Shared options",
+ multiselect=True,
+ choices=['group_norm', 'layer_norm', 'attention', 'adain_queries', 'adain_keys', 'adain_values', 'full_attention_share'],
+ value=['attention', 'adain_queries', 'adain_keys'],
+ )
+ with gr.Row():
+ shared_score_scale = gr.Slider(label="Scale", minimum=0.0, maximum=2.0, step=0.01, value=1.0)
+ shared_score_shift = gr.Slider(label="Shift", minimum=0, maximum=10, step=1, value=0)
+ only_self_level = gr.Slider(label="Level", minimum=0.0, maximum=1.0, step=0.01, value=0.0)
+ with gr.Row():
+ prompt = gr.Textbox(lines=1, label='Optional image description', placeholder='use the style from the image')
+ with gr.Row():
+ image = gr.Image(label='Optional image', source='upload', type='pil')
+
+ image.change(self.reset)
+ preset.change(self.preset, inputs=[preset], outputs=[shared_opts, shared_score_scale, shared_score_shift, only_self_level])
+
+ return [image, prompt, scheduler, shared_opts, shared_score_scale, shared_score_shift, only_self_level]
+
+ def run(self, p: processing.StableDiffusionProcessing, image, prompt, scheduler, shared_opts, shared_score_scale, shared_score_shift, only_self_level): # pylint: disable=arguments-differ
+ global handler, zts, orig_prompt_attention # pylint: disable=global-statement
+ if shared.sd_model_type not in supported_model_list:
+ shared.log.warning(f'SA: class={shared.sd_model.__class__.__name__} model={shared.sd_model_type} required={supported_model_list}')
+ return None
+
+ from modules.style_aligned import sa_handler, inversion
+
+ handler = sa_handler.Handler(shared.sd_model)
+ sa_args = sa_handler.StyleAlignedArgs(
+ share_group_norm='group_norm' in shared_opts,
+ share_layer_norm='layer_norm' in shared_opts,
+ share_attention='attention' in shared_opts,
+ adain_queries='adain_queries' in shared_opts,
+ adain_keys='adain_keys' in shared_opts,
+ adain_values='adain_values' in shared_opts,
+ full_attention_share='full_attention_share' in shared_opts,
+ shared_score_scale=float(shared_score_scale),
+ shared_score_shift=np.log(shared_score_shift) if shared_score_shift > 0 else 0,
+            only_self_level=float(only_self_level),
+ )
+ handler.register(sa_args)
+
+ if scheduler:
+ shared.sd_model.scheduler = diffusers.DDIMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", clip_sample=False, set_alpha_to_one=False)
+ p.sampler_name = 'None'
+
+ if image is not None and zts is None:
+ shared.log.info(f'SA: inversion image={image} prompt="{prompt}"')
+ image = image.resize((1024, 1024))
+ x0 = np.array(image).astype(np.float32) / 255.0
+ shared.sd_model.scheduler = diffusers.DDIMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", clip_sample=False, set_alpha_to_one=False)
+ zts = inversion.ddim_inversion(shared.sd_model, x0, prompt, num_inference_steps=50, guidance_scale=2)
+
+ p.prompt = p.prompt.splitlines()
+ p.batch_size = len(p.prompt)
+ orig_prompt_attention = shared.opts.prompt_attention
+ shared.opts.data['prompt_attention'] = 'fixed' # otherwise need to deal with class_tokens_mask
+
+ if zts is not None:
+ processing.fix_seed(p)
+ zT, inversion_callback = inversion.make_inversion_callback(zts, offset=0)
+ generator = torch.Generator(device='cpu')
+ generator.manual_seed(p.seed)
+ latents = torch.randn(p.batch_size, 4, 128, 128, device='cpu', generator=generator, dtype=devices.dtype,).to(devices.device)
+ latents[0] = zT
+ p.task_args['latents'] = latents
+ p.task_args['callback_on_step_end'] = inversion_callback
+
+ shared.log.info(f'SA: batch={p.batch_size} type={"image" if zts is not None else "text"} config={sa_args.__dict__}')
+
+ def after(self, p: processing.StableDiffusionProcessing, *args): # pylint: disable=unused-argument
+ global handler # pylint: disable=global-statement
+ if handler is not None:
+ handler.remove()
+ handler = None
+ shared.opts.data['prompt_attention'] = orig_prompt_attention
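The script can also be driven through the regular txt2img API by passing positional script_args in the same order as Script.run() above; a minimal sketch (the script_name value, server URL and timeout are assumptions, the argument values mirror the 'text' preset):

    import requests

    payload = {
        'prompt': 'a toy train\na toy airplane\na toy boat',  # one prompt per line -> one batched, style-aligned run
        'steps': 20,
        'script_name': 'style aligned image generation',  # assumed to match the script title
        'script_args': [
            None,   # image: optional b64 reference image for ddim inversion
            '',     # prompt: optional description of the reference image
            False,  # scheduler: override sampler with DDIM
            ['attention', 'adain_queries', 'adain_keys'],  # shared_opts ('text' preset)
            1.0,    # shared_score_scale
            0,      # shared_score_shift
            0.0,    # only_self_level
        ],
    }
    res = requests.post('http://127.0.0.1:7860/sdapi/v1/txt2img', json=payload, timeout=300)
    images = res.json().get('images', [])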
From 00ebdf3476526b136594e98ed9540fb24a4f5fd9 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 27 Nov 2024 10:07:37 -0500
Subject: [PATCH 022/162] fix api scripts callbacks
Signed-off-by: Vladimir Mandic
---
.eslintrc.json | 3 ++-
CHANGELOG.md | 5 +++--
cli/api-pulid.js | 24 +++++++++++++++++-------
modules/api/control.py | 2 ++
modules/api/generate.py | 4 ++++
modules/control/run.py | 1 -
modules/shared.py | 4 ++--
scripts/pulid_ext.py | 6 ++++--
8 files changed, 34 insertions(+), 15 deletions(-)
diff --git a/.eslintrc.json b/.eslintrc.json
index 2dddb41a1..c86dbb749 100644
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -37,7 +37,8 @@
"object-curly-newline":"off",
"prefer-rest-params":"off",
"prefer-destructuring":"off",
- "radix":"off"
+ "radix":"off",
+ "node/shebang": "off"
},
"globals": {
    // assets
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8183fbbc8..3ad139f0e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-11-26
+## Update for 2024-11-27
### New models and integrations
@@ -49,7 +49,7 @@
- **Sampler** improvements
- update DPM FlowMatch samplers
-### Fixes:
+### Fixes
- update `diffusers`
- fix README links
@@ -60,6 +60,7 @@
- avoid live-preview if vae-decode is running
- allow xyz-grid with multi-axis s&r
- fix xyz-grid with lora
+- fix api script callbacks
## Update for 2024-11-21
diff --git a/cli/api-pulid.js b/cli/api-pulid.js
index fde0ae43b..033824e9b 100755
--- a/cli/api-pulid.js
+++ b/cli/api-pulid.js
@@ -10,12 +10,13 @@ const argparse = require('argparse');
const sd_url = process.env.SDAPI_URL || 'http://127.0.0.1:7860';
const sd_username = process.env.SDAPI_USR;
const sd_password = process.env.SDAPI_PWD;
+let args = {};
function b64(file) {
const data = fs.readFileSync(file);
- const b64 = Buffer.from(data).toString('base64');
+ const b64str = Buffer.from(data).toString('base64');
const ext = path.extname(file).replace('.', '');
- str = `data:image/${ext};base64,${b64}`;
+ const str = `data:image/${ext};base64,${b64str}`;
// console.log('b64:', ext, b64.length);
return str;
}
@@ -39,7 +40,16 @@ function options() {
if (args.pulid) {
const b64image = b64(args.pulid);
opt.script_name = 'pulid';
- opt.script_args = [b64image, 0.9];
+ opt.script_args = [
+ b64image, // b64 encoded image, required param
+ 0.9, // strength, optional
+ 20, // zero, optional
+ 'dpmpp_sde', // sampler, optional
+ 'v2', // ortho, optional
+ true, // restore (disable pulid after run), optional
+ true, // offload, optional
+ 'v1.1', // version, optional
+ ];
}
// console.log('options:', opt);
return opt;
@@ -53,8 +63,8 @@ function init() {
parser.add_argument('--height', { type: 'int', help: 'height' });
parser.add_argument('--pulid', { type: 'str', help: 'pulid init image' });
parser.add_argument('--output', { type: 'str', help: 'output path' });
- const args = parser.parse_args();
- return args
+ const parsed = parser.parse_args();
+ return parsed;
}
async function main() {
@@ -73,12 +83,12 @@ async function main() {
console.log('result:', json.info);
for (const i in json.images) { // eslint-disable-line guard-for-in
const file = args.output || `/tmp/test-${i}.jpg`;
- const data = atob(json.images[i])
+ const data = atob(json.images[i]);
fs.writeFileSync(file, data, 'binary');
console.log('image saved:', file);
}
}
}
-const args = init();
+args = init();
main();
diff --git a/modules/api/control.py b/modules/api/control.py
index 29c5a77f1..345930341 100644
--- a/modules/api/control.py
+++ b/modules/api/control.py
@@ -159,6 +159,8 @@ def post_control(self, req: ReqControl):
output_images = []
output_processed = []
output_info = ''
+ # TODO control script process
+ # init script args, call scripts.script_control.run, call scripts.script_control.after
run.control_set({ 'do_not_save_grid': not req.save_images, 'do_not_save_samples': not req.save_images, **self.prepare_ip_adapter(req) })
run.control_set(getattr(req, "extra", {}))
res = run.control_run(**args)
diff --git a/modules/api/generate.py b/modules/api/generate.py
index b8ee645a4..9b409a14b 100644
--- a/modules/api/generate.py
+++ b/modules/api/generate.py
@@ -116,6 +116,8 @@ def post_text2img(self, txt2imgreq: models.ReqTxt2Img):
processed = scripts.scripts_txt2img.run(p, *script_args) # Need to pass args as list here
else:
processed = process_images(p)
+ processed = scripts.scripts_txt2img.after(p, processed, *script_args)
+ p.close()
shared.state.end(api=False)
if processed is None or processed.images is None or len(processed.images) == 0:
b64images = []
@@ -166,6 +168,8 @@ def post_img2img(self, img2imgreq: models.ReqImg2Img):
processed = scripts.scripts_img2img.run(p, *script_args) # Need to pass args as list here
else:
processed = process_images(p)
+ processed = scripts.scripts_img2img.after(p, processed, *script_args)
+ p.close()
shared.state.end(api=False)
if processed is None or processed.images is None or len(processed.images) == 0:
b64images = []
diff --git a/modules/control/run.py b/modules/control/run.py
index 88dddc213..2fe13dd73 100644
--- a/modules/control/run.py
+++ b/modules/control/run.py
@@ -87,7 +87,6 @@ def control_run(state: str = '',
u.process.override = u.override
global instance, pipe, original_pipeline # pylint: disable=global-statement
- t_start = time.time()
debug(f'Control: type={unit_type} input={inputs} init={inits} type={input_type}')
if inputs is None or (type(inputs) is list and len(inputs) == 0):
inputs = [None]
diff --git a/modules/shared.py b/modules/shared.py
index a89cbbc95..0c4d36746 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -559,8 +559,8 @@ def get_default_modes():
"diffusers_extract_ema": OptionInfo(False, "Use model EMA weights when possible"),
"diffusers_generator_device": OptionInfo("GPU", "Generator device", gr.Radio, {"choices": ["GPU", "CPU", "Unset"]}),
"diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'model', 'sequential']}),
- "diffusers_offload_max_gpu_memory": OptionInfo(round(gpu_memory * 0.75, 1), "Max GPU memory for balanced offload mode in GB", gr.Slider, {"minimum": 0, "maximum": gpu_memory, "step": 0.01,}),
- "diffusers_offload_max_cpu_memory": OptionInfo(round(cpu_memory * 0.75, 1), "Max CPU memory for balanced offload mode in GB", gr.Slider, {"minimum": 0, "maximum": cpu_memory, "step": 0.01,}),
+ "diffusers_offload_max_gpu_memory": OptionInfo(round(gpu_memory * 0.75, 1), "Max GPU memory before balanced offload", gr.Slider, {"minimum": 0, "maximum": gpu_memory, "step": 0.01, "visible": True }),
+ "diffusers_offload_max_cpu_memory": OptionInfo(round(cpu_memory * 0.75, 1), "Max CPU memory before balanced offload", gr.Slider, {"minimum": 0, "maximum": cpu_memory, "step": 0.01, "visible": False }),
"diffusers_vae_upcast": OptionInfo("default", "VAE upcasting", gr.Radio, {"choices": ['default', 'true', 'false']}),
"diffusers_vae_slicing": OptionInfo(True, "VAE slicing"),
"diffusers_vae_tiling": OptionInfo(cmd_opts.lowvram or cmd_opts.medvram, "VAE tiling"),
diff --git a/scripts/pulid_ext.py b/scripts/pulid_ext.py
index 676fa79f3..ee08e348b 100644
--- a/scripts/pulid_ext.py
+++ b/scripts/pulid_ext.py
@@ -164,11 +164,13 @@ def run(
p.batch_size = 1
sdp = shared.opts.cross_attention_optimization == "Scaled-Dot-Product"
+ sampler_fn = getattr(self.pulid.sampling, f'sample_{sampler}', None)
strength = getattr(p, 'pulid_strength', strength)
zero = getattr(p, 'pulid_zero', zero)
ortho = getattr(p, 'pulid_ortho', ortho)
sampler = getattr(p, 'pulid_sampler', sampler)
- sampler_fn = getattr(self.pulid.sampling, f'sample_{sampler}', None)
+ restore = getattr(p, 'pulid_restore', restore)
+ p.pulid_restore = restore
if sampler_fn is None:
sampler_fn = self.pulid.sampling.sample_dpmpp_2m_sde
@@ -199,7 +201,7 @@ def run(
return None
shared.sd_model.sampler = sampler_fn
- shared.log.info(f'PuLID: class={shared.sd_model.__class__.__name__} version="{version}" sdp={sdp} strength={strength} zero={zero} ortho={ortho} sampler={sampler_fn} images={[i.shape for i in images]} offload={offload}')
+ shared.log.info(f'PuLID: class={shared.sd_model.__class__.__name__} version="{version}" sdp={sdp} strength={strength} zero={zero} ortho={ortho} sampler={sampler_fn} images={[i.shape for i in images]} offload={offload} restore={restore}')
self.pulid.attention.NUM_ZERO = zero
self.pulid.attention.ORTHO = ortho == 'v1'
self.pulid.attention.ORTHO_v2 = ortho == 'v2'
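Note that the getattr calls above allow callers to override the UI values per generation by setting attributes on the processing object before the script executes; a minimal sketch (attribute names are the ones read above; how p is obtained depends on the caller):

    p.pulid_strength = 0.8  # overrides the strength slider for this run
    p.pulid_zero = 20       # overrides the zero value applied to attention.NUM_ZERO
    p.pulid_restore = True  # disable pulid again once the run completes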
From a2dc3670a1a2bc27cce9fdfd375767c6ec9ace88 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 27 Nov 2024 15:36:22 -0500
Subject: [PATCH 023/162] model-loader allow absolute path
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 1 +
cli/api-model.js | 30 ++++++++++++++++++++++++++++++
cli/full-test.sh | 3 +++
launch.py | 8 +++++---
modules/sd_checkpoint.py | 5 +++++
wiki | 2 +-
6 files changed, 45 insertions(+), 4 deletions(-)
create mode 100755 cli/api-model.js
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3ad139f0e..0167d5509 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -32,6 +32,7 @@
- **Model loader** improvements:
- detect model components on model load fail
+ - allow passing absolute path to model loader
- Flux, SD35: force unload model
- Flux: apply `bnb` quant when loading *unet/transformer*
- Flux: all-in-one safetensors
diff --git a/cli/api-model.js b/cli/api-model.js
new file mode 100755
index 000000000..e2ce5344a
--- /dev/null
+++ b/cli/api-model.js
@@ -0,0 +1,30 @@
+#!/usr/bin/env node
+
+const sd_url = process.env.SDAPI_URL || 'http://127.0.0.1:7860';
+const sd_username = process.env.SDAPI_USR;
+const sd_password = process.env.SDAPI_PWD;
+const models = [
+ '/mnt/models/stable-diffusion/sd15/lyriel_v16.safetensors',
+ '/mnt/models/stable-diffusion/flux/flux-finesse_v2-f1h-fp8.safetensors',
+ '/mnt/models/stable-diffusion/sdxl/TempestV0.1-Artistic.safetensors',
+];
+
+async function options(data) {
+ const method = 'POST';
+ const headers = new Headers();
+ const body = JSON.stringify(data);
+ headers.set('Content-Type', 'application/json');
+  if (sd_username && sd_password) headers.set('Authorization', `Basic ${btoa(sd_username + ':' + sd_password)}`);
+ const res = await fetch(`${sd_url}/sdapi/v1/options`, { method, headers, body });
+ return res;
+}
+
+async function main() {
+ for (const model of models) {
+ console.log('model:', model);
+ const res = await options({ sd_model_checkpoint: model });
+ console.log('result:', res);
+ }
+}
+
+main();
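The same model-switch call from Python, for environments without node; a minimal sketch mirroring the endpoint and option name used above (the model path is just the first entry from the list):

    import requests

    def set_model(path: str) -> int:
        # POST the absolute .safetensors path as sd_model_checkpoint, same as cli/api-model.js
        res = requests.post('http://127.0.0.1:7860/sdapi/v1/options',
                            json={'sd_model_checkpoint': path}, timeout=300)
        return res.status_code

    set_model('/mnt/models/stable-diffusion/sd15/lyriel_v16.safetensors')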
diff --git a/cli/full-test.sh b/cli/full-test.sh
index e410528ad..912dc3a5b 100755
--- a/cli/full-test.sh
+++ b/cli/full-test.sh
@@ -1,5 +1,8 @@
#!/usr/bin/env bash
+node cli/api-txt2img.js
+node cli/api-pulid.js
+
source venv/bin/activate
echo image-exif
python cli/api-info.py --input html/logo-bg-0.jpg
diff --git a/launch.py b/launch.py
index f944a7e54..e00da58c7 100755
--- a/launch.py
+++ b/launch.py
@@ -55,9 +55,11 @@ def get_custom_args():
if 'PS1' in env:
del env['PS1']
installer.log.trace(f'Environment: {installer.print_dict(env)}')
- else:
- env = [f'{k}={v}' for k, v in os.environ.items() if k.startswith('SD_')]
- installer.log.debug(f'Env flags: {env}')
+ env = [f'{k}={v}' for k, v in os.environ.items() if k.startswith('SD_')]
+ installer.log.debug(f'Env flags: {env}')
+ ldd = os.environ.get('LD_PRELOAD', None)
+ if ldd is not None:
+ installer.log.debug(f'Linker flags: "{ldd}"')
@lru_cache()
diff --git a/modules/sd_checkpoint.py b/modules/sd_checkpoint.py
index 20654e28b..e035fc3db 100644
--- a/modules/sd_checkpoint.py
+++ b/modules/sd_checkpoint.py
@@ -188,6 +188,11 @@ def get_closet_checkpoint_match(s: str):
if found and len(found) == 1:
return found[0]
+ # absolute path
+ if s.endswith('.safetensors') and os.path.isfile(s):
+ checkpoint_info = CheckpointInfo(s)
+ return checkpoint_info
+
# reference search
"""
found = sorted([info for info in shared.reference_models.values() if os.path.basename(info['path']).lower().startswith(s.lower())], key=lambda x: len(x['path']))
diff --git a/wiki b/wiki
index ba7d78b55..441d2c4e1 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit ba7d78b55eb95afe8509bd0069b8ec345b259f21
+Subproject commit 441d2c4e19349f0b219948837922e6373347076e
From a2f5ef4ae769e0a7c59f5c7d94945a2e9b160eaf Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 27 Nov 2024 15:42:37 -0500
Subject: [PATCH 024/162] update wiki
Signed-off-by: Vladimir Mandic
---
wiki | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wiki b/wiki
index 441d2c4e1..f57cdb49d 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 441d2c4e19349f0b219948837922e6373347076e
+Subproject commit f57cdb49d8ca928024b43525897d1c1379eab4c4
From 164ce252dc873ca32d01222714f019c8f71c2e8d Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Thu, 28 Nov 2024 08:46:10 -0500
Subject: [PATCH 025/162] add sd35 controlnets
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 4 ++-
installer.py | 2 +-
modules/control/units/controlnet.py | 3 +++
modules/model_quant.py | 41 +++++++++++++++++++++++++++++
modules/model_sd3.py | 1 +
modules/sd_models.py | 2 +-
modules/sd_models_compile.py | 1 -
modules/shared.py | 3 ++-
8 files changed, 52 insertions(+), 5 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0167d5509..b3f2282c0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-11-27
+## Update for 2024-11-28
### New models and integrations
@@ -21,6 +21,8 @@
*recommended*: guidance scale 30
- [Depth](https://huggingface.co/black-forest-labs/FLUX.1-Depth-dev): ~23.8GB, replaces currently loaded model
*recommended*: guidance scale 10
+- [StabilityAI SD35 ControlNets](https://huggingface.co/stabilityai/stable-diffusion-3.5-controlnets)
+ - In addition to previously released `InstantX` and `Alimama`, we now have *official* ones from StabilityAI
- [Style Aligned Image Generation](https://style-aligned-gen.github.io/)
enable in scripts, compatible with sd-xl
enter multiple prompts in prompt field separated by new line
diff --git a/installer.py b/installer.py
index 396b53fab..37202552d 100644
--- a/installer.py
+++ b/installer.py
@@ -459,7 +459,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
def check_diffusers():
if args.skip_all or args.skip_requirements:
return
- sha = '7ac6e286ee994270e737b70c904ea50049d53567'
+ sha = '069186fac510d6f6f88a5e435523b235c823a8a0'
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
cur = opts.get('diffusers_version', '') if minor > 0 else ''
diff --git a/modules/control/units/controlnet.py b/modules/control/units/controlnet.py
index 20b99412a..3f68a4896 100644
--- a/modules/control/units/controlnet.py
+++ b/modules/control/units/controlnet.py
@@ -85,6 +85,9 @@
"XLabs-AI HED": 'XLabs-AI/flux-controlnet-hed-diffusers'
}
predefined_sd3 = {
+ "StabilityAI Canny": 'diffusers-internal-dev/sd35-controlnet-canny-8b',
+ "StabilityAI Depth": 'diffusers-internal-dev/sd35-controlnet-depth-8b',
+ "StabilityAI Blur": 'diffusers-internal-dev/sd35-controlnet-blur-8b',
"InstantX Canny": 'InstantX/SD3-Controlnet-Canny',
"InstantX Pose": 'InstantX/SD3-Controlnet-Pose',
"InstantX Depth": 'InstantX/SD3-Controlnet-Depth',
diff --git a/modules/model_quant.py b/modules/model_quant.py
index 0e7bdd4b3..9482fe898 100644
--- a/modules/model_quant.py
+++ b/modules/model_quant.py
@@ -5,6 +5,7 @@
bnb = None
quanto = None
+ao = None
def create_bnb_config(kwargs = None, allow_bnb: bool = True):
@@ -12,6 +13,8 @@ def create_bnb_config(kwargs = None, allow_bnb: bool = True):
if len(shared.opts.bnb_quantization) > 0 and allow_bnb:
if 'Model' in shared.opts.bnb_quantization:
load_bnb()
+ if bnb is None:
+ return kwargs
bnb_config = diffusers.BitsAndBytesConfig(
load_in_8bit=shared.opts.bnb_quantization_type in ['fp8'],
load_in_4bit=shared.opts.bnb_quantization_type in ['nf4', 'fp4'],
@@ -28,6 +31,44 @@ def create_bnb_config(kwargs = None, allow_bnb: bool = True):
return kwargs
+def create_ao_config(kwargs = None, allow_ao: bool = True):
+ from modules import shared
+ if len(shared.opts.torchao_quantization) > 0 and shared.opts.torchao_quantization_mode == 'pre' and allow_ao:
+ if 'Model' in shared.opts.torchao_quantization:
+ load_torchao()
+ if ao is None:
+ return kwargs
+ ao_config = {}
+ # ao_config = diffusers.TorchAoConfig("int8wo") # TODO torchao
+            shared.log.debug(f'Quantization: module=all type=torchao dtype={shared.opts.torchao_quantization_type}')
+ if kwargs is None:
+ return ao_config
+ else:
+ kwargs['quantization_config'] = ao_config
+ return kwargs
+ return kwargs
+
+
+def load_torchao(msg='', silent=False):
+ global ao # pylint: disable=global-statement
+ if ao is not None:
+ return ao
+ install('torchao', quiet=True)
+ try:
+ import torchao
+ ao = torchao
+ fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
+        log.debug(f'Quantization: type=torchao version={ao.__version__} fn={fn}') # pylint: disable=protected-access
+ return ao
+ except Exception as e:
+ if len(msg) > 0:
+            log.error(f"{msg} failed to import torchao: {e}")
+ ao = None
+ if not silent:
+ raise
+ return None
+
+
def load_bnb(msg='', silent=False):
global bnb # pylint: disable=global-statement
if bnb is not None:
diff --git a/modules/model_sd3.py b/modules/model_sd3.py
index b9d579085..ba036760a 100644
--- a/modules/model_sd3.py
+++ b/modules/model_sd3.py
@@ -150,6 +150,7 @@ def load_sd3(checkpoint_info, cache_dir=None, config=None):
shared.log.debug(f'Load model: type=SD3 kwargs={list(kwargs)} repo="{repo_id}"')
kwargs = model_quant.create_bnb_config(kwargs)
+ kwargs = model_quant.create_ao_config(kwargs)
pipe = loader(
repo_id,
torch_dtype=devices.dtype,
diff --git a/modules/sd_models.py b/modules/sd_models.py
index aab35af18..68446bdd3 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -279,7 +279,7 @@ def eval_model(model, op=None, sd_model=None): # pylint: disable=unused-argument
model.eval()
return model
sd_model = sd_models_compile.apply_compile_to_model(sd_model, eval_model, ["Model", "VAE", "Text Encoder"], op="eval")
- if len(shared.opts.torchao_quantization) > 0:
+ if len(shared.opts.torchao_quantization) > 0 and shared.opts.torchao_quantization_mode != 'post':
sd_model = sd_models_compile.torchao_quantization(sd_model)
if shared.opts.opt_channelslast and hasattr(sd_model, 'unet'):
diff --git a/modules/sd_models_compile.py b/modules/sd_models_compile.py
index 91ed84ded..38d3ef57f 100644
--- a/modules/sd_models_compile.py
+++ b/modules/sd_models_compile.py
@@ -535,7 +535,6 @@ def torchao_quantization(sd_model):
if hasattr(sd_model, 'transformer') and 'Model' in shared.opts.torchao_quantization:
modules.append('transformer')
q.quantize_(sd_model.transformer, fn(), device=devices.device)
- # sd_model.transformer = q.autoquant(sd_model.transformer, error_on_unseen=False)
if hasattr(sd_model, 'vae') and 'VAE' in shared.opts.torchao_quantization:
modules.append('vae')
q.quantize_(sd_model.vae, fn(), device=devices.device)
diff --git a/modules/shared.py b/modules/shared.py
index 0c4d36746..5b54a0de2 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -477,7 +477,7 @@ def get_default_modes():
"sd_checkpoint_autoload": OptionInfo(True, "Model autoload on start"),
"sd_checkpoint_autodownload": OptionInfo(True, "Model auto-download on demand"),
"sd_textencoder_cache": OptionInfo(True, "Cache text encoder results", gr.Checkbox, {"visible": False}),
- "sd_textencoder_cache_size": OptionInfo(4, "Text encoder results LRU cache size", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1}),
+ "sd_textencoder_cache_size": OptionInfo(4, "Text encoder cache size", gr.Slider, {"minimum": 0, "maximum": 16, "step": 1}),
"stream_load": OptionInfo(False, "Load models using stream loading method", gr.Checkbox, {"visible": not native }),
"prompt_mean_norm": OptionInfo(False, "Prompt attention normalization", gr.Checkbox),
"comma_padding_backtrack": OptionInfo(20, "Prompt padding", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1, "visible": not native }),
@@ -590,6 +590,7 @@ def get_default_modes():
"optimum_quanto_weights_type": OptionInfo("qint8", "Optimum.quanto quantization type", gr.Radio, {"choices": ['qint8', 'qfloat8_e4m3fn', 'qfloat8_e5m2', 'qint4', 'qint2'], "visible": native}),
"optimum_quanto_activations_type": OptionInfo("none", "Optimum.quanto quantization activations ", gr.Radio, {"choices": ['none', 'qint8', 'qfloat8_e4m3fn', 'qfloat8_e5m2'], "visible": native}),
"torchao_quantization": OptionInfo([], "TorchAO quantization enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder"], "visible": native}),
+ "torchao_quantization_mode": OptionInfo("pre", "TorchAO quantization mode", gr.Radio, {"choices": ['pre', 'post'], "visible": native}),
"torchao_quantization_type": OptionInfo("int8", "TorchAO quantization type", gr.Radio, {"choices": ["int8+act", "int8", "int4", "fp8+act", "fp8", "fpx"], "visible": native}),
"nncf_compress_weights": OptionInfo([], "NNCF compression enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "ControlNet"], "visible": native}),
"nncf_compress_weights_mode": OptionInfo("INT8", "NNCF compress mode", gr.Radio, {"choices": ['INT8', 'INT8_SYM', 'INT4_ASYM', 'INT4_SYM', 'NF4'] if cmd_opts.use_openvino else ['INT8']}),
From 2c417b3280bea67f45182d728e295eb7e9920ba5 Mon Sep 17 00:00:00 2001
From: P-Hellmann
Date: Thu, 28 Nov 2024 15:14:24 +0100
Subject: [PATCH 026/162] Create black-teal-reimagined.css
Modern look of the black-teal theme in SD.Next
---
javascript/black-teal-reimagined.css | 1105 ++++++++++++++++++++++++++
1 file changed, 1105 insertions(+)
create mode 100644 javascript/black-teal-reimagined.css
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
new file mode 100644
index 000000000..e5618c02c
--- /dev/null
+++ b/javascript/black-teal-reimagined.css
@@ -0,0 +1,1105 @@
+/* Generic HTML Tags */
+@font-face {
+ font-family: 'NotoSans';
+ font-display: swap;
+ font-style: normal;
+ font-weight: 100;
+ src: local('NotoSans'), url('notosans-nerdfont-regular.ttf');
+}
+
+html {
+ scroll-behavior: smooth;
+}
+
+:root,
+.light,
+.dark {
+ --font: 'NotoSans';
+ --font-mono: 'ui-monospace', 'Consolas', monospace;
+ --font-size: 16px;
+
+ /* Primary Colors */
+ --primary-50: #7dffff;
+ --primary-100: #72e8e8;
+ --primary-200: #67d2d2;
+ --primary-300: #5dbcbc;
+ --primary-400: #52a7a7;
+ --primary-500: #489292;
+ --primary-600: #3e7d7d;
+ --primary-700: #356969;
+ --primary-800: #2b5656;
+ --primary-900: #224444;
+ --primary-950: #193232;
+
+ /* Neutral Colors */
+ --neutral-50: #f0f0f0;
+ --neutral-100: #e0e0e0;
+ --neutral-200: #d0d0d0;
+ --neutral-300: #b0b0b0;
+ --neutral-400: #909090;
+ --neutral-500: #707070;
+ --neutral-600: #606060;
+ --neutral-700: #404040;
+ --neutral-800: #303030;
+ --neutral-900: #202020;
+ --neutral-950: #101010;
+
+ /* Highlight and Inactive Colors */
+ --highlight-color: var(--primary-200);
+ --inactive-color: var(--primary-800);
+
+ /* Text Colors */
+ --body-text-color: var(--neutral-100);
+ --body-text-color-subdued: var(--neutral-300);
+
+ /* Background Colors */
+ --background-color: var(--neutral-950);
+ --background-fill-primary: var(--neutral-700);
+ --input-background-fill: var(--neutral-800);
+
+ /* Padding and Borders */
+ --input-padding: 4px;
+ --input-shadow: none;
+ --button-primary-text-color: var(--neutral-100);
+ --button-primary-background-fill: var(--primary-600);
+ --button-primary-background-fill-hover: var(--primary-800);
+ --button-secondary-text-color: var(--neutral-100);
+ --button-secondary-background-fill: var(--neutral-900);
+ --button-secondary-background-fill-hover: var(--neutral-600);
+
+ /* Border Radius */
+ --radius-xs: 2px;
+ --radius-sm: 4px;
+ --radius-md: 6px;
+ --radius-lg: 8px;
+ --radius-xl: 10px;
+ --radius-xxl: 15px;
+ --radius-xxxl: 20px;
+
+ /* Shadows */
+ --shadow-sm: 0 1px 2px rgba(0, 0, 0, 0.1);
+ --shadow-md: 0 2px 4px rgba(0, 0, 0, 0.1);
+ --shadow-lg: 0 4px 8px rgba(0, 0, 0, 0.1);
+ --shadow-xl: 0 8px 16px rgba(0, 0, 0, 0.1);
+
+ /* Animation */
+ --transition: all 0.3s ease;
+
+ /* Scrollbar */
+ --scrollbar-bg: var(--neutral-800);
+ --scrollbar-thumb: var(--highlight-color);
+}
+
+html {
+ font-size: var(--font-size);
+ font-family: var(--font);
+}
+
+body,
+button,
+input,
+select,
+textarea {
+ font-family: var(--font);
+ color: var(--body-text-color);
+ transition: var(--transition);
+}
+
+button {
+ max-width: 400px;
+ white-space: nowrap;
+ padding: 8px 12px;
+ border: none;
+ border-radius: var(--radius-md);
+ background-color: var(--button-primary-background-fill);
+ color: var(--button-primary-text-color);
+ cursor: pointer;
+ box-shadow: var(--shadow-sm);
+ transition: transform 0.2s ease, background-color 0.3s ease;
+}
+
+button:hover {
+ background-color: var(--button-primary-background-fill-hover);
+ transform: scale(1.05);
+}
+
+/* Range Input Styles */
+.slider-container {
+ width: 100%;
+ /* Ensures the container takes full width */
+ max-width: 100%;
+ /* Prevents overflow */
+ padding: 0 10px;
+ /* Adds padding for aesthetic spacing */
+ box-sizing: border-box;
+ /* Ensures padding doesn't affect width */
+}
+
+input[type='range'] {
+ display: block;
+ margin: 0;
+ padding: 0;
+ height: 1em;
+ background-color: transparent;
+ overflow: hidden;
+ cursor: pointer;
+ box-shadow: none;
+ -webkit-appearance: none;
+ opacity: 0.7;
+ appearance: none;
+ width: 100%;
+ /* Makes the slider responsive */
+}
+
+input[type='range'] {
+ opacity: 1;
+}
+
+input[type='range']::-webkit-slider-thumb {
+ -webkit-appearance: none;
+ height: 1em;
+ width: 1em;
+ background-color: var(--highlight-color);
+ border-radius: var(--radius-xs);
+ box-shadow: var(--shadow-md);
+ cursor: pointer;
+ /* Ensures the thumb is clickable */
+}
+
+input[type='range']::-webkit-slider-runnable-track {
+ -webkit-appearance: none;
+ height: 6px;
+ background: var(--input-background-fill);
+ border-radius: var(--radius-md);
+}
+
+input[type='range']::-moz-range-thumb {
+ height: 1em;
+ width: 1em;
+ background-color: var(--highlight-color);
+ border-radius: var(--radius-xs);
+ box-shadow: var(--shadow-md);
+ cursor: pointer;
+ /* Ensures the thumb is clickable */
+}
+
+input[type='range']::-moz-range-track {
+ height: 6px;
+ background: var(--input-background-fill);
+ border-radius: var(--radius-md);
+}
+
+@media (max-width: 768px) {
+ .slider-container {
+ width: 100%;
+ /* Adjust width for smaller screens */
+ }
+
+ .networks-menu,
+ .styles-menu {
+ width: 100%;
+ /* Ensure menus are full width */
+ margin: 0;
+ /* Reset margins for smaller screens */
+ }
+}
+
+/* Scrollbar Styles */
+:root {
+ scrollbar-color: var(--scrollbar-thumb) var(--scrollbar-bg);
+}
+
+::-webkit-scrollbar {
+ width: 12px;
+ height: 12px;
+}
+
+::-webkit-scrollbar-track {
+ background: var(--scrollbar-bg);
+}
+
+::-webkit-scrollbar-thumb {
+ background-color: var(--scrollbar-thumb);
+ border-radius: var(--radius-lg);
+ box-shadow: var(--shadow-sm);
+}
+
+/* Tab Navigation Styles */
+.tab-nav {
+ display: flex;
+ /* Use flexbox for layout */
+ justify-content: space-around;
+ /* Space out the tabs evenly */
+ align-items: center;
+ /* Center items vertically */
+ background: var(--background-color);
+ /* Background color */
+ border-bottom: 1px solid var(--highlight-color) !important;
+ /* Bottom border for separation */
+ box-shadow: var(--shadow-md);
+ /* Shadow for depth */
+}
+
+/* Individual Tab Styles */
+.tab-nav>button {
+ background: var(--neutral-900);
+ /* No background for default state */
+ color: var(--text-color);
+ /* Text color */
+ border: none;
+ /* No border */
+ border-radius: var(--radius-xxxl);
+ /* Rounded corners */
+ cursor: pointer;
+ /* Pointer cursor */
+ transition: background 0.3s ease, color 0.3s ease;
+ /* Smooth transition */
+}
+
+/* Active Tab Style */
+.tab-nav>button.active {
+ background: var(--highlight-color);
+ /* Highlight active tab */
+ color: var(--background-color);
+ /* Change text color for active tab */
+}
+
+/* Hover State for Tabs */
+.tab-nav>button:hover {
+ background: var(--highlight-color);
+ /* Background on hover */
+ color: var(--background-color);
+ /* Change text color on hover */
+}
+
+/* Responsive Styles */
+@media (max-width: 768px) {
+ .tab-nav {
+ flex-direction: column;
+ /* Stack tabs vertically on smaller screens */
+ align-items: stretch;
+ /* Stretch tabs to full width */
+ }
+
+ .tab-nav>button {
+ width: 100%;
+ /* Full width for buttons */
+ text-align: left;
+ /* Align text to the left */
+ }
+}
+
+/* Quick Settings Panel Styles */
+#quicksettings {
+ background: var(--background-color);
+ /* Background color */
+ box-shadow: var(--shadow-lg);
+ /* Shadow for depth */
+ border-radius: var(--radius-lg);
+ /* Rounded corners */
+ padding: 1em;
+ /* Padding for spacing */
+ z-index: 200;
+ /* Ensure it stays on top */
+}
+
+/* Quick Settings Header */
+#quicksettings .header {
+ font-size: var(--text-lg);
+ /* Font size for header */
+ font-weight: bold;
+ /* Bold text */
+ margin-bottom: 0.5em;
+ /* Space below header */
+}
+
+/* Quick Settings Options */
+#quicksettings .option {
+ display: flex;
+ /* Flexbox for layout */
+ justify-content: space-between;
+ /* Space between label and toggle */
+ align-items: center;
+ /* Center items vertically */
+ padding: 0.5em 0;
+ /* Padding for each option */
+ border-bottom: 1px solid var(--neutral-600);
+ /* Separator line */
+}
+
+/* Option Label Styles */
+#quicksettings .option label {
+ color: var(--text-color);
+ /* Text color */
+}
+
+/* Toggle Switch Styles */
+#quicksettings .option input[type="checkbox"] {
+ cursor: pointer;
+ /* Pointer cursor */
+}
+
+/* Quick Settings Footer */
+#quicksettings .footer {
+ margin-top: 1em;
+ /* Space above footer */
+ text-align: right;
+ /* Align text to the right */
+}
+
+/* Close Button Styles */
+#quicksettings .footer button {
+ background: var(--button-primary-background-fill);
+ /* Button background */
+ color: var(--button-primary-text-color);
+ /* Button text color */
+ border: none;
+ /* No border */
+ border-radius: var(--radius-md);
+ /* Rounded corners */
+ padding: 0.5em 1em;
+ /* Padding for button */
+ cursor: pointer;
+ /* Pointer cursor */
+ transition: 0.3s ease;
+ /* Smooth transition */
+}
+
+/* Close Button Hover State */
+#quicksettings .footer button:hover {
+ background: var(--highlight-color);
+ /* Change background on hover */
+}
+
+/* Responsive Styles */
+@media (max-width: 768px) {
+ #quicksettings {
+ right: 10px;
+ /* Adjust position for smaller screens */
+ width: 90%;
+ /* Full width on smaller screens */
+ }
+}
+
+/* Form Styles */
+div.form {
+ border-width: 0;
+ box-shadow: var(--shadow-md);
+ background: var(--background-fill-primary);
+ padding: 16px;
+ border-radius: var(--radius-md);
+}
+
+/* Gradio Style Classes */
+fieldset .gr-block.gr-box,
+label.block span {
+ padding: 0;
+ margin-top: -4px;
+}
+
+.border-2 {
+ border-width: 0;
+}
+
+.border-b-2 {
+ border-bottom-width: 2px;
+ border-color: var(--highlight-color) !important;
+ padding-bottom: 2px;
+ margin-bottom: 8px;
+}
+
+.bg-white {
+ color: lightyellow;
+ background-color: var(--inactive-color);
+}
+
+.gr-box {
+ border-radius: var(--radius-sm) !important;
+ background-color: var(--neutral-950) !important;
+ box-shadow: var(--shadow-md);
+ border-width: 0;
+ padding: 4px;
+ margin: 12px 0;
+}
+
+.gr-button {
+ font-weight: normal;
+ box-shadow: var(--shadow-sm);
+ font-size: 0.8rem;
+ min-width: 32px;
+ min-height: 32px;
+ padding: 3px;
+ margin: 3px;
+ transition: var(--transition);
+}
+
+.gr-button:hover {
+ background-color: var(--highlight-color);
+}
+
+.gr-check-radio {
+ background-color: var(--inactive-color);
+ border-width: 0;
+ border-radius: var(--radius-lg);
+ box-shadow: var(--shadow-sm);
+}
+
+.gr-check-radio:checked {
+ background-color: var(--highlight-color);
+}
+
+.gr-compact {
+ background-color: var(--background-color);
+}
+
+.gr-form {
+ border-width: 0;
+}
+
+.gr-input {
+ background-color: var(--neutral-800) !important;
+ padding: 4px;
+ margin: 4px;
+ border-radius: var(--radius-md);
+ transition: var(--transition);
+}
+
+.gr-input:hover {
+ background-color: var(--neutral-700);
+}
+
+.gr-input-label {
+ color: lightyellow;
+ border-width: 0;
+ background: transparent;
+ padding: 2px !important;
+}
+
+.gr-panel {
+ background-color: var(--background-color);
+ border-radius: var(--radius-md);
+ box-shadow: var(--shadow-md);
+}
+
+.eta-bar {
+ display: none !important;
+}
+
+.gradio-slider {
+ max-width: 200px;
+}
+
+.gradio-slider input[type="number"] {
+ background: var(--neutral-950);
+ margin-top: 2px;
+}
+
+.gradio-image {
+ height: unset !important;
+}
+
+svg.feather.feather-image,
+.feather .feather-image {
+ display: none;
+}
+
+.gap-2 {
+ padding-top: 8px;
+}
+
+.gr-box>div>div>input.gr-text-input {
+ right: 0;
+ width: 4em;
+ padding: 0;
+ top: -12px;
+ border: none;
+ max-height: 20px;
+}
+
+.output-html {
+  line-height: 1.2rem;
+ overflow-x: hidden;
+}
+
+.output-html>div {
+ margin-bottom: 8px;
+}
+
+.overflow-hidden .flex .flex-col .relative col .gap-4 {
+ min-width: var(--left-column);
+ max-width: var(--left-column);
+}
+
+.p-2 {
+ padding: 0;
+}
+
+.px-4 {
+ padding-left: 1rem;
+ padding-right: 1rem;
+}
+
+.py-6 {
+ padding-bottom: 0;
+}
+
+.tabs {
+ background-color: var(--background-color);
+}
+
+.block.token-counter span {
+ background-color: var(--input-background-fill) !important;
+ box-shadow: 2px 2px 2px #111;
+ border: none !important;
+ font-size: 0.7rem;
+}
+
+.label-wrap {
+ margin: 8px 0px 4px 0px;
+}
+
+.gradio-button.tool {
+ border: none;
+ background: none;
+ box-shadow: none;
+ filter: hue-rotate(340deg) saturate(0.5);
+}
+
+#tab_extensions table td,
+#tab_extensions table th,
+#tab_config table td,
+#tab_config table th {
+ border: none;
+}
+
+#tab_extensions table tr:hover,
+#tab_config table tr:hover {
+ background-color: var(--neutral-500) !important;
+}
+
+#tab_extensions table,
+#tab_config table {
+ width: 96vw;
+}
+
+#tab_extensions table thead,
+#tab_config table thead {
+ background-color: var(--neutral-700);
+}
+
+#tab_extensions table,
+#tab_config table {
+ background-color: var(--neutral-900);
+}
+
+/* Automatic Style Classes */
+.progressDiv {
+ border-radius: var(--radius-sm) !important;
+ position: fixed;
+ top: 44px;
+ right: 26px;
+ max-width: 262px;
+ height: 48px;
+ z-index: 99;
+ box-shadow: var(--button-shadow);
+}
+
+.progressDiv .progress {
+ border-radius: var(--radius-lg) !important;
+ background: var(--highlight-color);
+ line-height: 3rem;
+ height: 48px;
+}
+
+.gallery-item {
+ box-shadow: none !important;
+}
+
+.performance {
+ color: #888;
+}
+
+.extra-networks {
+ border-left: 2px solid var(--highlight-color) !important;
+ padding-left: 4px;
+}
+
+.image-buttons {
+ justify-content: center;
+ gap: 0 !important;
+}
+
+.image-buttons>button {
+ max-width: 160px;
+}
+
+.tooltip {
+ background: var(--primary-300);
+ color: black;
+ border: none;
+ border-radius: var(--radius-lg);
+}
+
+#system_row>button,
+#settings_row>button,
+#config_row>button {
+ max-width: 10em;
+}
+
+/* Gradio Elements Overrides */
+div.gradio-container {
+ overflow-x: hidden;
+}
+
+#img2img_label_copy_to_img2img {
+ font-weight: normal;
+}
+
+#txt2img_prompt,
+#txt2img_neg_prompt,
+#img2img_prompt,
+#img2img_neg_prompt,
+#control_prompt,
+#control_neg_prompt {
+ background-color: var(--background-color);
+ box-shadow: none !important;
+}
+
+#txt2img_prompt>label>textarea,
+#txt2img_neg_prompt>label>textarea,
+#img2img_prompt>label>textarea,
+#img2img_neg_prompt>label>textarea,
+#control_prompt>label>textarea,
+#control_neg_prompt>label>textarea {
+ font-size: 1.0em;
+ line-height: 1.4em;
+ border-radius: var(--radius-md);
+}
+
+#txt2img_styles,
+#img2img_styles,
+#control_styles {
+ padding: 0;
+ margin-top: 2px;
+}
+
+#txt2img_styles_refresh,
+#img2img_styles_refresh,
+#control_styles_refresh {
+ padding: 0;
+ margin-top: 1em;
+}
+
+#img2img_settings {
+ min-width: calc(2 * var(--left-column));
+ max-width: calc(2 * var(--left-column));
+ background-color: var(--neutral-950);
+ padding-top: 16px;
+}
+
+#interrogate,
+#deepbooru {
+ margin: 0 0px 10px 0px;
+ max-width: 80px;
+ max-height: 80px;
+ font-weight: normal;
+ font-size: 0.95em;
+}
+
+#quicksettings .gr-button-tool {
+ font-size: 1.6rem;
+ box-shadow: none;
+ margin-left: -20px;
+ margin-top: -2px;
+ height: 2.4em;
+}
+
+#footer,
+#style_pos_col,
+#style_neg_col,
+#roll_col,
+#extras_upscaler_2,
+#extras_upscaler_2_visibility,
+#txt2img_seed_resize_from_w,
+#txt2img_seed_resize_from_h {
+ display: none;
+}
+
+#save-animation {
+ border-radius: var(--radius-sm) !important;
+ margin-bottom: 16px;
+ background-color: var(--neutral-950);
+}
+
+#script_list {
+ padding: 4px;
+ margin-top: 16px;
+ margin-bottom: 8px;
+}
+
+#settings>div.flex-wrap {
+ width: 15em;
+}
+
+#settings_search {
+ margin-top: 1em;
+ margin-left: 1em;
+}
+
+#settings_search textarea {
+ padding: 0.5em;
+ height: 2.2em !important;
+}
+
+#txt2img_cfg_scale {
+ min-width: 200px;
+}
+
+#txt2img_checkboxes,
+#img2img_checkboxes,
+#control_checkboxes {
+ background-color: transparent;
+ margin-bottom: 0.2em;
+}
+
+textarea[rows="1"] {
+ height: 33px !important;
+ width: 99% !important;
+ padding: 8px !important;
+}
+
+#extras_upscale {
+ margin-top: 10px;
+}
+
+#txt2img_progress_row>div {
+ min-width: var(--left-column);
+ max-width: var(--left-column);
+}
+
+#txt2img_settings {
+ min-width: var(--left-column);
+ max-width: var(--left-column);
+ background-color: var(--neutral-950);
+ padding-top: 16px;
+}
+
+#pnginfo_html2_info {
+ margin-top: -18px;
+ background-color: var(--input-background-fill);
+ padding: var(--input-padding);
+}
+
+#txt2img_styles_row,
+#img2img_styles_row,
+#control_styles_row {
+ margin-top: -6px;
+}
+
+.block>span {
+ margin-bottom: 0 !important;
+ margin-top: var(--spacing-lg);
+}
+
+/* Extra Networks Container */
+#extra_networks_root {
+ width: 300px;
+ /* Set a fixed width for the sidebar */
+ position: absolute;
+ height: auto;
+ right: 0;
+ top: 13em;
+ z-index: 100;
+ background: var(--background-color);
+ box-shadow: var(--shadow-md);
+ border-radius: var(--radius-lg);
+ overflow: hidden;
+ /* Prevents overflow of content */
+}
+
+/* Extra Networks Styles */
+.extra-networks {
+ background: var(--background-color);
+ padding: var(--block-label-padding);
+ border-radius: var(--radius-lg);
+}
+
+/* Extra Networks Div Styles */
+.extra-networks>div {
+ margin: 0;
+ border-bottom: none !important;
+ gap: 0.3em 0;
+}
+
+.extra-networks .tab-nav>button:hover {
+ background: var(--highlight-color);
+}
+
+/* Network tab search and description important fix, dont remove */
+#txt2img_description,
+#txt2img_extra_search,
+#img2img_description,
+#img2img_extra_search,
+#control_description,
+#control_extra_search {
+ margin-top: 50px;
+}
+
+/* Individual Buttons */
+.extra-networks .buttons>button {
+ margin-left: -0.2em;
+ height: 1.4em;
+ color: var(--primary-300) !important;
+ font-size: 20px !important;
+ background: var(--button-primary-background-fill);
+ border: none;
+ border-radius: var(--radius-sm);
+ transition: var(--transition);
+}
+
+.extra-networks .buttons>button:hover {
+ background: var(--highlight-color);
+}
+
+/* Extra Networks Tab */
+.extra-networks-tab {
+ padding: 0 !important;
+}
+
+/* Subdirectories Styles */
+.extra-network-subdirs {
+ background: var(--input-background-fill);
+ overflow-x: hidden;
+ overflow-y: auto;
+ min-width: 120px;
+ padding-top: 0.5em;
+ margin-top: -4px !important;
+}
+
+/* Extra Networks Page */
+.extra-networks-page {
+ display: flex;
+}
+
+/* Network Cards Container */
+.extra-network-cards {
+ display: flex;
+ flex-wrap: wrap;
+ overflow-y: auto;
+ overflow-x: hidden;
+ align-content: flex-start;
+ width: 100%;
+ /* Ensures it takes full width */
+}
+
+/* Individual Card Styles */
+.extra-network-cards .card {
+ height: fit-content;
+ margin: 0 0 0.5em 0.5em;
+ position: relative;
+ scroll-snap-align: start;
+ scroll-margin-top: 0;
+ background: var(--neutral-800);
+ /* Background for cards */
+ border-radius: var(--radius-md);
+ box-shadow: var(--shadow-md);
+ transition: var(--transition);
+}
+
+/* Overlay Styles */
+.extra-network-cards .card .overlay {
+ z-index: 10;
+ width: 100%;
+ background: none;
+ border-radius: var(--radius-md);
+}
+
+/* Overlay Name Styles */
+.extra-network-cards .card .overlay .name {
+ font-size: var(--text-lg);
+ font-weight: bold;
+ text-shadow: 1px 1px black;
+ color: white;
+ overflow-wrap: anywhere;
+ position: absolute;
+ bottom: 0;
+ padding: 0.2em;
+ z-index: 10;
+}
+
+/* Preview Styles */
+.extra-network-cards .card .preview {
+ box-shadow: var(--button-shadow);
+ min-height: 30px;
+ border-radius: var(--radius-md);
+}
+
+/* Hover Effects */
+.extra-network-cards .card:hover .overlay {
+ background: rgba(0, 0, 0, 0.70);
+}
+
+.extra-network-cards .card:hover .preview {
+ box-shadow: none;
+ filter: grayscale(100%);
+}
+
+/* Tags Styles */
+.extra-network-cards .card .overlay .tags {
+ display: none;
+ overflow-wrap: anywhere;
+ position: absolute;
+ top: 100%;
+ z-index: 20;
+ background: var(--body-background-fill);
+ overflow-x: hidden;
+ overflow-y: auto;
+ max-height: 333px;
+}
+
+/* Individual Tag Styles */
+.extra-network-cards .card .overlay .tag {
+ padding: 2px;
+ margin: 2px;
+ background: rgba(70, 70, 70, 0.60);
+ font-size: var(--text-md);
+ cursor: pointer;
+ display: inline-block;
+}
+
+/* Actions Styles */
+.extra-network-cards .card .actions>span {
+ padding: 4px;
+ font-size: 34px !important;
+}
+
+.extra-network-cards .card .actions>span:hover {
+ color: var(--highlight-color);
+}
+
+/* Version Styles */
+.extra-network-cards .card .version {
+ position: absolute;
+ top: 0;
+ left: 0;
+ padding: 2px;
+ font-weight: bolder;
+ text-shadow: 1px 1px black;
+ text-transform: uppercase;
+ background: gray;
+ opacity: 75%;
+ margin: 4px;
+ line-height: 0.9rem;
+}
+
+/* Hover Actions */
+.extra-network-cards .card:hover .actions {
+ display: block;
+}
+
+.extra-network-cards .card:hover .overlay .tags {
+ display: block;
+}
+
+/* No Preview Card Styles */
+.extra-network-cards .card:has(>img[src*="card-no-preview.png"])::before {
+ content: '';
+ position: absolute;
+ width: 100%;
+ height: 100%;
+ mix-blend-mode: multiply;
+ background-color: var(--data-color);
+}
+
+/* Card List Styles */
+.extra-network-cards .card-list {
+ display: flex;
+ margin: 0.3em;
+ padding: 0.3em;
+ background: var(--input-background-fill);
+ cursor: pointer;
+ border-radius: var(--button-large-radius);
+}
+
+.extra-network-cards .card-list .tag {
+ color: var(--primary-500);
+ margin-left: 0.8em;
+}
+
+/* Correction color picker styling */
+#txt2img_hdr_color_picker label input {
+ width: 100%;
+ height: 100%;
+}
+
+/* Based on Gradio Built-in Dark Theme */
+:root,
+.light,
+.dark {
+ --body-background-fill: var(--background-color);
+ --color-accent-soft: var(--neutral-700);
+ --background-fill-secondary: none;
+ --border-color-accent: var(--background-color);
+ --border-color-primary: var(--background-color);
+ --link-text-color-active: var(--primary-500);
+ --link-text-color: var(--secondary-500);
+ --link-text-color-hover: var(--secondary-400);
+ --link-text-color-visited: var(--secondary-600);
+ --shadow-spread: 1px;
+ --block-background-fill: none;
+ --block-border-color: var(--border-color-primary);
+ --block_border_width: none;
+ --block-info-text-color: var(--body-text-color-subdued);
+ --block-label-background-fill: var(--background-fill-secondary);
+ --block-label-border-color: var(--border-color-primary);
+ --block_label_border_width: none;
+ --block-label-text-color: var(--neutral-200);
+ --block-shadow: none;
+ --block-title-background-fill: none;
+ --block-title-border-color: none;
+ --block-title-border-width: 0px;
+ --block-title-padding: 0;
+ --block-title-radius: none;
+ --block-title-text-size: var(--text-md);
+ --block-title-text-weight: 400;
+ --container-radius: var(--radius-lg);
+ --form-gap-width: 1px;
+ --layout-gap: var(--spacing-xxl);
+ --panel-border-width: 0;
+ --section-header-text-size: var(--text-md);
+ --section-header-text-weight: 400;
+ --checkbox-border-radius: var(--radius-sm);
+ --checkbox-label-gap: 2px;
+ --checkbox-label-padding: var(--spacing-md);
+ --checkbox-label-shadow: var(--shadow-drop);
+ --checkbox-label-text-size: var(--text-md);
+ --checkbox-label-text-weight: 400;
+ --checkbox-check: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3cpath d='M12.207 4.793a1 1 0 010 1.414l-5 5a1 1 0 01-1.414 0l-2-2a1 1 0 011.414-1.414L6.5 9.086l4.293-4.293a1 1 0 011.414 0z'/%3e%3c/svg%3e");
+ --radio-circle: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e");
+ --checkbox-shadow: var(--input-shadow);
+ --error-border-width: 1px;
+ --input-border-width: 0;
+ --input-radius: var(--radius-lg);
+ --input-text-size: var(--text-md);
+ --input-text-weight: 400;
+ --loader-color: var(--color-accent);
+ --prose-text-size: var(--text-md);
+ --prose-text-weight: 400;
+ --prose-header-text-weight: 400;
+ --slider-color: var(--neutral-900);
+ --table-radius: var(--radius-lg);
+ --button-large-padding: 2px 6px;
+ --button-large-radius: var(--radius-lg);
+ --button-large-text-size: var(--text-lg);
+ --button-large-text-weight: 400;
+ --button-shadow: none;
+ --button-shadow-active: none;
+ --button-shadow-hover: none;
+ --button-small-padding: var(--spacing-sm) calc(2 * var(--spacing-sm));
+ --button-small-radius: var(--radius-lg);
+ --button-small-text-size: var(--text-md);
+ --button-small-text-weight: 400;
+ --button-transition: none;
+ --size-9: 64px;
+ --size-14: 64px;
+}
\ No newline at end of file
From 41a63008dcec0911c43f61e89a64424191a3a2d6 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Thu, 28 Nov 2024 19:18:41 +0300
Subject: [PATCH 027/162] Fix memmon
---
modules/memmon.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/modules/memmon.py b/modules/memmon.py
index 6887e1e1c..d9fa3963d 100644
--- a/modules/memmon.py
+++ b/modules/memmon.py
@@ -42,14 +42,14 @@ def read(self):
if not self.disabled:
try:
self.data["free"], self.data["total"] = torch.cuda.mem_get_info(self.device.index if self.device.index is not None else torch.cuda.current_device())
+ self.data["used"] = self.data["total"] - self.data["free"]
torch_stats = torch.cuda.memory_stats(self.device)
- self.data["active"] = torch_stats["active.all.current"]
+ self.data["active"] = torch_stats.get("active.all.current", torch_stats["active_bytes.all.current"])
self.data["active_peak"] = torch_stats["active_bytes.all.peak"]
self.data["reserved"] = torch_stats["reserved_bytes.all.current"]
self.data["reserved_peak"] = torch_stats["reserved_bytes.all.peak"]
- self.data['retries'] = torch_stats["num_alloc_retries"]
- self.data['oom'] = torch_stats["num_ooms"]
- self.data["used"] = self.data["total"] - self.data["free"]
+ self.data['retries'] = torch_stats.get("num_alloc_retries", -1)
+ self.data['oom'] = torch_stats.get("num_ooms", -1)
except Exception:
self.disabled = True
return self.data
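With the fallbacks above, read() no longer disables itself when a torch backend renames or omits the allocator counters; a consumer-side sketch (the mem_mon instance name is an assumption):

    data = shared.mem_mon.read()
    # free/total/used come from mem_get_info; 'active' falls back to active_bytes.all.current,
    # while 'retries' and 'oom' default to -1 when the backend does not report those counters
    print(data['used'], data['active'], data['retries'], data['oom'])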
From 964b4c9e5a6f7bdab943007edeeb3b2c828ac158 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Thu, 28 Nov 2024 09:11:42 -0500
Subject: [PATCH 028/162] euler flowmatch add sigma methods
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 3 ++-
modules/sd_samplers_diffusers.py | 20 ++++++++++----------
2 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b3f2282c0..f88c78302 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -50,7 +50,8 @@
- control: add stats
- browser->server logging framework
- **Sampler** improvements
- - update DPM FlowMatch samplers
+ - Euler FlowMatch: add sigma methods (*karras/exponential/betas*)
+ - DPM FlowMatch: update all and add sigma methods
### Fixes
diff --git a/modules/sd_samplers_diffusers.py b/modules/sd_samplers_diffusers.py
index 9f24d5a91..4672df92e 100644
--- a/modules/sd_samplers_diffusers.py
+++ b/modules/sd_samplers_diffusers.py
@@ -69,7 +69,7 @@
'Euler a': { 'steps_offset': 0, 'rescale_betas_zero_snr': False, 'timestep_spacing': 'linspace' },
'Euler SGM': { 'steps_offset': 0, 'interpolation_type': "linear", 'rescale_betas_zero_snr': False, 'final_sigmas_type': 'zero', 'timestep_spacing': 'trailing', 'use_beta_sigmas': False, 'use_exponential_sigmas': False, 'use_karras_sigmas': False, 'prediction_type': "sample" },
'Euler EDM': { 'sigma_schedule': "karras" },
- 'Euler FlowMatch': { 'timestep_spacing': "linspace", 'shift': 1, 'use_dynamic_shifting': False },
+ 'Euler FlowMatch': { 'timestep_spacing': "linspace", 'shift': 1, 'use_dynamic_shifting': False, 'use_karras_sigmas': False, 'use_exponential_sigmas': False, 'use_beta_sigmas': False },
'DPM++': { 'solver_order': 2, 'thresholding': False, 'sample_max_value': 1.0, 'algorithm_type': "dpmsolver++", 'solver_type': "midpoint", 'lower_order_final': True, 'use_karras_sigmas': False, 'use_exponential_sigmas': False, 'use_beta_sigmas': False, 'final_sigmas_type': 'sigma_min' },
'DPM++ 1S': { 'thresholding': False, 'sample_max_value': 1.0, 'algorithm_type': "dpmsolver++", 'solver_type': "midpoint", 'lower_order_final': True, 'use_karras_sigmas': False, 'use_exponential_sigmas': False, 'use_beta_sigmas': False, 'use_lu_lambdas': False, 'final_sigmas_type': 'zero', 'timestep_spacing': 'linspace', 'solver_order': 1 },
@@ -200,16 +200,16 @@ def __init__(self, name, constructor, model, **kwargs):
timesteps = re.split(',| ', shared.opts.schedulers_timesteps)
timesteps = [int(x) for x in timesteps if x.isdigit()]
if len(timesteps) == 0:
- if 'use_beta_sigmas' in self.config:
- self.config['use_beta_sigmas'] = shared.opts.schedulers_sigma == 'beta'
- if 'use_karras_sigmas' in self.config:
- self.config['use_karras_sigmas'] = shared.opts.schedulers_sigma == 'karras'
- if 'use_exponential_sigmas' in self.config:
- self.config['use_exponential_sigmas'] = shared.opts.schedulers_sigma == 'exponential'
- if 'use_lu_lambdas' in self.config:
- self.config['use_lu_lambdas'] = shared.opts.schedulers_sigma == 'lambdas'
if 'sigma_schedule' in self.config:
self.config['sigma_schedule'] = shared.opts.schedulers_sigma if shared.opts.schedulers_sigma != 'default' else None
+ if shared.opts.schedulers_sigma == 'betas' and 'use_beta_sigmas' in self.config:
+ self.config['use_beta_sigmas'] = True
+ elif shared.opts.schedulers_sigma == 'karras' and 'use_karras_sigmas' in self.config:
+ self.config['use_karras_sigmas'] = True
+ elif shared.opts.schedulers_sigma == 'exponential' and 'use_exponential_sigmas' in self.config:
+ self.config['use_exponential_sigmas'] = True
+ elif shared.opts.schedulers_sigma == 'lambdas' and 'use_lu_lambdas' in self.config:
+ self.config['use_lu_lambdas'] = True
else:
pass # timesteps are set using set_timesteps in set_pipeline_args
@@ -236,7 +236,7 @@ def __init__(self, name, constructor, model, **kwargs):
if 'use_dynamic_shifting' in self.config:
if 'Flux' in model.__class__.__name__:
self.config['use_dynamic_shifting'] = shared.opts.schedulers_dynamic_shift
- if 'use_beta_sigmas' in self.config:
+ if 'use_beta_sigmas' in self.config and 'sigma_schedule' in self.config:
self.config['use_beta_sigmas'] = 'StableDiffusion3' in model.__class__.__name__
if 'rescale_betas_zero_snr' in self.config:
self.config['rescale_betas_zero_snr'] = shared.opts.schedulers_rescale_betas
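With this change the existing global sigma setting also reaches Euler FlowMatch; assuming no custom timesteps are configured, setting it to 'karras' resolves the sampler config roughly as sketched below (illustrative, other keys unchanged):

    # shared.opts.schedulers_sigma = 'karras'
    config = {
        'timestep_spacing': 'linspace', 'shift': 1, 'use_dynamic_shifting': False,
        'use_karras_sigmas': True,        # flipped by the new elif chain
        'use_exponential_sigmas': False,  # would be flipped for 'exponential'
        'use_beta_sigmas': False,         # would be flipped for 'betas'
    }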
From 425f51bb2524914a5a8ed834d144b82cc168cc75 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Thu, 28 Nov 2024 13:28:46 -0500
Subject: [PATCH 029/162] simplify inpaint/sketch canvas handling
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 5 ++-
javascript/black-teal.css | 1 -
javascript/imageMaskFix.js | 38 ------------------
javascript/sdnext.css | 20 ++++++----
javascript/ui.js | 2 +-
modules/img2img.py | 14 +++----
modules/ui_control.py | 4 +-
modules/ui_img2img.py | 81 ++++++++++++++++++--------------------
8 files changed, 65 insertions(+), 100 deletions(-)
delete mode 100644 javascript/imageMaskFix.js
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f88c78302..919041bde 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -48,7 +48,8 @@
- control: hide preview column by default
  - control: option to hide input column
- control: add stats
- - browser->server logging framework
+ - browser -> server logging framework
+  - add additional themes: `black-reimagined`
- **Sampler** improvements
- Euler FlowMatch: add sigma methods (*karras/exponential/betas*)
- DPM FlowMatch: update all and add sigma methods
@@ -65,6 +66,8 @@
- allow xyz-grid with multi-axis s&r
- fix xyz-grid with lora
- fix api script callbacks
+- fix gpu memory monitoring
+- simplify img2img/inpaint/sketch canvas handling
## Update for 2024-11-21
diff --git a/javascript/black-teal.css b/javascript/black-teal.css
index b73f9fdc7..2ebf32e96 100644
--- a/javascript/black-teal.css
+++ b/javascript/black-teal.css
@@ -108,7 +108,6 @@ fieldset .gr-block.gr-box, label.block span { padding: 0; margin-top: -4px; }
.eta-bar { display: none !important }
.gradio-slider { max-width: 200px; }
.gradio-slider input[type="number"] { background: var(--neutral-950); margin-top: 2px; }
-.gradio-image { height: unset !important; }
svg.feather.feather-image, .feather .feather-image { display: none }
.gap-2 { padding-top: 8px; }
.gr-box > div > div > input.gr-text-input { right: 0; width: 4em; padding: 0; top: -12px; border: none; max-height: 20px; }
diff --git a/javascript/imageMaskFix.js b/javascript/imageMaskFix.js
deleted file mode 100644
index fd37caf90..000000000
--- a/javascript/imageMaskFix.js
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * temporary fix for https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/668
- * @see https://github.com/gradio-app/gradio/issues/1721
- */
-function imageMaskResize() {
- const canvases = gradioApp().querySelectorAll('#img2maskimg .touch-none canvas');
- if (!canvases.length) {
- window.removeEventListener('resize', imageMaskResize);
- return;
- }
- const wrapper = canvases[0].closest('.touch-none');
- const previewImage = wrapper.previousElementSibling;
- if (!previewImage.complete) {
- previewImage.addEventListener('load', imageMaskResize);
- return;
- }
- const w = previewImage.width;
- const h = previewImage.height;
- const nw = previewImage.naturalWidth;
- const nh = previewImage.naturalHeight;
- const portrait = nh > nw;
- const wW = Math.min(w, portrait ? h / nh * nw : w / nw * nw);
- const wH = Math.min(h, portrait ? h / nh * nh : w / nw * nh);
- wrapper.style.width = `${wW}px`;
- wrapper.style.height = `${wH}px`;
- wrapper.style.left = '0px';
- wrapper.style.top = '0px';
- canvases.forEach((c) => {
- c.style.width = '';
- c.style.height = '';
- c.style.maxWidth = '100%';
- c.style.maxHeight = '100%';
- c.style.objectFit = 'contain';
- });
-}
-
-onAfterUiUpdate(imageMaskResize);
-window.addEventListener('resize', imageMaskResize);
diff --git a/javascript/sdnext.css b/javascript/sdnext.css
index 240b7492f..c5145c973 100644
--- a/javascript/sdnext.css
+++ b/javascript/sdnext.css
@@ -30,6 +30,19 @@ input::-webkit-outer-spin-button, input::-webkit-inner-spin-button { margin-left
.hidden { display: none; }
.tabitem { padding: 0 !important; }
+/* gradio image/canvas elements */
+.image-container { overflow: auto; }
+/*
+.gradio-image { min-height: fit-content; }
+.gradio-image img { object-fit: contain; }
+*/
+/*
+.gradio-image { min-height: 200px !important; }
+.image-container { height: unset !important; }
+.control-image { height: unset !important; }
+#img2img_sketch, #img2maskimg, #inpaint_sketch { overflow: overlay !important; resize: auto; background: var(--panel-background-fill); z-index: 5; }
+*/
+
/* color elements */
.gradio-dropdown, .block.gradio-slider, .block.gradio-checkbox, .block.gradio-textbox, .block.gradio-radio, .block.gradio-checkboxgroup, .block.gradio-number, .block.gradio-colorpicker { border-width: 0 !important; box-shadow: none !important;}
.gradio-accordion { padding-top: var(--spacing-md) !important; padding-right: 0 !important; padding-bottom: 0 !important; color: var(--body-text-color); }
@@ -87,8 +100,6 @@ button.custom-button { border-radius: var(--button-large-radius); padding: var(-
.performance .time { margin-right: 0; }
.thumbnails { background: var(--body-background-fill); }
.prompt textarea { resize: vertical; }
-.image-container { height: unset !important; }
-.control-image { height: unset !important; }
.grid-wrap { overflow-y: auto !important; }
#control_results { margin: 0; padding: 0; }
#txt2img_gallery, #img2img_gallery { height: 50vh; }
@@ -106,7 +117,6 @@ button.custom-button { border-radius: var(--button-large-radius); padding: var(-
#txt2img_prompt, #txt2img_neg_prompt, #img2img_prompt, #img2img_neg_prompt, #control_prompt, #control_neg_prompt { display: contents; }
#txt2img_actions_column, #img2img_actions_column, #control_actions { flex-flow: wrap; justify-content: space-between; }
-
.interrogate-clip { position: absolute; right: 6em; top: 8px; max-width: fit-content; background: none !important; z-index: 50; }
.interrogate-blip { position: absolute; right: 4em; top: 8px; max-width: fit-content; background: none !important; z-index: 50; }
.interrogate-col { min-width: 0 !important; max-width: fit-content; margin-right: var(--spacing-xxl); }
@@ -119,8 +129,6 @@ div#extras_scale_to_tab div.form { flex-direction: row; }
#img2img_unused_scale_by_slider { visibility: hidden; width: 0.5em; max-width: 0.5em; min-width: 0.5em; }
.inactive{ opacity: 0.5; }
div#extras_scale_to_tab div.form { flex-direction: row; }
-#mode_img2img .gradio-image>div.fixed-height, #mode_img2img .gradio-image>div.fixed-height img{ height: 480px !important; max-height: 480px !important; min-height: 480px !important; }
-#img2img_sketch, #img2maskimg, #inpaint_sketch { overflow: overlay !important; resize: auto; background: var(--panel-background-fill); z-index: 5; }
.image-buttons button { min-width: auto; }
.infotext { overflow-wrap: break-word; line-height: 1.5em; font-size: 0.95em !important; }
.infotext > p { white-space: pre-wrap; color: var(--block-info-text-color) !important; }
@@ -380,8 +388,6 @@ div:has(>#tab-gallery-folders) { flex-grow: 0 !important; background-color: var(
#img2img_actions_column { display: flex; min-width: fit-content !important; flex-direction: row;justify-content: space-evenly; align-items: center;}
#txt2img_generate_box, #img2img_generate_box, #txt2img_enqueue_wrapper,#img2img_enqueue_wrapper {display: flex;flex-direction: column;height: 4em !important;align-items: stretch;justify-content: space-evenly;}
#img2img_interface, #img2img_results, #img2img_footer p { text-wrap: wrap; min-width: 100% !important; max-width: 100% !important;} /* maintain single column for from image operations on larger mobile devices */
- #img2img_sketch, #img2maskimg, #inpaint_sketch {display: flex; overflow: auto !important; resize: none !important; } /* fix inpaint image display being too large for mobile displays */
- #img2maskimg canvas { width: auto !important; max-height: 100% !important; height: auto !important; }
#txt2img_sampler, #txt2img_batch, #txt2img_seed_group, #txt2img_advanced, #txt2img_second_pass, #img2img_sampling_group, #img2img_resize_group, #img2img_batch_group, #img2img_seed_group, #img2img_denoise_group, #img2img_advanced_group { width: 100% !important; } /* fix from text/image UI elements to prevent them from moving around within the UI */
#img2img_resize_group .gradio-radio>div { display: flex; flex-direction: column; width: unset !important; }
#inpaint_controls div { display:flex;flex-direction: row;}
diff --git a/javascript/ui.js b/javascript/ui.js
index 81d1c67e4..3e3f14390 100644
--- a/javascript/ui.js
+++ b/javascript/ui.js
@@ -139,7 +139,7 @@ function switch_to_inpaint(...args) {
return Array.from(arguments);
}
-function switch_to_inpaint_sketch(...args) {
+function switch_to_composite(...args) {
switchToTab('Image');
switch_to_img2img_tab(3);
return Array.from(arguments);
diff --git a/modules/img2img.py b/modules/img2img.py
index 8274386cc..077df1259 100644
--- a/modules/img2img.py
+++ b/modules/img2img.py
@@ -164,12 +164,7 @@ def img2img(id_task: str, state: str, mode: int,
return [], '', '', 'Error: init image not provided'
image = init_img.convert("RGB")
mask = None
- elif mode == 1: # img2img sketch
- if sketch is None:
- return [], '', '', 'Error: sketch image not provided'
- image = sketch.convert("RGB")
- mask = None
- elif mode == 2: # inpaint
+ elif mode == 1: # inpaint
if init_img_with_mask is None:
return [], '', '', 'Error: init image with mask not provided'
image = init_img_with_mask["image"]
@@ -177,7 +172,12 @@ def img2img(id_task: str, state: str, mode: int,
alpha_mask = ImageOps.invert(image.split()[-1]).convert('L').point(lambda x: 255 if x > 0 else 0, mode='1')
mask = ImageChops.lighter(alpha_mask, mask.convert('L')).convert('L')
image = image.convert("RGB")
- elif mode == 3: # inpaint sketch
+ elif mode == 2: # sketch
+ if sketch is None:
+ return [], '', '', 'Error: sketch image not provided'
+ image = sketch.convert("RGB")
+ mask = None
+ elif mode == 3: # composite
if inpaint_color_sketch is None:
return [], '', '', 'Error: color sketch image not provided'
image = inpaint_color_sketch
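The hunk above reorders the img2img modes so the integer index follows the new tab order (Image, Inpaint, Sketch, Composite). A tiny sketch of the resulting mapping, for orientation only; `describe_mode` is an illustrative helper and the real code branches on the integer directly:

```python
def describe_mode(mode: int) -> str:
    names = {0: 'img2img', 1: 'inpaint', 2: 'sketch', 3: 'composite'}
    return names.get(mode, 'other (upload/batch, unchanged by this patch)')

assert describe_mode(1) == 'inpaint'  # previously mode 1 was the sketch tab
assert describe_mode(2) == 'sketch'   # previously mode 2 was inpaint
```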
diff --git a/modules/ui_control.py b/modules/ui_control.py
index f4329663a..072d9b9c9 100644
--- a/modules/ui_control.py
+++ b/modules/ui_control.py
@@ -13,7 +13,7 @@
from modules import ui_control_helpers as helpers
-gr_height = None
+gr_height = 512
max_units = shared.opts.control_max_units
units: list[unit.Unit] = [] # main state variable
controls: list[gr.component] = [] # list of gr controls
@@ -135,7 +135,7 @@ def create_ui(_blocks: gr.Blocks=None):
with gr.Row():
input_type = gr.Radio(label="Input type", choices=['Control only', 'Init image same as control', 'Separate init image'], value='Control only', type='index', elem_id='control_input_type')
with gr.Row():
- denoising_strength = gr.Slider(minimum=0.01, maximum=1.0, step=0.01, label='Denoising strength', value=0.50, elem_id="control_input_denoising_strength")
+ denoising_strength = gr.Slider(minimum=0.01, maximum=1.0, step=0.01, label='Denoising strength', value=0.30, elem_id="control_input_denoising_strength")
with gr.Accordion(open=False, label="Size", elem_id="control_size", elem_classes=["small-accordion"]):
with gr.Tabs():
diff --git a/modules/ui_img2img.py b/modules/ui_img2img.py
index 046c181ce..3c3d63656 100644
--- a/modules/ui_img2img.py
+++ b/modules/ui_img2img.py
@@ -1,7 +1,6 @@
import os
from PIL import Image
import gradio as gr
-import numpy as np
from modules.call_queue import wrap_gradio_gpu_call, wrap_queued_call
from modules import timer, shared, ui_common, ui_sections, generation_parameters_copypaste, processing_vae
@@ -56,7 +55,7 @@ def copy_image(img):
def add_copy_image_controls(tab_name, elem):
with gr.Row(variant="compact", elem_id=f"img2img_copy_to_{tab_name}"):
- for title, name in zip(['➠ Image', '➠ Sketch', '➠ Inpaint', '➠ Composite'], ['img2img', 'sketch', 'inpaint', 'inpaint_sketch']):
+ for title, name in zip(['➠ Image', '➠ Inpaint', '➠ Sketch', '➠ Composite'], ['img2img', 'sketch', 'inpaint', 'composite']):
if name == tab_name:
gr.Button(title, elem_id=f'copy_to_{name}', interactive=False)
copy_image_destinations[name] = elem
@@ -67,33 +66,36 @@ def add_copy_image_controls(tab_name, elem):
with gr.Tabs(elem_id="mode_img2img"):
img2img_selected_tab = gr.State(0) # pylint: disable=abstract-class-instantiated
state = gr.Textbox(value='', visible=False)
- with gr.TabItem('Image', id='img2img', elem_id="img2img_img2img_tab") as tab_img2img:
- init_img = gr.Image(label="Image for img2img", elem_id="img2img_image", show_label=False, source="upload", interactive=True, type="pil", tool="editor", image_mode="RGBA")
+ with gr.TabItem('Image', id='img2img_image', elem_id="img2img_image_tab") as tab_img2img:
+ img_init = gr.Image(label="", elem_id="img2img_image", show_label=False, source="upload", interactive=True, type="pil", tool="editor", image_mode="RGBA", height=512)
interrogate_clip, interrogate_booru = ui_sections.create_interrogate_buttons('img2img')
- add_copy_image_controls('img2img', init_img)
-
- with gr.TabItem('Sketch', id='img2img_sketch', elem_id="img2img_img2img_sketch_tab") as tab_sketch:
- sketch = gr.Image(label="Image for img2img", elem_id="img2img_sketch", show_label=False, source="upload", interactive=True, type="pil", tool="color-sketch", image_mode="RGBA")
- add_copy_image_controls('sketch', sketch)
-
- with gr.TabItem('Inpaint', id='inpaint', elem_id="img2img_inpaint_tab") as tab_inpaint:
- init_img_with_mask = gr.Image(label="Image for inpainting with mask", show_label=False, elem_id="img2maskimg", source="upload", interactive=True, type="pil", tool="sketch", image_mode="RGBA")
- add_copy_image_controls('inpaint', init_img_with_mask)
-
- with gr.TabItem('Composite', id='inpaint_sketch', elem_id="img2img_inpaint_sketch_tab") as tab_inpaint_color:
- inpaint_color_sketch = gr.Image(label="Color sketch inpainting", show_label=False, elem_id="inpaint_sketch", source="upload", interactive=True, type="pil", tool="color-sketch", image_mode="RGBA")
- inpaint_color_sketch_orig = gr.State(None) # pylint: disable=abstract-class-instantiated
- add_copy_image_controls('inpaint_sketch', inpaint_color_sketch)
-
- def update_orig(image, state):
- if image is not None:
- same_size = state is not None and state.size == image.size
- has_exact_match = np.any(np.all(np.array(image) == np.array(state), axis=-1))
- edited = same_size and has_exact_match
- return image if not edited or state is None else state
- return state
-
- inpaint_color_sketch.change(update_orig, [inpaint_color_sketch, inpaint_color_sketch_orig], inpaint_color_sketch_orig)
+ add_copy_image_controls('img2img', img_init)
+
+ with gr.TabItem('Inpaint', id='img2img_inpaint', elem_id="img2img_inpaint_tab") as tab_inpaint:
+ img_inpaint = gr.Image(label="", elem_id="img2img_inpaint", show_label=False, source="upload", interactive=True, type="pil", tool="sketch", image_mode="RGBA", height=512)
+ add_copy_image_controls('inpaint', img_inpaint)
+
+ with gr.TabItem('Sketch', id='img2img_sketch', elem_id="img2img_sketch_tab") as tab_sketch:
+ img_sketch = gr.Image(label="", elem_id="img2img_sketch", show_label=False, source="upload", interactive=True, type="pil", tool="color-sketch", image_mode="RGBA", height=512)
+ add_copy_image_controls('sketch', img_sketch)
+
+ with gr.TabItem('Composite', id='img2img_composite', elem_id="img2img_composite_tab") as tab_inpaint_color:
+ img_composite = gr.Image(label="", show_label=False, elem_id="img2img_composite", source="upload", interactive=True, type="pil", tool="color-sketch", image_mode="RGBA", height=512)
+ img_composite_orig = gr.State(None) # pylint: disable=abstract-class-instantiated
+ img_composite_orig_update = False
+
+ def fn_img_composite_upload():
+ nonlocal img_composite_orig_update
+ img_composite_orig_update = True
+ def fn_img_composite_change(img, img_composite):
+ nonlocal img_composite_orig_update
+ res = img if img_composite_orig_update else img_composite
+ img_composite_orig_update = False
+ return res
+
+ img_composite.upload(fn=fn_img_composite_upload, inputs=[], outputs=[])
+ img_composite.change(fn=fn_img_composite_change, inputs=[img_composite, img_composite_orig], outputs=[img_composite_orig])
+ add_copy_image_controls('composite', img_composite)
with gr.TabItem('Upload', id='inpaint_upload', elem_id="img2img_inpaint_upload_tab") as tab_inpaint_upload:
init_img_inpaint = gr.Image(label="Image for img2img", show_label=False, source="upload", interactive=True, type="pil", elem_id="img_inpaint_base")
@@ -120,13 +122,13 @@ def update_orig(image, state):
with gr.Accordion(open=False, label="Sampler", elem_classes=["small-accordion"], elem_id="img2img_sampler_group"):
steps, sampler_index = ui_sections.create_sampler_and_steps_selection(None, "img2img")
ui_sections.create_sampler_options('img2img')
- resize_mode, resize_name, resize_context, width, height, scale_by, selected_scale_tab = ui_sections.create_resize_inputs('img2img', [init_img, sketch], latent=True, non_zero=False)
+ resize_mode, resize_name, resize_context, width, height, scale_by, selected_scale_tab = ui_sections.create_resize_inputs('img2img', [img_init, img_sketch], latent=True, non_zero=False)
batch_count, batch_size = ui_sections.create_batch_inputs('img2img', accordion=True)
seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w = ui_sections.create_seed_inputs('img2img')
with gr.Accordion(open=False, label="Denoise", elem_classes=["small-accordion"], elem_id="img2img_denoise_group"):
with gr.Row():
- denoising_strength = gr.Slider(minimum=0.0, maximum=0.99, step=0.01, label='Denoising strength', value=0.50, elem_id="img2img_denoising_strength")
+ denoising_strength = gr.Slider(minimum=0.0, maximum=0.99, step=0.01, label='Denoising strength', value=0.30, elem_id="img2img_denoising_strength")
refiner_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Denoise start', value=0.0, elem_id="img2img_refiner_start")
full_quality, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_sections.create_advanced_inputs('img2img')
@@ -167,13 +169,8 @@ def select_img2img_tab(tab):
img2img_args = [
dummy_component1, state, dummy_component2,
img2img_prompt, img2img_negative_prompt, img2img_prompt_styles,
- init_img,
- sketch,
- init_img_with_mask,
- inpaint_color_sketch,
- inpaint_color_sketch_orig,
- init_img_inpaint,
- init_mask_inpaint,
+ img_init, img_sketch, img_inpaint, img_composite, img_composite_orig,
+ init_img_inpaint, init_mask_inpaint,
steps,
sampler_index,
mask_blur, mask_alpha,
@@ -225,10 +222,7 @@ def select_img2img_tab(tab):
img2img_batch_files,
img2img_batch_input_dir,
img2img_batch_output_dir,
- init_img,
- sketch,
- init_img_with_mask,
- inpaint_color_sketch,
+ img_init, img_sketch, img_inpaint, img_composite,
init_img_inpaint,
],
outputs=[img2img_prompt, dummy_component],
@@ -285,7 +279,8 @@ def select_img2img_tab(tab):
(seed_resize_from_h, "Seed resize from-2"),
*modules.scripts.scripts_img2img.infotext_fields
]
- generation_parameters_copypaste.add_paste_fields("img2img", init_img, img2img_paste_fields, override_settings)
- generation_parameters_copypaste.add_paste_fields("inpaint", init_img_with_mask, img2img_paste_fields, override_settings)
+ generation_parameters_copypaste.add_paste_fields("img2img", img_init, img2img_paste_fields, override_settings)
+ generation_parameters_copypaste.add_paste_fields("sketch", img_sketch, img2img_paste_fields, override_settings)
+ generation_parameters_copypaste.add_paste_fields("inpaint", img_inpaint, img2img_paste_fields, override_settings)
img2img_bindings = generation_parameters_copypaste.ParamBinding(paste_button=img2img_paste, tabname="img2img", source_text_component=img2img_prompt, source_image_component=None)
generation_parameters_copypaste.register_paste_params_button(img2img_bindings)
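The Composite tab above replaces the numpy-based `update_orig` comparison with an upload flag: the stored original is refreshed only when the change event came from a fresh upload, not from in-canvas sketching. A framework-free sketch of that pattern, assuming nothing about gradio's event API:

```python
def make_composite_state():
    state = {'orig': None, 'from_upload': False}

    def on_upload():                     # mirrors img_composite.upload(...)
        state['from_upload'] = True

    def on_change(new_image):            # mirrors img_composite.change(...)
        if state['from_upload']:
            state['orig'] = new_image    # fresh upload becomes the new original
        state['from_upload'] = False     # in-canvas edits leave the original untouched
        return state['orig']

    return on_upload, on_change

on_upload, on_change = make_composite_state()
on_upload()
print(on_change('uploaded.png'))   # -> 'uploaded.png' (stored as original)
print(on_change('sketched.png'))   # -> 'uploaded.png' (original preserved)
```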
From 75dd6219c6b715159a1166f189a74cd76b1ce1b3 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Thu, 28 Nov 2024 22:12:25 +0300
Subject: [PATCH 030/162] Fix Cascade and add full_vqgan_decode
---
modules/processing_args.py | 2 +-
modules/processing_diffusers.py | 2 +-
modules/processing_vae.py | 64 ++++++++++++++++++++++++++++++++-
3 files changed, 65 insertions(+), 3 deletions(-)
diff --git a/modules/processing_args.py b/modules/processing_args.py
index ff766ec04..a716b685e 100644
--- a/modules/processing_args.py
+++ b/modules/processing_args.py
@@ -135,7 +135,7 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2
prompts = [p.replace('|image|', '<|image_1|>') for p in prompts]
if hasattr(model, 'text_encoder') and hasattr(model, 'tokenizer') and 'prompt_embeds' in possible and prompt_parser_diffusers.embedder is not None:
args['prompt_embeds'] = prompt_parser_diffusers.embedder('prompt_embeds')
- if 'StableCascade' in model.__class__.__name__ and len(getattr(p, 'negative_pooleds', [])) > 0:
+ if 'StableCascade' in model.__class__.__name__ and prompt_parser_diffusers.embedder is not None:
args['prompt_embeds_pooled'] = prompt_parser_diffusers.embedder('positive_pooleds').unsqueeze(0)
elif 'XL' in model.__class__.__name__ and prompt_parser_diffusers.embedder is not None:
args['pooled_prompt_embeds'] = prompt_parser_diffusers.embedder('positive_pooleds')
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 7b91fcd42..a278f980e 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -352,7 +352,7 @@ def process_decode(p: processing.StableDiffusionProcessing, output):
if not hasattr(model, 'vae'):
if hasattr(model, 'pipe') and hasattr(model.pipe, 'vae'):
model = model.pipe
- if hasattr(model, "vae") and output.images is not None and len(output.images) > 0:
+ if (hasattr(model, "vae") or hasattr(model, "vqgan")) and output.images is not None and len(output.images) > 0:
if p.hr_resize_mode > 0 and (p.hr_upscaler != 'None' or p.hr_resize_mode == 5):
width = max(getattr(p, 'width', 0), getattr(p, 'hr_upscale_to_x', 0))
height = max(getattr(p, 'height', 0), getattr(p, 'hr_upscale_to_y', 0))
diff --git a/modules/processing_vae.py b/modules/processing_vae.py
index 3c0357c81..1c4a45f07 100644
--- a/modules/processing_vae.py
+++ b/modules/processing_vae.py
@@ -33,6 +33,62 @@ def create_latents(image, p, dtype=None, device=None):
return latents
+def full_vqgan_decode(latents, model):
+ t0 = time.time()
+ if model is None or not hasattr(model, 'vqgan'):
+ shared.log.error('VQGAN not found in model')
+ return []
+ if debug:
+ devices.torch_gc(force=True)
+ shared.mem_mon.reset()
+
+ base_device = None
+ if shared.opts.diffusers_move_unet and not getattr(model, 'has_accelerate', False):
+ base_device = sd_models.move_base(model, devices.cpu)
+
+ if shared.opts.diffusers_offload_mode == "balanced":
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ elif shared.opts.diffusers_offload_mode != "sequential":
+ sd_models.move_model(model.vqgan, devices.device)
+
+ latents = latents.to(devices.device, dtype=model.vqgan.dtype)
+
+ #normalize latents
+ scaling_factor = model.vqgan.config.get("scale_factor", None)
+ if scaling_factor:
+ latents = latents * scaling_factor
+
+ vae_name = os.path.splitext(os.path.basename(sd_vae.loaded_vae_file))[0] if sd_vae.loaded_vae_file is not None else "default"
+ vae_stats = f'name="{vae_name}" dtype={model.vqgan.dtype} device={model.vqgan.device}'
+ latents_stats = f'shape={latents.shape} dtype={latents.dtype} device={latents.device}'
+ stats = f'vae {vae_stats} latents {latents_stats}'
+
+ log_debug(f'VAE config: {model.vqgan.config}')
+ try:
+ decoded = model.vqgan.decode(latents).sample.clamp(0, 1)
+ except Exception as e:
+ shared.log.error(f'VAE decode: {stats} {e}')
+ errors.display(e, 'VAE decode')
+ decoded = []
+
+ # delete vae after OpenVINO compile
+ if 'VAE' in shared.opts.cuda_compile and shared.opts.cuda_compile_backend == "openvino_fx" and shared.compiled_model_state.first_pass_vae:
+ shared.compiled_model_state.first_pass_vae = False
+ if not shared.opts.openvino_disable_memory_cleanup and hasattr(shared.sd_model, "vqgan"):
+ model.vqgan.apply(sd_models.convert_to_faketensors)
+ devices.torch_gc(force=True)
+
+ if shared.opts.diffusers_offload_mode == "balanced":
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ elif shared.opts.diffusers_move_unet and not getattr(model, 'has_accelerate', False) and base_device is not None:
+ sd_models.move_base(model, base_device)
+ t1 = time.time()
+ if debug:
+ log_debug(f'VAE memory: {shared.mem_mon.read()}')
+ shared.log.debug(f'VAE decode: {stats} time={round(t1-t0, 3)}')
+ return decoded
+
+
def full_vae_decode(latents, model):
t0 = time.time()
if not hasattr(model, 'vae') and hasattr(model, 'pipe'):
@@ -161,7 +217,7 @@ def vae_decode(latents, model, output_type='np', full_quality=True, width=None,
return []
if shared.state.interrupted or shared.state.skipped:
return []
- if not hasattr(model, 'vae'):
+ if not hasattr(model, 'vae') and not hasattr(model, 'vqgan'):
shared.log.error('VAE not found in model')
return []
@@ -176,12 +232,18 @@ def vae_decode(latents, model, output_type='np', full_quality=True, width=None,
decoded = latents.float().cpu().numpy()
elif full_quality and hasattr(model, "vae"):
decoded = full_vae_decode(latents=latents, model=model)
+ elif hasattr(model, "vqgan"):
+ decoded = full_vqgan_decode(latents=latents, model=model)
else:
decoded = taesd_vae_decode(latents=latents)
if torch.is_tensor(decoded):
if hasattr(model, 'image_processor'):
imgs = model.image_processor.postprocess(decoded, output_type=output_type)
+ elif hasattr(model, "vqgan"):
+ imgs = decoded.permute(0, 2, 3, 1).cpu().float().numpy()
+ if output_type == "pil":
+ imgs = model.numpy_to_pil(imgs)
else:
import diffusers
model.image_processor = diffusers.image_processor.VaeImageProcessor()
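Putting the new `full_vqgan_decode` path in context, a rough sketch of the decode dispatch in `vae_decode` after this patch; `model` is a stand-in object and the returned strings name the real functions. Pipelines such as Stable Cascade expose a `vqgan` attribute instead of `vae`, which is what the new branch targets:

```python
def pick_decoder(model, full_quality: bool = True) -> str:
    if full_quality and hasattr(model, 'vae'):
        return 'full_vae_decode'
    if hasattr(model, 'vqgan'):        # Cascade-style pipelines carry a VQ model instead of a VAE
        return 'full_vqgan_decode'
    return 'taesd_vae_decode'

class CascadeLike:
    vqgan = object()                   # stand-in for the pipeline's VQ model

print(pick_decoder(CascadeLike()))     # -> full_vqgan_decode
```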
From 1f39d718f9f141c431fabce6bede20839aa22863 Mon Sep 17 00:00:00 2001
From: P-Hellmann
Date: Fri, 29 Nov 2024 08:58:22 +0100
Subject: [PATCH 031/162] Small changes to black-teal-reimagined
---
javascript/black-teal-reimagined.css | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index e5618c02c..94fccdea9 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -813,10 +813,18 @@ textarea[rows="1"] {
background: var(--background-color);
box-shadow: var(--shadow-md);
border-radius: var(--radius-lg);
+ transform: translateX(100%);
+ animation: slideIn 0.5s forwards;
overflow: hidden;
/* Prevents overflow of content */
}
+@keyframes slideIn {
+ to {
+ transform: translateX(0);
+ }
+}
+
/* Extra Networks Styles */
.extra-networks {
background: var(--background-color);
@@ -1032,6 +1040,20 @@ textarea[rows="1"] {
height: 100%;
}
+/* Token counters styling */
+
+#txt2img_token_counter, #txt2img_negative_token_counter {
+ display: flex;
+ flex-direction: column;
+ justify-content: space-evenly;
+ padding: 10px;
+}
+
+#txt2img_prompt_container {
+ margin: 5px;
+ padding: 0px;
+}
+
/* Based on Gradio Built-in Dark Theme */
:root,
.light,
From 3e3501218b0179713f2eb92e4e59f5aeafa8349f Mon Sep 17 00:00:00 2001
From: P-Hellmann
Date: Fri, 29 Nov 2024 10:01:53 +0100
Subject: [PATCH 032/162] Removed redundant css
---
javascript/black-teal-reimagined.css | 36 ++++------------------------
1 file changed, 4 insertions(+), 32 deletions(-)
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index 94fccdea9..70315782d 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -655,27 +655,6 @@ svg.feather.feather-image,
font-weight: normal;
}
-#txt2img_prompt,
-#txt2img_neg_prompt,
-#img2img_prompt,
-#img2img _neg_prompt,
-#control_prompt,
-#control_neg_prompt {
- background-color: var(--background-color);
- box-shadow: none !important;
-}
-
-#txt2img_prompt>label>textarea,
-#txt2img_neg_prompt>label>textarea,
-#img2img_prompt>label>textarea,
-#img2img_neg_prompt>label>textarea,
-#control_prompt>label>textarea,
-#control_neg_prompt>label>textarea {
- font-size: 1.0em;
- line-height: 1.4em;
- border-radius: var(--radius-md);
-}
-
#txt2img_styles,
#img2img_styles,
#control_styles {
@@ -746,11 +725,6 @@ svg.feather.feather-image,
margin-left: 1em;
}
-#settings_search textarea {
- padding: 0.5em;
- height: 2.2em !important;
-}
-
#txt2img_cfg_scale {
min-width: 200px;
}
@@ -762,12 +736,6 @@ svg.feather.feather-image,
margin-bottom: 0.2em;
}
-textarea[rows="1"] {
- height: 33px !important;
- width: 99% !important;
- padding: 8px !important;
-}
-
#extras_upscale {
margin-top: 10px;
}
@@ -1054,6 +1022,10 @@ textarea[rows="1"] {
padding: 0px;
}
+#text2img_prompt label, #text2img_neg_prompt label {
+ margin: 0px;
+}
+
/* Based on Gradio Built-in Dark Theme */
:root,
.light,
From 73dbdbbddc2e4adffb5d91d9b0805e0cf5046edc Mon Sep 17 00:00:00 2001
From: P-Hellmann
Date: Fri, 29 Nov 2024 10:58:40 +0100
Subject: [PATCH 033/162] Rearrange forms and tab-nav
---
javascript/black-teal-reimagined.css | 34 ++++++++++++----------------
1 file changed, 15 insertions(+), 19 deletions(-)
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index 70315782d..9e7d357a4 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -228,16 +228,17 @@ input[type='range']::-moz-range-track {
.tab-nav {
display: flex;
/* Use flexbox for layout */
- justify-content: space-around;
+ justify-content: space-evenly;
/* Space out the tabs evenly */
align-items: center;
/* Center items vertically */
background: var(--background-color);
/* Background color */
- border-bottom: 1px solid var(--highlight-color) !important;
+ border-bottom: 3px solid var(--highlight-color) !important;
/* Bottom border for separation */
box-shadow: var(--shadow-md);
/* Shadow for depth */
+ margin-bottom: 5px;
}
/* Individual Tab Styles */
@@ -246,19 +247,24 @@ input[type='range']::-moz-range-track {
/* No background for default state */
color: var(--text-color);
/* Text color */
- border: none;
+ border: 1px solid var(--highlight-color);
/* No border */
- border-radius: var(--radius-xxxl);
+ border-radius: var(--radius-xxl);
/* Rounded corners */
cursor: pointer;
/* Pointer cursor */
transition: background 0.3s ease, color 0.3s ease;
/* Smooth transition */
+ padding-top: 5px;
+ padding-bottom: 5px;
+ padding-right: 10px;
+ padding-left: 10px;
+ margin-bottom: 3px;
}
/* Active Tab Style */
-.tab-nav>button.active {
- background: var(--highlight-color);
+.tab-nav>button.selected {
+ background: var(--primary-100);
/* Highlight active tab */
color: var(--background-color);
/* Change text color for active tab */
@@ -386,7 +392,8 @@ div.form {
border-width: 0;
box-shadow: var(--shadow-md);
background: var(--background-fill-primary);
- padding: 16px;
+ border-bottom: 3px solid var(--highlight-color);
+ padding: 3px;
border-radius: var(--radius-md);
}
@@ -720,11 +727,6 @@ svg.feather.feather-image,
width: 15em;
}
-#settings_search {
- margin-top: 1em;
- margin-left: 1em;
-}
-
#txt2img_cfg_scale {
min-width: 200px;
}
@@ -749,7 +751,6 @@ svg.feather.feather-image,
min-width: var(--left-column);
max-width: var(--left-column);
background-color: var(--neutral-950);
- padding-top: 16px;
}
#pnginfo_html2_info {
@@ -837,11 +838,6 @@ svg.feather.feather-image,
background: var(--highlight-color);
}
-/* Extra Networks Tab */
-.extra-networks-tab {
- padding: 0 !important;
-}
-
/* Subdirectories Styles */
.extra-network-subdirs {
background: var(--input-background-fill);
@@ -1014,7 +1010,7 @@ svg.feather.feather-image,
display: flex;
flex-direction: column;
justify-content: space-evenly;
- padding: 10px;
+ padding: 5px;
}
#txt2img_prompt_container {
From e3704ba057bbeeb58b63b107b0feb4426bf6a5bf Mon Sep 17 00:00:00 2001
From: P-Hellmann
Date: Fri, 29 Nov 2024 11:23:07 +0100
Subject: [PATCH 034/162] small changes
---
extensions-builtin/sdnext-modernui | 2 +-
javascript/black-teal-reimagined.css | 44 ++++------------------------
2 files changed, 7 insertions(+), 39 deletions(-)
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index 3008cee4b..f083ce41a 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit 3008cee4b67bb00f8f1a4fe4510ec27ba92aa418
+Subproject commit f083ce41a9f18b500f26745ea9e86855e509d2cb
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index 9e7d357a4..eb6942b8c 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -216,6 +216,7 @@ input[type='range']::-moz-range-track {
::-webkit-scrollbar-track {
background: var(--scrollbar-bg);
+ border-radius: var(--radius-lg);
}
::-webkit-scrollbar-thumb {
@@ -234,11 +235,14 @@ input[type='range']::-moz-range-track {
/* Center items vertically */
background: var(--background-color);
/* Background color */
- border-bottom: 3px solid var(--highlight-color) !important;
+ border-bottom: 1px dashed var(--highlight-color) !important;
/* Bottom border for separation */
box-shadow: var(--shadow-md);
/* Shadow for depth */
margin-bottom: 5px;
+ /* Add some space between the tab nav and the content */
+ padding-bottom: 5px;
+ /* Add space between buttons and border */
}
/* Individual Tab Styles */
@@ -395,6 +399,7 @@ div.form {
border-bottom: 3px solid var(--highlight-color);
padding: 3px;
border-radius: var(--radius-md);
+ margin: 1px;
}
/* Gradio Style Classes */
@@ -772,12 +777,6 @@ svg.feather.feather-image,
/* Extra Networks Container */
#extra_networks_root {
- width: 300px;
- /* Set a fixed width for the sidebar */
- position: absolute;
- height: auto;
- right: 0;
- top: 13em;
z-index: 100;
background: var(--background-color);
box-shadow: var(--shadow-md);
@@ -797,15 +796,6 @@ svg.feather.feather-image,
/* Extra Networks Styles */
.extra-networks {
background: var(--background-color);
- padding: var(--block-label-padding);
- border-radius: var(--radius-lg);
-}
-
-/* Extra Networks Div Styles */
-.extra-networks>div {
- margin: 0;
- border-bottom: none !important;
- gap: 0.3em 0;
}
.extra-networks .tab-nav>button:hover {
@@ -822,32 +812,10 @@ svg.feather.feather-image,
margin-top: 50px;
}
-/* Individual Buttons */
-.extra-networks .buttons>button {
- margin-left: -0.2em;
- height: 1.4em;
- color: var(--primary-300) !important;
- font-size: 20px !important;
- background: var(--button-primary-background-fill);
- border: none;
- border-radius: var(--radius-sm);
- transition: var(--transition);
-}
-
.extra-networks .buttons>button:hover {
background: var(--highlight-color);
}
-/* Subdirectories Styles */
-.extra-network-subdirs {
- background: var(--input-background-fill);
- overflow-x: hidden;
- overflow-y: auto;
- min-width: 120px;
- padding-top: 0.5em;
- margin-top: -4px !important;
-}
-
/* Extra Networks Page */
.extra-networks-page {
display: flex;
From fd5df851ad5dff22167f02876dd22fb97e1bbf4c Mon Sep 17 00:00:00 2001
From: P-Hellmann
Date: Fri, 29 Nov 2024 12:14:24 +0100
Subject: [PATCH 035/162] networks page rework
---
javascript/black-teal-reimagined.css | 27 ++++++++++++++++++++-------
1 file changed, 20 insertions(+), 7 deletions(-)
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index eb6942b8c..b1e840348 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -816,11 +816,6 @@ svg.feather.feather-image,
background: var(--highlight-color);
}
-/* Extra Networks Page */
-.extra-networks-page {
- display: flex;
-}
-
/* Network Cards Container */
.extra-network-cards {
display: flex;
@@ -828,6 +823,8 @@ svg.feather.feather-image,
overflow-y: auto;
overflow-x: hidden;
align-content: flex-start;
+ padding-top: 20px;
+ justify-content: center;
width: 100%;
/* Ensures it takes full width */
}
@@ -872,16 +869,23 @@ svg.feather.feather-image,
box-shadow: var(--button-shadow);
min-height: 30px;
border-radius: var(--radius-md);
+ z-index: 9999;
}
/* Hover Effects */
+.extra-network-cards .card:hover {
+ transform: scale(1.3);
+ z-index: 9999; /* Use a high value to ensure it appears on top */
+ transition: transform 0.3s ease, z-index 0s; /* Smooth transition */
+}
+
.extra-network-cards .card:hover .overlay {
- background: rgba(0, 0, 0, 0.70);
+ z-index: 10000; /* Ensure overlay is also on top */
}
.extra-network-cards .card:hover .preview {
box-shadow: none;
- filter: grayscale(100%);
+ filter: grayscale(0%);
}
/* Tags Styles */
@@ -913,6 +917,15 @@ svg.feather.feather-image,
font-size: 34px !important;
}
+.extra-network-cards .card .actions {
+ background: none;
+}
+
+.extra-network-cards .card .actions .details {
+ bottom: 50px;
+ background-color: var(--neutral-800);
+}
+
.extra-network-cards .card .actions>span:hover {
color: var(--highlight-color);
}
From c76619aa18f7f60d42226aa529b3fde711d4dc94 Mon Sep 17 00:00:00 2001
From: P-Hellmann
Date: Fri, 29 Nov 2024 12:24:19 +0100
Subject: [PATCH 036/162] mini changes
---
javascript/black-teal-reimagined.css | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index b1e840348..b7567ce75 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -631,11 +631,6 @@ svg.feather.feather-image,
color: #888;
}
-.extra-networks {
- border-left: 2px solid var(--highlight-color) !important;
- padding-left: 4px;
-}
-
.image-buttons {
justify-content: center;
gap: 0 !important;
@@ -795,7 +790,8 @@ svg.feather.feather-image,
/* Extra Networks Styles */
.extra-networks {
- background: var(--background-color);
+ border-left: 2px solid var(--highlight-color) !important;
+ padding-left: 4px;
}
.extra-networks .tab-nav>button:hover {
From b74166f9cb878c087a857ba29db13ced3c6333ca Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 29 Nov 2024 07:18:05 -0500
Subject: [PATCH 037/162] detailer add augment setting
Signed-off-by: Vladimir Mandic
---
modules/devices.py | 2 +-
modules/postprocess/yolo.py | 2 +-
modules/shared.py | 9 +++++----
3 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/modules/devices.py b/modules/devices.py
index 56ac50091..9ca1863a5 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -471,7 +471,7 @@ def set_cuda_params():
device_name = get_raw_openvino_device()
else:
device_name = torch.device(get_optimal_device_name())
- log.info(f'Torch parameters: backend={backend} device={device_name} config={opts.cuda_dtype} dtype={dtype} vae={dtype_vae} unet={dtype_unet} context={inference_context.__name__} nohalf={opts.no_half} nohalfvae={opts.no_half_vae} upscast={opts.upcast_sampling} deterministic={opts.cudnn_deterministic} test-fp16={fp16_ok} test-bf16={bf16_ok} optimization="{opts.cross_attention_optimization}"')
+ log.info(f'Torch parameters: backend={backend} device={device_name} config={opts.cuda_dtype} dtype={dtype} vae={dtype_vae} unet={dtype_unet} context={inference_context.__name__} nohalf={opts.no_half} nohalfvae={opts.no_half_vae} upcast={opts.upcast_sampling} deterministic={opts.cudnn_deterministic} test-fp16={fp16_ok} test-bf16={bf16_ok} optimization="{opts.cross_attention_optimization}"')
def cond_cast_unet(tensor):
diff --git a/modules/postprocess/yolo.py b/modules/postprocess/yolo.py
index f42b6bb9f..5deab1282 100644
--- a/modules/postprocess/yolo.py
+++ b/modules/postprocess/yolo.py
@@ -72,7 +72,7 @@ def predict(
imgsz: int = 640,
half: bool = True,
device = devices.device,
- augment: bool = True,
+ augment: bool = shared.opts.detailer_augment,
agnostic: bool = False,
retina: bool = False,
mask: bool = True,
diff --git a/modules/shared.py b/modules/shared.py
index 5b54a0de2..720819135 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -824,7 +824,7 @@ def get_default_modes():
'postprocessing_enable_in_main_ui': OptionInfo([], "Additional postprocessing operations", gr.Dropdown, lambda: {"multiselect":True, "choices": [x.name for x in shared_items.postprocessing_scripts()]}),
'postprocessing_operation_order': OptionInfo([], "Postprocessing operation order", gr.Dropdown, lambda: {"multiselect":True, "choices": [x.name for x in shared_items.postprocessing_scripts()]}),
- "postprocessing_sep_img2img": OptionInfo("Img2Img & Inpainting", "", gr.HTML),
+ "postprocessing_sep_img2img": OptionInfo("Inpaint", "", gr.HTML),
"img2img_color_correction": OptionInfo(False, "Apply color correction"),
"mask_apply_overlay": OptionInfo(True, "Apply mask as overlay"),
"img2img_background_color": OptionInfo("#ffffff", "Image transparent color fill", gr.ColorPicker, {}),
@@ -832,7 +832,7 @@ def get_default_modes():
"initial_noise_multiplier": OptionInfo(1.0, "Noise multiplier for image processing", gr.Slider, {"minimum": 0.1, "maximum": 1.5, "step": 0.01, "visible": not native}),
"img2img_extra_noise": OptionInfo(0.0, "Extra noise multiplier for img2img", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01, "visible": not native}),
- # "postprocessing_sep_detailer": OptionInfo("Detailer", "", gr.HTML),
+ "postprocessing_sep_detailer": OptionInfo("Detailer", "", gr.HTML),
"detailer_model": OptionInfo("Detailer", "Detailer model", gr.Radio, lambda: {"choices": [x.name() for x in detailers], "visible": False}),
"detailer_classes": OptionInfo("", "Detailer classes", gr.Textbox, { "visible": False}),
"detailer_conf": OptionInfo(0.6, "Min confidence", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.05, "visible": False}),
@@ -844,11 +844,12 @@ def get_default_modes():
"detailer_blur": OptionInfo(10, "Item edge blur", gr.Slider, {"minimum": 0, "maximum": 100, "step": 1, "visible": False}),
"detailer_strength": OptionInfo(0.5, "Detailer strength", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01, "visible": False}),
"detailer_models": OptionInfo(['face-yolo8n'], "Detailer models", gr.Dropdown, lambda: {"multiselect":True, "choices": list(yolo.list), "visible": False}),
- "code_former_weight": OptionInfo(0.2, "CodeFormer weight parameter", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01, "visible": False}),
"detailer_unload": OptionInfo(False, "Move detailer model to CPU when complete"),
+ "detailer_augment": OptionInfo(True, "Detailer use model augment"),
"postprocessing_sep_face_restore": OptionInfo("Face restore
", "", gr.HTML),
- "face_restoration_model": OptionInfo("Face restorer", "Face restoration", gr.Radio, lambda: {"choices": ['None'] + [x.name() for x in face_restorers]}),
+ "face_restoration_model": OptionInfo("None", "Face restoration", gr.Radio, lambda: {"choices": ['None'] + [x.name() for x in face_restorers]}),
+ "code_former_weight": OptionInfo(0.2, "CodeFormer weight parameter", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}),
"postprocessing_sep_upscalers": OptionInfo("Upscaling
", "", gr.HTML),
"upscaler_unload": OptionInfo(False, "Unload upscaler after processing"),
From f2d5307c54cae2157273c47adfff302c9ed06a4d Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 29 Nov 2024 07:49:25 -0500
Subject: [PATCH 038/162] update modernui reference
Signed-off-by: Vladimir Mandic
---
extensions-builtin/sdnext-modernui | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index f083ce41a..3008cee4b 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit f083ce41a9f18b500f26745ea9e86855e509d2cb
+Subproject commit 3008cee4b67bb00f8f1a4fe4510ec27ba92aa418
From a635421231743e0f07f4005dd83ef357f4ee0b42 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 29 Nov 2024 08:01:26 -0500
Subject: [PATCH 039/162] lint fixes
Signed-off-by: Vladimir Mandic
---
extensions-builtin/sdnext-modernui | 2 +-
modules/lora/lora.py | 8 --------
modules/lora/lora_convert.py | 17 +++++++++--------
modules/lora/network.py | 6 ++++--
modules/lora/network_norm.py | 1 +
modules/lora/network_oft.py | 3 ++-
modules/lora/networks.py | 14 +++++++++-----
7 files changed, 26 insertions(+), 25 deletions(-)
delete mode 100644 modules/lora/lora.py
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index 3008cee4b..f083ce41a 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit 3008cee4b67bb00f8f1a4fe4510ec27ba92aa418
+Subproject commit f083ce41a9f18b500f26745ea9e86855e509d2cb
diff --git a/modules/lora/lora.py b/modules/lora/lora.py
deleted file mode 100644
index 33adfe05c..000000000
--- a/modules/lora/lora.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# import networks
-#
-# list_available_loras = networks.list_available_networks
-# available_loras = networks.available_networks
-# available_lora_aliases = networks.available_network_aliases
-# available_lora_hash_lookup = networks.available_network_hash_lookup
-# forbidden_lora_aliases = networks.forbidden_network_aliases
-# loaded_loras = networks.loaded_networks
diff --git a/modules/lora/lora_convert.py b/modules/lora/lora_convert.py
index 6bf563125..dc86a24cf 100644
--- a/modules/lora/lora_convert.py
+++ b/modules/lora/lora_convert.py
@@ -107,14 +107,14 @@ def make_unet_conversion_map() -> Dict[str, str]:
class KeyConvert:
def __init__(self):
- self.is_sdxl = True if shared.sd_model_type == "sdxl" else False
- self.UNET_CONVERSION_MAP = make_unet_conversion_map() if self.is_sdxl else None
- self.LORA_PREFIX_UNET = "lora_unet_"
- self.LORA_PREFIX_TEXT_ENCODER = "lora_te_"
- self.OFT_PREFIX_UNET = "oft_unet_"
- # SDXL: must starts with LORA_PREFIX_TEXT_ENCODER
- self.LORA_PREFIX_TEXT_ENCODER1 = "lora_te1_"
- self.LORA_PREFIX_TEXT_ENCODER2 = "lora_te2_"
+ self.is_sdxl = True if shared.sd_model_type == "sdxl" else False
+ self.UNET_CONVERSION_MAP = make_unet_conversion_map() if self.is_sdxl else None
+ self.LORA_PREFIX_UNET = "lora_unet_"
+ self.LORA_PREFIX_TEXT_ENCODER = "lora_te_"
+ self.OFT_PREFIX_UNET = "oft_unet_"
+        # SDXL: must start with LORA_PREFIX_TEXT_ENCODER
+ self.LORA_PREFIX_TEXT_ENCODER1 = "lora_te1_"
+ self.LORA_PREFIX_TEXT_ENCODER2 = "lora_te2_"
def __call__(self, key):
if self.is_sdxl:
@@ -446,6 +446,7 @@ def _convert_sd_scripts_to_ai_toolkit(sds_sd):
lora_name_alpha = f"{lora_name}.alpha"
diffusers_name = _convert_text_encoder_lora_key(key, lora_name)
+ sd_lora_rank = 1
if lora_name.startswith(("lora_te_", "lora_te1_")):
down_weight = sds_sd.pop(key)
sd_lora_rank = down_weight.shape[0]
diff --git a/modules/lora/network.py b/modules/lora/network.py
index 0785ef9f4..8e6f87368 100644
--- a/modules/lora/network.py
+++ b/modules/lora/network.py
@@ -1,9 +1,11 @@
import os
-from collections import namedtuple
import enum
+from typing import Union
+from collections import namedtuple
from modules import sd_models, hashes, shared
+
NetworkWeights = namedtuple('NetworkWeights', ['network_key', 'sd_key', 'w', 'sd_module'])
metadata_tags_order = {"ss_sd_model_name": 1, "ss_resolution": 2, "ss_clip_skip": 3, "ss_num_train_images": 10, "ss_tag_frequency": 20}
@@ -105,7 +107,7 @@ def __init__(self, name, network_on_disk: NetworkOnDisk):
class ModuleType:
- def create_module(self, net: Network, weights: NetworkWeights) -> Network | None: # pylint: disable=W0613
+ def create_module(self, net: Network, weights: NetworkWeights) -> Union[Network, None]: # pylint: disable=W0613
return None
diff --git a/modules/lora/network_norm.py b/modules/lora/network_norm.py
index e8f1740e3..5d059e92e 100644
--- a/modules/lora/network_norm.py
+++ b/modules/lora/network_norm.py
@@ -1,5 +1,6 @@
import modules.lora.network as network
+
class ModuleTypeNorm(network.ModuleType):
def create_module(self, net: network.Network, weights: network.NetworkWeights):
if all(x in weights.w for x in ["w_norm", "b_norm"]):
diff --git a/modules/lora/network_oft.py b/modules/lora/network_oft.py
index 808286066..e2e61ad45 100644
--- a/modules/lora/network_oft.py
+++ b/modules/lora/network_oft.py
@@ -1,7 +1,7 @@
import torch
+from einops import rearrange
import modules.lora.network as network
from modules.lora.lyco_helpers import factorization
-from einops import rearrange
class ModuleTypeOFT(network.ModuleType):
@@ -10,6 +10,7 @@ def create_module(self, net: network.Network, weights: network.NetworkWeights):
return NetworkModuleOFT(net, weights)
return None
+
# Supports both kohya-ss' implementation of COFT https://github.com/kohya-ss/sd-scripts/blob/main/networks/oft.py
# and KohakuBlueleaf's implementation of OFT/COFT https://github.com/KohakuBlueleaf/LyCORIS/blob/dev/lycoris/modules/diag_oft.py
class NetworkModuleOFT(network.NetworkModule): # pylint: disable=abstract-method
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index c6fde3e04..737623b1e 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -3,6 +3,9 @@
import re
import time
import concurrent
+import torch
+import diffusers.models.lora
+
import modules.lora.network as network
import modules.lora.network_lora as network_lora
import modules.lora.network_hada as network_hada
@@ -14,8 +17,6 @@
import modules.lora.network_glora as network_glora
import modules.lora.network_overrides as network_overrides
import modules.lora.lora_convert as lora_convert
-import torch
-import diffusers.models.lora
from modules import shared, devices, sd_models, sd_models_compile, errors, scripts, files_cache, model_quant
@@ -74,7 +75,7 @@ def assign_network_names_to_compvis_modules(sd_model):
shared.sd_model.network_layer_mapping = network_layer_mapping
-def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_default_multiplier) -> network.Network | None:
+def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_default_multiplier) -> Union[network.Network, None]:
name = name.replace(".", "_")
shared.log.debug(f'Load network: type=LoRA name="{name}" file="{network_on_disk.filename}" detected={network_on_disk.sd_version} method=diffusers scale={lora_scale} fuse={shared.opts.lora_fuse_diffusers}')
if not shared.native:
@@ -103,7 +104,7 @@ def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_
return net
-def load_network(name, network_on_disk) -> network.Network | None:
+def load_network(name, network_on_disk) -> Union[network.Network, None]:
if not shared.sd_loaded:
return None
@@ -173,6 +174,7 @@ def load_network(name, network_on_disk) -> network.Network | None:
net.bundle_embeddings = bundle_embeddings
return net
+
def maybe_recompile_model(names, te_multipliers):
recompile_model = False
if shared.compiled_model_state is not None and shared.compiled_model_state.is_compiled:
@@ -186,7 +188,7 @@ def maybe_recompile_model(names, te_multipliers):
if not recompile_model:
if len(loaded_networks) > 0 and debug:
shared.log.debug('Model Compile: Skipping LoRa loading')
- return
+ return recompile_model
else:
recompile_model = True
shared.compiled_model_state.lora_model = []
@@ -277,6 +279,7 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
t1 = time.time()
timer['load'] += t1 - t0
+
def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias):
weights_backup = getattr(self, "network_weights_backup", None)
bias_backup = getattr(self, "network_bias_backup", None)
@@ -389,6 +392,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
t1 = time.time()
timer['apply'] += t1 - t0
+
def network_load():
    sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
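Several of the lint fixes above replace PEP 604 unions (`network.Network | None`) with `typing.Union`, which also parses on Python 3.9 and earlier. A minimal illustration of the two equivalent spellings; `Network` here is a stand-in class:

```python
from typing import Optional, Union

class Network:                                   # stand-in for modules.lora.network.Network
    pass

def load_a(name: str) -> Union[Network, None]:   # spelling used by this patch, works on 3.9
    return Network() if name else None

def load_b(name: str) -> Optional[Network]:      # equivalent shorthand
    return Network() if name else None
```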
From 6aa7a4707ef93a993a043d134811ccb3321a6f7f Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 29 Nov 2024 09:39:38 -0500
Subject: [PATCH 040/162] modules.lora full integration
Signed-off-by: Vladimir Mandic
---
.../Lora/scripts/lora_script.py | 1 +
modules/api/api.py | 2 +
modules/api/endpoints.py | 10 +++
modules/extensions.py | 2 +-
modules/extra_networks.py | 7 ++-
modules/hashes.py | 7 +++
modules/infotext.py | 35 +++++++++++
modules/loader.py | 2 +-
modules/lora/networks.py | 25 --------
modules/sd_checkpoint.py | 6 ++
modules/shared.py | 5 +-
modules/ui_extra_networks.py | 10 +--
modules/{lora => }/ui_extra_networks_lora.py | 0
modules/ui_models.py | 6 +-
scripts/lora_script.py | 62 -------------------
webui.py | 19 +++++-
16 files changed, 98 insertions(+), 101 deletions(-)
rename modules/{lora => }/ui_extra_networks_lora.py (100%)
delete mode 100644 scripts/lora_script.py
diff --git a/extensions-builtin/Lora/scripts/lora_script.py b/extensions-builtin/Lora/scripts/lora_script.py
index dea2985b3..24723dd7f 100644
--- a/extensions-builtin/Lora/scripts/lora_script.py
+++ b/extensions-builtin/Lora/scripts/lora_script.py
@@ -56,6 +56,7 @@ def network_replacement(m):
hashes = {x[0].strip().replace(",", ""): x[1].strip() for x in hashes}
d["Prompt"] = re.sub(re_lora, network_replacement, d["Prompt"])
+
if not shared.native:
script_callbacks.on_app_started(api_networks)
script_callbacks.on_before_ui(before_ui)
diff --git a/modules/api/api.py b/modules/api/api.py
index d48cbf521..7d2c2f279 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -79,6 +79,7 @@ def __init__(self, app: FastAPI, queue_lock: Lock):
self.add_api_route("/sdapi/v1/sd-vae", endpoints.get_sd_vaes, methods=["GET"], response_model=List[models.ItemVae])
self.add_api_route("/sdapi/v1/extensions", endpoints.get_extensions_list, methods=["GET"], response_model=List[models.ItemExtension])
self.add_api_route("/sdapi/v1/extra-networks", endpoints.get_extra_networks, methods=["GET"], response_model=List[models.ItemExtraNetwork])
+ self.add_api_route("/sdapi/v1/loras", endpoints.get_loras, methods=["GET"], response_model=List[dict])
# functional api
self.add_api_route("/sdapi/v1/png-info", endpoints.post_pnginfo, methods=["POST"], response_model=models.ResImageInfo)
@@ -88,6 +89,7 @@ def __init__(self, app: FastAPI, queue_lock: Lock):
self.add_api_route("/sdapi/v1/unload-checkpoint", endpoints.post_unload_checkpoint, methods=["POST"])
self.add_api_route("/sdapi/v1/reload-checkpoint", endpoints.post_reload_checkpoint, methods=["POST"])
self.add_api_route("/sdapi/v1/refresh-vae", endpoints.post_refresh_vae, methods=["POST"])
+ self.add_api_route("/sdapi/v1/refresh-loras", endpoints.post_refresh_loras, methods=["POST"])
self.add_api_route("/sdapi/v1/history", endpoints.get_history, methods=["GET"], response_model=List[str])
self.add_api_route("/sdapi/v1/history", endpoints.post_history, methods=["POST"], response_model=int)
diff --git a/modules/api/endpoints.py b/modules/api/endpoints.py
index 61993db84..1c56b7171 100644
--- a/modules/api/endpoints.py
+++ b/modules/api/endpoints.py
@@ -40,6 +40,12 @@ def convert_embeddings(embeddings):
return {"loaded": convert_embeddings(db.word_embeddings), "skipped": convert_embeddings(db.skipped_embeddings)}
+def get_loras():
+ from modules.lora import network, networks
+ def create_lora_json(obj: network.NetworkOnDisk):
+ return { "name": obj.name, "alias": obj.alias, "path": obj.filename, "metadata": obj.metadata }
+ return [create_lora_json(obj) for obj in networks.available_networks.values()]
+
def get_extra_networks(page: Optional[str] = None, name: Optional[str] = None, filename: Optional[str] = None, title: Optional[str] = None, fullname: Optional[str] = None, hash: Optional[str] = None): # pylint: disable=redefined-builtin
res = []
for pg in shared.extra_networks:
@@ -126,6 +132,10 @@ def post_refresh_checkpoints():
def post_refresh_vae():
return shared.refresh_vaes()
+def post_refresh_loras():
+ from modules.lora import networks
+ return networks.list_available_networks()
+
def get_extensions_list():
from modules import extensions
extensions.list_extensions()
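For the two routes registered above, a small usage sketch assuming a local SD.Next instance on the default port and the third-party `requests` package; the URL and port are assumptions, only the routes come from the patch:

```python
import requests

base = 'http://127.0.0.1:7860'
requests.post(f'{base}/sdapi/v1/refresh-loras', timeout=60)        # rescan available lora files
loras = requests.get(f'{base}/sdapi/v1/loras', timeout=60).json()  # list of {name, alias, path, metadata}
for item in loras:
    print(item['name'], item['path'])
```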
diff --git a/modules/extensions.py b/modules/extensions.py
index 5a8a53d29..ccd92dbf0 100644
--- a/modules/extensions.py
+++ b/modules/extensions.py
@@ -154,4 +154,4 @@ def list_extensions():
for dirname, path, is_builtin in extension_paths:
extension = Extension(name=dirname, path=path, enabled=dirname not in disabled_extensions, is_builtin=is_builtin)
extensions.append(extension)
- shared.log.info(f'Disabled extensions: {[e.name for e in extensions if not e.enabled]}')
+ shared.log.debug(f'Disabled extensions: {[e.name for e in extensions if not e.enabled]}')
diff --git a/modules/extra_networks.py b/modules/extra_networks.py
index b464bd349..010157af9 100644
--- a/modules/extra_networks.py
+++ b/modules/extra_networks.py
@@ -15,10 +15,13 @@ def register_extra_network(extra_network):
def register_default_extra_networks():
- from modules.ui_extra_networks_hypernet import ExtraNetworkHypernet
- register_extra_network(ExtraNetworkHypernet())
from modules.ui_extra_networks_styles import ExtraNetworkStyles
register_extra_network(ExtraNetworkStyles())
+ from modules.lora.extra_networks_lora import ExtraNetworkLora
+ register_extra_network(ExtraNetworkLora())
+ if shared.opts.hypernetwork_enabled:
+ from modules.ui_extra_networks_hypernet import ExtraNetworkHypernet
+ register_extra_network(ExtraNetworkHypernet())
class ExtraNetworkParams:
diff --git a/modules/hashes.py b/modules/hashes.py
index cf83794b0..a003f4840 100644
--- a/modules/hashes.py
+++ b/modules/hashes.py
@@ -9,6 +9,13 @@
cache_data = None
progress_ok = True
+
+def init_cache():
+ global cache_data # pylint: disable=global-statement
+ if cache_data is None:
+ cache_data = {} if not os.path.isfile(cache_filename) else shared.readfile(cache_filename, lock=True)
+
+
def dump_cache():
shared.writefile(cache_data, cache_filename)
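init_cache() is a lazy singleton: the hashes cache file is read from disk at most once, and an empty dict is used when the file does not exist. A rough standalone sketch of the same pattern, with the file name and JSON reader chosen here for illustration:

    import json
    import os

    cache_data = None  # module-level singleton, populated on first access

    def init_cache(cache_filename='cache.json'):
        global cache_data
        if cache_data is None:  # touch the disk only once
            if os.path.isfile(cache_filename):
                with open(cache_filename, 'r', encoding='utf-8') as f:
                    cache_data = json.load(f)
            else:
                cache_data = {}
        return cache_data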
diff --git a/modules/infotext.py b/modules/infotext.py
index 05e06e600..4b9dd15ff 100644
--- a/modules/infotext.py
+++ b/modules/infotext.py
@@ -10,6 +10,7 @@
debug = lambda *args, **kwargs: None # pylint: disable=unnecessary-lambda-assignment
re_size = re.compile(r"^(\d+)x(\d+)$") # int x int
re_param = re.compile(r'\s*([\w ]+):\s*("(?:\\"[^,]|\\"|\\|[^\"])+"|[^,]*)(?:,|$)') # multi-word: value
+re_lora = re.compile("")
- if added:
- params["Prompt"] += "\n" + "".join(added)
-
-
-list_available_networks()
diff --git a/modules/sd_checkpoint.py b/modules/sd_checkpoint.py
index e035fc3db..a95ade0b1 100644
--- a/modules/sd_checkpoint.py
+++ b/modules/sd_checkpoint.py
@@ -275,6 +275,12 @@ def select_checkpoint(op='model'):
return checkpoint_info
+def init_metadata():
+ global sd_metadata # pylint: disable=global-statement
+ if sd_metadata is None:
+ sd_metadata = shared.readfile(sd_metadata_file, lock=True) if os.path.isfile(sd_metadata_file) else {}
+
+
def read_metadata_from_safetensors(filename):
global sd_metadata # pylint: disable=global-statement
if sd_metadata is None:
diff --git a/modules/shared.py b/modules/shared.py
index 72af37500..e213997c7 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -132,7 +132,8 @@ def readfile(filename, silent=False, lock=False):
# data = json.loads(data)
t1 = time.time()
if not silent:
- log.debug(f'Read: file="{filename}" json={len(data)} bytes={os.path.getsize(filename)} time={t1-t0:.3f}')
+ fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
+ log.debug(f'Read: file="{filename}" json={len(data)} bytes={os.path.getsize(filename)} time={t1-t0:.3f} fn={fn}')
except FileNotFoundError as err:
log.debug(f'Reading failed: {filename} {err}')
except Exception as err:
@@ -363,7 +364,7 @@ def list_samplers():
def temp_disable_extensions():
disable_safe = ['sd-webui-controlnet', 'multidiffusion-upscaler-for-automatic1111', 'a1111-sd-webui-lycoris', 'sd-webui-agent-scheduler', 'clip-interrogator-ext', 'stable-diffusion-webui-rembg', 'sd-extension-chainner', 'stable-diffusion-webui-images-browser']
- disable_diffusers = ['sd-webui-controlnet', 'multidiffusion-upscaler-for-automatic1111', 'a1111-sd-webui-lycoris', 'sd-webui-animatediff']
+ disable_diffusers = ['sd-webui-controlnet', 'multidiffusion-upscaler-for-automatic1111', 'a1111-sd-webui-lycoris', 'sd-webui-animatediff', 'Lora']
disable_themes = ['sd-webui-lobe-theme', 'cozy-nest', 'sdnext-modernui']
disable_original = []
disabled = []
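The readfile() hunk above tags each read with the name of the calling function by walking two frames up the stack via sys._getframe. A minimal sketch of that trick using only the standard library (CPython-specific):

    import sys

    def readfile_stub(filename):
        # frame 1 is the direct caller, frame 2 the caller's caller
        fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}'  # pylint: disable=protected-access
        print(f'Read: file="{filename}" fn={fn}')

    def load_settings():
        readfile_stub('config.json')

    def main():
        load_settings()

    main()  # prints: Read: file="config.json" fn=main:load_settings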
diff --git a/modules/ui_extra_networks.py b/modules/ui_extra_networks.py
index c326219df..898522366 100644
--- a/modules/ui_extra_networks.py
+++ b/modules/ui_extra_networks.py
@@ -460,17 +460,19 @@ def register_page(page: ExtraNetworksPage):
def register_pages():
- from modules.ui_extra_networks_textual_inversion import ExtraNetworksPageTextualInversion
from modules.ui_extra_networks_checkpoints import ExtraNetworksPageCheckpoints
- from modules.ui_extra_networks_styles import ExtraNetworksPageStyles
+ from modules.ui_extra_networks_lora import ExtraNetworksPageLora
from modules.ui_extra_networks_vae import ExtraNetworksPageVAEs
+ from modules.ui_extra_networks_styles import ExtraNetworksPageStyles
from modules.ui_extra_networks_history import ExtraNetworksPageHistory
+ from modules.ui_extra_networks_textual_inversion import ExtraNetworksPageTextualInversion
debug('EN register-pages')
register_page(ExtraNetworksPageCheckpoints())
- register_page(ExtraNetworksPageStyles())
- register_page(ExtraNetworksPageTextualInversion())
+ register_page(ExtraNetworksPageLora())
register_page(ExtraNetworksPageVAEs())
+ register_page(ExtraNetworksPageStyles())
register_page(ExtraNetworksPageHistory())
+ register_page(ExtraNetworksPageTextualInversion())
if shared.opts.hypernetwork_enabled:
from modules.ui_extra_networks_hypernets import ExtraNetworksPageHypernetworks
register_page(ExtraNetworksPageHypernetworks())
diff --git a/modules/lora/ui_extra_networks_lora.py b/modules/ui_extra_networks_lora.py
similarity index 100%
rename from modules/lora/ui_extra_networks_lora.py
rename to modules/ui_extra_networks_lora.py
diff --git a/modules/ui_models.py b/modules/ui_models.py
index 624c3849d..7ab8b0d07 100644
--- a/modules/ui_models.py
+++ b/modules/ui_models.py
@@ -8,7 +8,7 @@
from modules.ui_components import ToolButton
from modules.ui_common import create_refresh_button
from modules.call_queue import wrap_gradio_gpu_call
-from modules.shared import opts, log, req, readfile, max_workers
+from modules.shared import opts, log, req, readfile, max_workers, native
import modules.ui_symbols
import modules.errors
import modules.hashes
@@ -794,6 +794,10 @@ def civit_update_download():
civit_results4.select(fn=civit_update_select, inputs=[civit_results4], outputs=[models_outcome, civit_update_download_btn])
civit_update_download_btn.click(fn=civit_update_download, inputs=[], outputs=[models_outcome])
+ if native:
+ from modules.lora.lora_extract import create_ui as lora_extract_ui
+ lora_extract_ui()
+
for ui in extra_ui:
if callable(ui):
ui()
diff --git a/scripts/lora_script.py b/scripts/lora_script.py
deleted file mode 100644
index a153a2caa..000000000
--- a/scripts/lora_script.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import re
-import modules.lora.networks as networks
-from modules.lora.lora_extract import create_ui
-from modules.lora.network import NetworkOnDisk
-from modules.lora.ui_extra_networks_lora import ExtraNetworksPageLora
-from modules.lora.extra_networks_lora import ExtraNetworkLora
-from modules import script_callbacks, extra_networks, ui_extra_networks, ui_models, shared # pylint: disable=unused-import
-
-
-re_lora = re.compile("
Date: Fri, 29 Nov 2024 10:05:06 -0500
Subject: [PATCH 041/162] conditional imports and summary timer
Signed-off-by: Vladimir Mandic
---
modules/api/api.py | 7 +++++--
modules/infotext.py | 1 +
modules/lora/networks.py | 8 +++++++-
modules/processing_callbacks.py | 5 ++++-
modules/processing_diffusers.py | 11 ++++++-----
modules/ui_extra_networks.py | 7 ++++---
webui.py | 7 ++++---
7 files changed, 31 insertions(+), 15 deletions(-)
diff --git a/modules/api/api.py b/modules/api/api.py
index 7d2c2f279..b958085ea 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -79,7 +79,6 @@ def __init__(self, app: FastAPI, queue_lock: Lock):
self.add_api_route("/sdapi/v1/sd-vae", endpoints.get_sd_vaes, methods=["GET"], response_model=List[models.ItemVae])
self.add_api_route("/sdapi/v1/extensions", endpoints.get_extensions_list, methods=["GET"], response_model=List[models.ItemExtension])
self.add_api_route("/sdapi/v1/extra-networks", endpoints.get_extra_networks, methods=["GET"], response_model=List[models.ItemExtraNetwork])
- self.add_api_route("/sdapi/v1/loras", endpoints.get_loras, methods=["GET"], response_model=List[dict])
# functional api
self.add_api_route("/sdapi/v1/png-info", endpoints.post_pnginfo, methods=["POST"], response_model=models.ResImageInfo)
@@ -89,10 +88,14 @@ def __init__(self, app: FastAPI, queue_lock: Lock):
self.add_api_route("/sdapi/v1/unload-checkpoint", endpoints.post_unload_checkpoint, methods=["POST"])
self.add_api_route("/sdapi/v1/reload-checkpoint", endpoints.post_reload_checkpoint, methods=["POST"])
self.add_api_route("/sdapi/v1/refresh-vae", endpoints.post_refresh_vae, methods=["POST"])
- self.add_api_route("/sdapi/v1/refresh-loras", endpoints.post_refresh_loras, methods=["POST"])
self.add_api_route("/sdapi/v1/history", endpoints.get_history, methods=["GET"], response_model=List[str])
self.add_api_route("/sdapi/v1/history", endpoints.post_history, methods=["POST"], response_model=int)
+ # lora api
+ if shared.native:
+ self.add_api_route("/sdapi/v1/loras", endpoints.get_loras, methods=["GET"], response_model=List[dict])
+ self.add_api_route("/sdapi/v1/refresh-loras", endpoints.post_refresh_loras, methods=["POST"])
+
# gallery api
gallery.register_api(app)
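The LoRA endpoints are now registered only when the diffusers backend (shared.native) is active, so on the legacy backend the paths simply do not exist and return 404. A stripped-down sketch of the same gating; the flag and handler below are placeholders, only add_api_route and the route path mirror the patch:

    from typing import List
    from fastapi import FastAPI

    NATIVE = True  # stand-in for shared.native

    app = FastAPI()

    def get_loras() -> List[dict]:
        return [{'name': 'example', 'path': '/models/Lora/example.safetensors'}]

    if NATIVE:
        # the route exists only when the flag is set; otherwise clients get a plain 404
        app.add_api_route('/sdapi/v1/loras', get_loras, methods=['GET'], response_model=List[dict])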
diff --git a/modules/infotext.py b/modules/infotext.py
index 4b9dd15ff..baa995c88 100644
--- a/modules/infotext.py
+++ b/modules/infotext.py
@@ -28,6 +28,7 @@ def unquote(text):
return text
+# disabled by default can be enabled if needed
def check_lora(params):
try:
import modules.lora.networks as networks
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index dc6d86b2f..2db145a5a 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -17,7 +17,7 @@
import modules.lora.network_glora as network_glora
import modules.lora.network_overrides as network_overrides
import modules.lora.lora_convert as lora_convert
-from modules import shared, devices, sd_models, sd_models_compile, errors, scripts, files_cache, model_quant
+from modules import shared, devices, sd_models, sd_models_compile, errors, files_cache, model_quant
debug = os.environ.get('SD_LORA_DEBUG', None) is not None
@@ -44,6 +44,10 @@
]
+def total_time():
+ return sum(timer.values())
+
+
def assign_network_names_to_compvis_modules(sd_model):
if sd_model is None:
return
@@ -394,6 +398,8 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
def network_load():
+ for k in timer.keys():
+ timer[k] = 0
sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
component = getattr(sd_model, component_name, None)
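network_load() now zeroes the shared timer dict before each apply pass and total_time() reports the sum across phases. A tiny self-contained sketch of that accounting pattern:

    import time

    timer = {'load': 0.0, 'backup': 0.0, 'calc': 0.0, 'apply': 0.0}

    def total_time():
        return sum(timer.values())

    def reset_timers():
        for k in timer.keys():
            timer[k] = 0.0

    def timed_phase(name, fn, *args, **kwargs):
        # accumulate wall-clock time per named phase
        t0 = time.time()
        result = fn(*args, **kwargs)
        timer[name] += time.time() - t0
        return result

    reset_timers()
    timed_phase('apply', time.sleep, 0.01)
    print(f'total={total_time():.3f} timers={timer}')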
diff --git a/modules/processing_callbacks.py b/modules/processing_callbacks.py
index 0bb94abad..e1bf723cc 100644
--- a/modules/processing_callbacks.py
+++ b/modules/processing_callbacks.py
@@ -6,6 +6,7 @@
from modules import shared, processing_correction, extra_networks, timer, prompt_parser_diffusers
from modules.lora.networks import network_load
+
p = None
debug = os.environ.get('SD_CALLBACK_DEBUG', None) is not None
debug_callback = shared.log.trace if debug else lambda *args, **kwargs: None
@@ -15,6 +16,7 @@ def set_callbacks_p(processing):
global p # pylint: disable=global-statement
p = processing
+
def prompt_callback(step, kwargs):
if prompt_parser_diffusers.embedder is None or 'prompt_embeds' not in kwargs:
return kwargs
@@ -29,6 +31,7 @@ def prompt_callback(step, kwargs):
debug_callback(f"Callback: {e}")
return kwargs
+
def diffusers_callback_legacy(step: int, timestep: int, latents: typing.Union[torch.FloatTensor, np.ndarray]):
if p is None:
return
@@ -64,7 +67,7 @@ def diffusers_callback(pipe, step: int = 0, timestep: int = 0, kwargs: dict = {}
if shared.state.interrupted or shared.state.skipped:
raise AssertionError('Interrupted...')
time.sleep(0.1)
- if hasattr(p, "stepwise_lora"):
+ if hasattr(p, "stepwise_lora") and shared.native:
extra_networks.activate(p, p.extra_network_data, step=step)
network_load()
if latents is None:
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 2e8fb357c..ae24f5f80 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -8,8 +8,7 @@
from modules.processing_helpers import resize_hires, calculate_base_steps, calculate_hires_steps, calculate_refiner_steps, save_intermediate, update_sampler, is_txt2img, is_refiner_enabled
from modules.processing_args import set_pipeline_args
from modules.onnx_impl import preprocess_pipeline as preprocess_onnx_pipeline, check_parameters_changed as olive_check_parameters_changed
-from modules.lora.networks import network_load
-from modules.lora.networks import timer as network_timer
+from modules.lora import networks
debug = shared.log.trace if os.environ.get('SD_DIFFUSERS_DEBUG', None) is not None else lambda *args, **kwargs: None
@@ -427,9 +426,9 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
p.prompts = p.all_prompts[p.iteration * p.batch_size:(p.iteration+1) * p.batch_size]
if p.negative_prompts is None or len(p.negative_prompts) == 0:
p.negative_prompts = p.all_negative_prompts[p.iteration * p.batch_size:(p.iteration+1) * p.batch_size]
- network_timer['apply'] = 0
- network_timer['restore'] = 0
- network_load()
+
+ # load loras
+ networks.network_load()
sd_models.move_model(shared.sd_model, devices.device)
sd_models_compile.openvino_recompile_model(p, hires=False, refiner=False) # recompile if a parameter changes
@@ -459,6 +458,8 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
results = process_decode(p, output)
timer.process.record('decode')
+ timer.process.add('lora', networks.total_time())
+
shared.sd_model = orig_pipeline
if p.state == '':
global last_p # pylint: disable=global-statement
diff --git a/modules/ui_extra_networks.py b/modules/ui_extra_networks.py
index 898522366..94664c5cb 100644
--- a/modules/ui_extra_networks.py
+++ b/modules/ui_extra_networks.py
@@ -460,19 +460,20 @@ def register_page(page: ExtraNetworksPage):
def register_pages():
+ debug('EN register-pages')
from modules.ui_extra_networks_checkpoints import ExtraNetworksPageCheckpoints
- from modules.ui_extra_networks_lora import ExtraNetworksPageLora
from modules.ui_extra_networks_vae import ExtraNetworksPageVAEs
from modules.ui_extra_networks_styles import ExtraNetworksPageStyles
from modules.ui_extra_networks_history import ExtraNetworksPageHistory
from modules.ui_extra_networks_textual_inversion import ExtraNetworksPageTextualInversion
- debug('EN register-pages')
register_page(ExtraNetworksPageCheckpoints())
- register_page(ExtraNetworksPageLora())
register_page(ExtraNetworksPageVAEs())
register_page(ExtraNetworksPageStyles())
register_page(ExtraNetworksPageHistory())
register_page(ExtraNetworksPageTextualInversion())
+ if shared.native:
+ from modules.ui_extra_networks_lora import ExtraNetworksPageLora
+ register_page(ExtraNetworksPageLora())
if shared.opts.hypernetwork_enabled:
from modules.ui_extra_networks_hypernets import ExtraNetworksPageHypernetworks
register_page(ExtraNetworksPageHypernetworks())
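Moving the ExtraNetworksPageLora import inside the shared.native branch means the LoRA UI module is never imported on the legacy backend. A small sketch of the deferred-import idea; the module name below is a stand-in, not the real page module:

    import importlib

    def register_optional_page(module_name: str, enabled: bool):
        # pay the import cost only when the feature is actually enabled
        if not enabled:
            return None
        return importlib.import_module(module_name)

    # 'json' stands in for a real page module such as modules.ui_extra_networks_lora
    print(register_optional_page('json', enabled=True))
    print(register_optional_page('json', enabled=False))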
diff --git a/webui.py b/webui.py
index 3aae34447..2b8d7c56f 100644
--- a/webui.py
+++ b/webui.py
@@ -34,7 +34,6 @@
import modules.upscaler
import modules.textual_inversion.textual_inversion
import modules.hypernetworks.hypernetwork
-import modules.lora.networks
import modules.script_callbacks
from modules.api.middleware import setup_middleware
from modules.shared import cmd_opts, opts # pylint: disable=unused-import
@@ -104,8 +103,10 @@ def initialize():
modules.sd_models.setup_model()
timer.startup.record("models")
- modules.lora.networks.list_available_networks()
- timer.startup.record("lora")
+ if shared.native:
+ import modules.lora.networks as lora_networks
+ lora_networks.list_available_networks()
+ timer.startup.record("lora")
shared.prompt_styles.reload()
timer.startup.record("styles")
From 493c953d49788a6ad50363b766324fda5a943042 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 29 Nov 2024 10:22:47 -0500
Subject: [PATCH 042/162] cleanup
Signed-off-by: Vladimir Mandic
---
modules/modelloader.py | 4 ++--
modules/sd_checkpoint.py | 6 +++++-
modules/ui_extra_networks_lora.py | 2 +-
webui.py | 6 +++---
4 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/modules/modelloader.py b/modules/modelloader.py
index b1b3930d6..b022b4fc6 100644
--- a/modules/modelloader.py
+++ b/modules/modelloader.py
@@ -267,7 +267,7 @@ def download_diffusers_model(hub_id: str, cache_dir: str = None, download_config
def load_diffusers_models(clear=True):
excluded_models = []
- t0 = time.time()
+ # t0 = time.time()
place = shared.opts.diffusers_dir
if place is None or len(place) == 0 or not os.path.isdir(place):
place = os.path.join(models_path, 'Diffusers')
@@ -316,7 +316,7 @@ def load_diffusers_models(clear=True):
debug(f'Error analyzing diffusers model: "{folder}" {e}')
except Exception as e:
shared.log.error(f"Error listing diffusers: {place} {e}")
- shared.log.debug(f'Scanning diffusers cache: folder="{place}" items={len(list(diffuser_repos))} time={time.time()-t0:.2f}')
+ # shared.log.debug(f'Scanning diffusers cache: folder="{place}" items={len(list(diffuser_repos))} time={time.time()-t0:.2f}')
return diffuser_repos
diff --git a/modules/sd_checkpoint.py b/modules/sd_checkpoint.py
index a95ade0b1..2f6533ef0 100644
--- a/modules/sd_checkpoint.py
+++ b/modules/sd_checkpoint.py
@@ -123,13 +123,17 @@ def list_models():
checkpoint_aliases.clear()
ext_filter = [".safetensors"] if shared.opts.sd_disable_ckpt or shared.native else [".ckpt", ".safetensors"]
model_list = list(modelloader.load_models(model_path=model_path, model_url=None, command_path=shared.opts.ckpt_dir, ext_filter=ext_filter, download_name=None, ext_blacklist=[".vae.ckpt", ".vae.safetensors"]))
+ safetensors_list = []
for filename in sorted(model_list, key=str.lower):
checkpoint_info = CheckpointInfo(filename)
+ safetensors_list.append(checkpoint_info)
if checkpoint_info.name is not None:
checkpoint_info.register()
+ diffusers_list = []
if shared.native:
for repo in modelloader.load_diffusers_models(clear=True):
checkpoint_info = CheckpointInfo(repo['name'], sha=repo['hash'])
+ diffusers_list.append(checkpoint_info)
if checkpoint_info.name is not None:
checkpoint_info.register()
if shared.cmd_opts.ckpt is not None:
@@ -143,7 +147,7 @@ def list_models():
shared.opts.data['sd_model_checkpoint'] = checkpoint_info.title
elif shared.cmd_opts.ckpt != shared.default_sd_model_file and shared.cmd_opts.ckpt is not None:
shared.log.warning(f'Load model: path="{shared.cmd_opts.ckpt}" not found')
- shared.log.info(f'Available Models: path="{shared.opts.ckpt_dir}" items={len(checkpoints_list)} time={time.time()-t0:.2f}')
+ shared.log.info(f'Available Models: items={len(checkpoints_list)} safetensors="{shared.opts.ckpt_dir}":{len(safetensors_list)} diffusers="{shared.opts.diffusers_dir}":{len(diffusers_list)} time={time.time()-t0:.2f}')
checkpoints_list = dict(sorted(checkpoints_list.items(), key=lambda cp: cp[1].filename))
def update_model_hashes():
diff --git a/modules/ui_extra_networks_lora.py b/modules/ui_extra_networks_lora.py
index 73cce47a3..9dd1b3573 100644
--- a/modules/ui_extra_networks_lora.py
+++ b/modules/ui_extra_networks_lora.py
@@ -120,4 +120,4 @@ def list_items(self):
return items
def allowed_directories_for_previews(self):
- return [shared.cmd_opts.lora_dir, shared.cmd_opts.lyco_dir]
+ return [shared.cmd_opts.lora_dir]
diff --git a/webui.py b/webui.py
index 2b8d7c56f..4eb6e89ce 100644
--- a/webui.py
+++ b/webui.py
@@ -96,9 +96,6 @@ def initialize():
modules.model_te.refresh_te_list()
timer.startup.record("te")
- extensions.list_extensions()
- timer.startup.record("extensions")
-
modelloader.cleanup_models()
modules.sd_models.setup_model()
timer.startup.record("models")
@@ -120,6 +117,9 @@ def initialize():
yolo.initialize()
timer.startup.record("detailer")
+ extensions.list_extensions()
+ timer.startup.record("extensions")
+
log.info('Load extensions')
t_timer, t_total = modules.scripts.load_scripts()
timer.startup.record("extensions")
From 39b14a202e93f621ac16216b2471f96e1d5b6d48 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Fri, 29 Nov 2024 22:35:00 +0300
Subject: [PATCH 043/162] Fix sequential offload with lora
---
modules/lora/networks.py | 8 +++++---
modules/sd_models.py | 14 +++++++++-----
2 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 2db145a5a..8a23f7413 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -278,8 +278,6 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
shared.sd_model = sd_models_compile.compile_diffusers(shared.sd_model)
shared.compiled_model_state.lora_model = backup_lora_model
- if shared.opts.diffusers_offload_mode == "balanced":
- sd_models.apply_balanced_offload(shared.sd_model)
t1 = time.time()
timer['load'] += t1 - t0
@@ -401,12 +399,16 @@ def network_load():
for k in timer.keys():
timer[k] = 0
sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
+ if shared.opts.diffusers_offload_mode == "sequential":
+ sd_models.disable_offload(sd_model)
+ sd_models.move_model(sd_model, device=devices.cpu)
for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
component = getattr(sd_model, component_name, None)
if component is not None:
for _, module in component.named_modules():
network_apply_weights(module)
-
+ if shared.opts.diffusers_offload_mode == "sequential":
+ sd_models.set_diffuser_offload(sd_model, op="model")
def list_available_networks():
t0 = time.time()
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 68446bdd3..361f6375b 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -405,7 +405,7 @@ def apply_balanced_offload_to_module(pipe):
if hasattr(pipe, "_internal_dict"):
keys = pipe._internal_dict.keys() # pylint: disable=protected-access
else:
- keys = get_signature(shared.sd_model).keys()
+ keys = get_signature(pipe).keys()
for module_name in keys: # pylint: disable=protected-access
module = getattr(pipe, module_name, None)
if isinstance(module, torch.nn.Module):
@@ -1448,10 +1448,14 @@ def disable_offload(sd_model):
from accelerate.hooks import remove_hook_from_module
if not getattr(sd_model, 'has_accelerate', False):
return
- if hasattr(sd_model, 'components'):
- for _name, model in sd_model.components.items():
- if isinstance(model, torch.nn.Module):
- remove_hook_from_module(model, recurse=True)
+ if hasattr(sd_model, "_internal_dict"):
+ keys = sd_model._internal_dict.keys() # pylint: disable=protected-access
+ else:
+ keys = get_signature(sd_model).keys()
+ for module_name in keys: # pylint: disable=protected-access
+ module = getattr(sd_model, module_name, None)
+ if isinstance(module, torch.nn.Module):
+ module = remove_hook_from_module(module, recurse=True)
sd_model.has_accelerate = False
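disable_offload() now walks the pipeline's _internal_dict keys (falling back to the constructor signature) instead of .components, so accelerate hooks are removed even from modules that components does not expose. A rough sketch of that loop; the signature-based fallback is simplified here to vars():

    import torch
    from accelerate.hooks import remove_hook_from_module

    def disable_offload_sketch(pipe):
        # prefer the pipeline's registered component names, fall back to plain attributes
        keys = pipe._internal_dict.keys() if hasattr(pipe, '_internal_dict') else vars(pipe).keys()
        for name in list(keys):
            module = getattr(pipe, name, None)
            if isinstance(module, torch.nn.Module):
                # strip accelerate offload hooks so weights can be touched directly
                remove_hook_from_module(module, recurse=True)
        pipe.has_accelerate = False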
From 9187418358d6991fc34ee9a10a9e53340eda1e1d Mon Sep 17 00:00:00 2001
From: Disty0
Date: Fri, 29 Nov 2024 22:53:17 +0300
Subject: [PATCH 044/162] revert networks.py
---
modules/lora/networks.py | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 8a23f7413..2db145a5a 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -278,6 +278,8 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
shared.sd_model = sd_models_compile.compile_diffusers(shared.sd_model)
shared.compiled_model_state.lora_model = backup_lora_model
+ if shared.opts.diffusers_offload_mode == "balanced":
+ sd_models.apply_balanced_offload(shared.sd_model)
t1 = time.time()
timer['load'] += t1 - t0
@@ -399,16 +401,12 @@ def network_load():
for k in timer.keys():
timer[k] = 0
sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
- if shared.opts.diffusers_offload_mode == "sequential":
- sd_models.disable_offload(sd_model)
- sd_models.move_model(sd_model, device=devices.cpu)
for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
component = getattr(sd_model, component_name, None)
if component is not None:
for _, module in component.named_modules():
network_apply_weights(module)
- if shared.opts.diffusers_offload_mode == "sequential":
- sd_models.set_diffuser_offload(sd_model, op="model")
+
def list_available_networks():
t0 = time.time()
From e52019104d822c156a5b7dfb9c8a734bd897a4a3 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Fri, 29 Nov 2024 22:55:15 +0300
Subject: [PATCH 045/162] revert sd_models.py
---
modules/sd_models.py | 14 +++++---------
1 file changed, 5 insertions(+), 9 deletions(-)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 361f6375b..68446bdd3 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -405,7 +405,7 @@ def apply_balanced_offload_to_module(pipe):
if hasattr(pipe, "_internal_dict"):
keys = pipe._internal_dict.keys() # pylint: disable=protected-access
else:
- keys = get_signature(pipe).keys()
+ keys = get_signature(shared.sd_model).keys()
for module_name in keys: # pylint: disable=protected-access
module = getattr(pipe, module_name, None)
if isinstance(module, torch.nn.Module):
@@ -1448,14 +1448,10 @@ def disable_offload(sd_model):
from accelerate.hooks import remove_hook_from_module
if not getattr(sd_model, 'has_accelerate', False):
return
- if hasattr(sd_model, "_internal_dict"):
- keys = sd_model._internal_dict.keys() # pylint: disable=protected-access
- else:
- keys = get_signature(sd_model).keys()
- for module_name in keys: # pylint: disable=protected-access
- module = getattr(sd_model, module_name, None)
- if isinstance(module, torch.nn.Module):
- module = remove_hook_from_module(module, recurse=True)
+ if hasattr(sd_model, 'components'):
+ for _name, model in sd_model.components.items():
+ if isinstance(model, torch.nn.Module):
+ remove_hook_from_module(model, recurse=True)
sd_model.has_accelerate = False
From ea994a881e33f911c6556bdd38cb9cd2587e2e64 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 29 Nov 2024 15:40:09 -0500
Subject: [PATCH 046/162] lora stats
Signed-off-by: Vladimir Mandic
---
installer.py | 2 +-
scripts/flux_tools.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/installer.py b/installer.py
index 37202552d..8fb6d9683 100644
--- a/installer.py
+++ b/installer.py
@@ -459,7 +459,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
def check_diffusers():
if args.skip_all or args.skip_requirements:
return
- sha = '069186fac510d6f6f88a5e435523b235c823a8a0'
+ sha = 'c96bfa5c80eca798d555a79a491043c311d0f608'
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
cur = opts.get('diffusers_version', '') if minor > 0 else ''
diff --git a/scripts/flux_tools.py b/scripts/flux_tools.py
index e5fe443b7..3fbab6c6f 100644
--- a/scripts/flux_tools.py
+++ b/scripts/flux_tools.py
@@ -100,7 +100,7 @@ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', stren
if tool == 'Depth':
# pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-Depth-dev", torch_dtype=torch.bfloat16, revision="refs/pr/1").to("cuda")
- install('git+https://github.com/asomoza/image_gen_aux.git', 'image_gen_aux')
+ install('git+https://github.com/huggingface/image_gen_aux.git', 'image_gen_aux')
if shared.sd_model.__class__.__name__ != 'FluxControlPipeline' or 'Depth' not in shared.opts.sd_model_checkpoint:
shared.opts.data["sd_model_checkpoint"] = "black-forest-labs/FLUX.1-Depth-dev"
sd_models.reload_model_weights(op='model', revision="refs/pr/1")
From 797ad1f20f33b90c06380bedbcf7da1474cc90a7 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 29 Nov 2024 15:40:20 -0500
Subject: [PATCH 047/162] lora stats
Signed-off-by: Vladimir Mandic
---
modules/lora/networks.py | 69 +++++++++++++++++++++++++++-------------
modules/model_flux.py | 8 ++---
2 files changed, 51 insertions(+), 26 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 2db145a5a..beb4634c2 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -5,6 +5,7 @@
import concurrent
import torch
import diffusers.models.lora
+import rich.progress as p
import modules.lora.network as network
import modules.lora.network_lora as network_lora
@@ -21,11 +22,12 @@
debug = os.environ.get('SD_LORA_DEBUG', None) is not None
+pbar = p.Progress(p.TextColumn('[cyan]LoRA apply'), p.BarColumn(), p.TaskProgressColumn(), p.TimeRemainingColumn(), p.TimeElapsedColumn(), p.TextColumn('[cyan]{task.description}'), console=shared.console)
extra_network_lora = None
available_networks = {}
available_network_aliases = {}
loaded_networks: List[network.Network] = []
-timer = { 'load': 0, 'apply': 0, 'restore': 0, 'deactivate': 0 }
+timer = { 'list': 0, 'load': 0, 'backup': 0, 'calc': 0, 'apply': 0, 'restore': 0, 'deactivate': 0 }
lora_cache = {}
diffuser_loaded = []
diffuser_scales = []
@@ -216,7 +218,6 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
loaded_networks.clear()
diffuser_loaded.clear()
diffuser_scales.clear()
- timer['load'] = 0
t0 = time.time()
for i, (network_on_disk, name) in enumerate(zip(networks_on_disk, names)):
@@ -269,8 +270,6 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
if len(loaded_networks) > 0 and debug:
shared.log.debug(f'Load network: type=LoRA loaded={len(loaded_networks)} cache={list(lora_cache)}')
- devices.torch_gc()
-
if recompile_model:
shared.log.info("Load network: type=LoRA recompiling model")
backup_lora_model = shared.compiled_model_state.lora_model
@@ -278,13 +277,18 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
shared.sd_model = sd_models_compile.compile_diffusers(shared.sd_model)
shared.compiled_model_state.lora_model = backup_lora_model
- if shared.opts.diffusers_offload_mode == "balanced":
- sd_models.apply_balanced_offload(shared.sd_model)
+
+ if len(loaded_networks) > 0:
+ devices.torch_gc()
+ if shared.opts.diffusers_offload_mode == "balanced":
+ sd_models.apply_balanced_offload(shared.sd_model)
+
t1 = time.time()
- timer['load'] += t1 - t0
+ timer['load'] = t1 - t0
def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias):
+ t0 = time.time()
weights_backup = getattr(self, "network_weights_backup", None)
bias_backup = getattr(self, "network_bias_backup", None)
if weights_backup is None and bias_backup is None:
@@ -315,9 +319,12 @@ def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm
else:
self.bias = None
self.to(device)
+ t1 = time.time()
+ timer['apply'] += t1 - t0
def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], wanted_names): # pylint: disable=W0613
+ t0 = time.time()
weights_backup = getattr(self, "network_weights_backup", None)
if weights_backup is None and wanted_names != (): # pylint: disable=C1803
if getattr(self.weight, "quant_type", None) in ['nf4', 'fp4']:
@@ -344,6 +351,8 @@ def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
if shared.opts.lora_offload_backup and bias_backup is not None:
bias_backup = bias_backup.to(devices.cpu)
self.network_bias_backup = bias_backup
+ t1 = time.time()
+ timer['backup'] += t1 - t0
def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv]):
@@ -353,16 +362,13 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
If not, restores original weights from backup and alters weights according to networks.
"""
network_layer_name = getattr(self, 'network_layer_name', None)
- if network_layer_name is None:
- return
- t0 = time.time()
current_names = getattr(self, "network_current_names", ())
wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks)
- if any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419 # pylint: disable=R1729
- maybe_backup_weights(self, wanted_names)
+ maybe_backup_weights(self, wanted_names)
if current_names != wanted_names:
batch_updown = None
batch_ex_bias = None
+ t0 = time.time()
for net in loaded_networks:
# default workflow where module is known and has weights
module = net.modules.get(network_layer_name, None)
@@ -391,21 +397,39 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
continue
shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
+ t1 = time.time()
+ timer['calc'] += t1 - t0
set_weights(self, batch_updown, batch_ex_bias) # Set or restore weights from backup
self.network_current_names = wanted_names
- t1 = time.time()
- timer['apply'] += t1 - t0
-def network_load():
- for k in timer.keys():
- timer[k] = 0
+def network_load(): # called from processing
+ timer['backup'] = 0
+ timer['calc'] = 0
+ timer['apply'] = 0
sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
- for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
- component = getattr(sd_model, component_name, None)
- if component is not None:
- for _, module in component.named_modules():
- network_apply_weights(module)
+ with pbar:
+ for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
+ component = getattr(sd_model, component_name, None)
+ if component is not None:
+ applied = 0
+ modules = list(component.named_modules())
+ task_start = time.time()
+ task = pbar.add_task(description=component_name , total=len(modules), visible=False)
+ for _, module in modules:
+ layer_name = getattr(module, 'network_layer_name', None)
+ if layer_name is None:
+ continue
+ present = any([net.modules.get(layer_name, None) for net in loaded_networks]) # noqa: C419
+ if present:
+ network_apply_weights(module)
+ applied += 1
+ pbar.update(task, advance=1, visible=(time.time() - task_start) > 1) # progress bar becomes visible if operation takes more than 1sec
+ pbar.remove_task(task)
+ if debug:
+ shared.log.debug(f'Load network: type=LoRA component={component_name} modules={len(modules)} applied={applied}')
+ if debug:
+ shared.log.debug(f'Load network: type=LoRA total={total_time():.2f} timers={timer}')
def list_available_networks():
@@ -442,4 +466,5 @@ def add_network(filename):
for fn in candidates:
executor.submit(add_network, fn)
t1 = time.time()
+ timer['list'] = t1 - t0
shared.log.info(f'Available LoRAs: path="{shared.cmd_opts.lora_dir}" items={len(available_networks)} folders={len(forbidden_network_aliases)} time={t1 - t0:.2f}')
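The apply loop now runs under a rich progress bar whose task starts hidden and only becomes visible once the work has taken more than a second, so fast applies stay silent. A standalone sketch of that pattern with simulated work:

    import time
    import rich.progress as p

    pbar = p.Progress(p.TextColumn('[cyan]LoRA apply'), p.BarColumn(), p.TaskProgressColumn(),
                      p.TimeRemainingColumn(), p.TimeElapsedColumn())

    items = range(50)
    with pbar:
        t_start = time.time()
        task = pbar.add_task(description='apply', total=len(items), visible=False)
        for _ in items:
            time.sleep(0.05)  # stand-in for network_apply_weights(module)
            # flip the bar to visible only once the loop has run for more than 1 second
            pbar.update(task, advance=1, visible=(time.time() - t_start) > 1)
        pbar.remove_task(task)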
diff --git a/modules/model_flux.py b/modules/model_flux.py
index 324e50b36..ce2c55f70 100644
--- a/modules/model_flux.py
+++ b/modules/model_flux.py
@@ -223,10 +223,8 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
if shared.opts.sd_unet != 'None':
try:
debug(f'Load model: type=FLUX unet="{shared.opts.sd_unet}"')
- _transformer = load_transformer(sd_unet.unet_dict[shared.opts.sd_unet])
- if _transformer is not None:
- transformer = _transformer
- else:
+ transformer = load_transformer(sd_unet.unet_dict[shared.opts.sd_unet])
+ if transformer is None:
shared.opts.sd_unet = 'None'
sd_unet.failed_unet.append(shared.opts.sd_unet)
except Exception as e:
@@ -334,6 +332,8 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
text_encoder_1 = None
text_encoder_2 = None
vae = None
+ for k in kwargs.keys():
+ kwargs[k] = None
devices.torch_gc()
return pipe
From 881fa1183ca3ca1bda1102026155be2b850a9782 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Sat, 30 Nov 2024 00:16:38 +0300
Subject: [PATCH 048/162] Fix offload issues with lora
---
modules/lora/networks.py | 14 +++++++++++---
modules/sd_models.py | 14 +++++++++-----
2 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index beb4634c2..f211149bd 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -280,8 +280,6 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
if len(loaded_networks) > 0:
devices.torch_gc()
- if shared.opts.diffusers_offload_mode == "balanced":
- sd_models.apply_balanced_offload(shared.sd_model)
t1 = time.time()
timer['load'] = t1 - t0
@@ -375,7 +373,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
if module is not None and hasattr(self, 'weight'):
try:
with devices.inference_context():
- weight = self.weight # calculate quant weights once
+ weight = self.weight.to(devices.device) # calculate quant weights once
updown, ex_bias = module.calc_updown(weight)
if batch_updown is not None and updown is not None:
batch_updown += updown
@@ -385,6 +383,11 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
batch_ex_bias += ex_bias
else:
batch_ex_bias = ex_bias
+ if shared.opts.diffusers_offload_mode != "none":
+ if batch_updown is not None:
+ batch_updown = batch_updown.to(devices.cpu)
+ if batch_ex_bias is not None:
+ batch_ex_bias = batch_ex_bias.to(devices.cpu)
except RuntimeError as e:
extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
if debug:
@@ -408,6 +411,9 @@ def network_load(): # called from processing
timer['calc'] = 0
timer['apply'] = 0
sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatiblility
+ if shared.opts.diffusers_offload_mode != "none":
+ sd_models.disable_offload(sd_model)
+ sd_models.move_model(sd_model, device=devices.cpu)
with pbar:
for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
component = getattr(sd_model, component_name, None)
@@ -428,6 +434,8 @@ def network_load(): # called from processing
pbar.remove_task(task)
if debug:
shared.log.debug(f'Load network: type=LoRA component={component_name} modules={len(modules)} applied={applied}')
+ if shared.opts.diffusers_offload_mode != "none":
+ sd_models.set_diffuser_offload(sd_model, op="model")
if debug:
shared.log.debug(f'Load network: type=LoRA total={total_time():.2f} timers={timer}')
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 68446bdd3..361f6375b 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -405,7 +405,7 @@ def apply_balanced_offload_to_module(pipe):
if hasattr(pipe, "_internal_dict"):
keys = pipe._internal_dict.keys() # pylint: disable=protected-access
else:
- keys = get_signature(shared.sd_model).keys()
+ keys = get_signature(pipe).keys()
for module_name in keys: # pylint: disable=protected-access
module = getattr(pipe, module_name, None)
if isinstance(module, torch.nn.Module):
@@ -1448,10 +1448,14 @@ def disable_offload(sd_model):
from accelerate.hooks import remove_hook_from_module
if not getattr(sd_model, 'has_accelerate', False):
return
- if hasattr(sd_model, 'components'):
- for _name, model in sd_model.components.items():
- if isinstance(model, torch.nn.Module):
- remove_hook_from_module(model, recurse=True)
+ if hasattr(sd_model, "_internal_dict"):
+ keys = sd_model._internal_dict.keys() # pylint: disable=protected-access
+ else:
+ keys = get_signature(sd_model).keys()
+ for module_name in keys: # pylint: disable=protected-access
+ module = getattr(sd_model, module_name, None)
+ if isinstance(module, torch.nn.Module):
+ module = remove_hook_from_module(module, recurse=True)
sd_model.has_accelerate = False
From 369ae52401d1e3d3533862ea90e1c5847b14ad34 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Sat, 30 Nov 2024 00:55:22 +0300
Subject: [PATCH 049/162] Update OpenVINO to 2024.5.0
---
CHANGELOG.md | 1 +
installer.py | 4 ++--
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 919041bde..08ae05471 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@
style-aligned applies selected attention layers uniformly to all images to achieve consistency
can be used with or without input image in which case first prompt is used to establish baseline
*note:* all prompts are processed as a single batch, so vram is the limiting factor
+- **OpenVINO**: update to 2024.5.0
### UI and workflow improvements
diff --git a/installer.py b/installer.py
index 37202552d..ec83cc1c3 100644
--- a/installer.py
+++ b/installer.py
@@ -640,7 +640,7 @@ def install_ipex(torch_command):
# os.environ.setdefault('TENSORFLOW_PACKAGE', 'tensorflow==2.15.1 intel-extension-for-tensorflow[xpu]==2.15.0.1')
else:
torch_command = os.environ.get('TORCH_COMMAND', '--pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/xpu') # torchvision doesn't exist on test/stable branch for windows
- install(os.environ.get('OPENVINO_PACKAGE', 'openvino==2024.3.0'), 'openvino', ignore=True)
+ install(os.environ.get('OPENVINO_PACKAGE', 'openvino==2024.5.0'), 'openvino', ignore=True)
install('nncf==2.7.0', 'nncf', ignore=True)
install(os.environ.get('ONNXRUNTIME_PACKAGE', 'onnxruntime-openvino'), 'onnxruntime-openvino', ignore=True)
return torch_command
@@ -650,7 +650,7 @@ def install_openvino(torch_command):
check_python(supported_minors=[8, 9, 10, 11, 12], reason='OpenVINO backend requires Python 3.9, 3.10 or 3.11')
log.info('OpenVINO: selected')
torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.3.1+cpu torchvision==0.18.1+cpu --index-url https://download.pytorch.org/whl/cpu')
- install(os.environ.get('OPENVINO_PACKAGE', 'openvino==2024.3.0'), 'openvino')
+ install(os.environ.get('OPENVINO_PACKAGE', 'openvino==2024.5.0'), 'openvino')
install(os.environ.get('ONNXRUNTIME_PACKAGE', 'onnxruntime-openvino'), 'onnxruntime-openvino', ignore=True)
install('nncf==2.12.0', 'nncf')
os.environ.setdefault('PYTORCH_TRACING_MODE', 'TORCHFX')
From 63ba83d361e37494d8a811ae1c9c77fae3cdc41b Mon Sep 17 00:00:00 2001
From: Disty0
Date: Sat, 30 Nov 2024 01:15:49 +0300
Subject: [PATCH 050/162] ZLUDA enable Dynamic attention by default
---
modules/shared.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/modules/shared.py b/modules/shared.py
index 720819135..11452d7ab 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -449,16 +449,16 @@ def get_default_modes():
default_cross_attention = "Scaled-Dot-Product" if native else "Doggettx's"
elif devices.backend == "mps":
default_cross_attention = "Scaled-Dot-Product" if native else "Doggettx's"
- else: # cuda, rocm, ipex, openvino
- default_cross_attention ="Scaled-Dot-Product"
+ else: # cuda, rocm, zluda, ipex, openvino
+ default_cross_attention = "Scaled-Dot-Product"
if devices.backend == "rocm":
default_sdp_options = ['Memory attention', 'Math attention']
elif devices.backend == "zluda":
- default_sdp_options = ['Math attention']
+ default_sdp_options = ['Math attention', 'Dynamic attention']
else:
default_sdp_options = ['Flash attention', 'Memory attention', 'Math attention']
- if (cmd_opts.lowvram or cmd_opts.medvram) and ('Flash attention' not in default_sdp_options):
+ if (cmd_opts.lowvram or cmd_opts.medvram) and ('Flash attention' not in default_sdp_options and 'Dynamic attention' not in default_sdp_options):
default_sdp_options.append('Dynamic attention')
return default_offload_mode, default_cross_attention, default_sdp_options
From 1e903129824ace3a33c7c04fd059f44ec18c52e8 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 30 Nov 2024 08:50:25 -0500
Subject: [PATCH 051/162] update lora
Signed-off-by: Vladimir Mandic
---
launch.py | 3 +++
modules/cmd_args.py | 1 +
modules/lora/networks.py | 45 ++++++++++++++++++-----------------
modules/processing.py | 9 +++++++
modules/processing_helpers.py | 6 ++++-
5 files changed, 41 insertions(+), 23 deletions(-)
diff --git a/launch.py b/launch.py
index e00da58c7..5c8a6051a 100755
--- a/launch.py
+++ b/launch.py
@@ -192,6 +192,9 @@ def main():
global args # pylint: disable=global-statement
installer.ensure_base_requirements()
init_args() # setup argparser and default folders
+ if args.malloc:
+ import tracemalloc
+ tracemalloc.start()
installer.args = args
installer.setup_logging()
installer.log.info('Starting SD.Next')
diff --git a/modules/cmd_args.py b/modules/cmd_args.py
index 752ad02c0..cb4e5fc16 100644
--- a/modules/cmd_args.py
+++ b/modules/cmd_args.py
@@ -26,6 +26,7 @@ def main_args():
group_diag.add_argument("--no-hashing", default=os.environ.get("SD_NOHASHING", False), action='store_true', help="Disable hashing of checkpoints, default: %(default)s")
group_diag.add_argument("--no-metadata", default=os.environ.get("SD_NOMETADATA", False), action='store_true', help="Disable reading of metadata from models, default: %(default)s")
group_diag.add_argument("--profile", default=os.environ.get("SD_PROFILE", False), action='store_true', help="Run profiler, default: %(default)s")
+ group_diag.add_argument("--malloc", default=os.environ.get("SD_PROFILE", False), action='store_true', help="Trace memory ops, default: %(default)s")
group_diag.add_argument("--disable-queue", default=os.environ.get("SD_DISABLEQUEUE", False), action='store_true', help="Disable queues, default: %(default)s")
group_diag.add_argument('--debug', default=os.environ.get("SD_DEBUG", False), action='store_true', help = "Run installer with debug logging, default: %(default)s")
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index f211149bd..23c45ff2a 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -22,7 +22,7 @@
debug = os.environ.get('SD_LORA_DEBUG', None) is not None
-pbar = p.Progress(p.TextColumn('[cyan]LoRA apply'), p.BarColumn(), p.TaskProgressColumn(), p.TimeRemainingColumn(), p.TimeElapsedColumn(), p.TextColumn('[cyan]{task.description}'), console=shared.console)
+pbar = p.Progress(p.TextColumn('[cyan]{task.description}'), p.BarColumn(), p.TaskProgressColumn(), p.TimeRemainingColumn(), p.TimeElapsedColumn(), console=shared.console)
extra_network_lora = None
available_networks = {}
available_network_aliases = {}
@@ -50,6 +50,13 @@ def total_time():
return sum(timer.values())
+def get_timers():
+ t = { 'total': round(sum(timer.values()), 2) }
+ for k, v in timer.items():
+ t[k] = round(v, 2)
+ return t
+
+
def assign_network_names_to_compvis_modules(sd_model):
if sd_model is None:
return
@@ -362,7 +369,8 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
network_layer_name = getattr(self, 'network_layer_name', None)
current_names = getattr(self, "network_current_names", ())
wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks)
- maybe_backup_weights(self, wanted_names)
+ if network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419
+ maybe_backup_weights(self, wanted_names)
if current_names != wanted_names:
batch_updown = None
batch_ex_bias = None
@@ -414,30 +422,23 @@ def network_load(): # called from processing
if shared.opts.diffusers_offload_mode != "none":
sd_models.disable_offload(sd_model)
sd_models.move_model(sd_model, device=devices.cpu)
+ modules = []
+ for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
+ component = getattr(sd_model, component_name, None)
+ if component is not None and hasattr(component, 'named_modules'):
+ modules += list(component.named_modules())
with pbar:
- for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
- component = getattr(sd_model, component_name, None)
- if component is not None:
- applied = 0
- modules = list(component.named_modules())
- task_start = time.time()
- task = pbar.add_task(description=component_name , total=len(modules), visible=False)
- for _, module in modules:
- layer_name = getattr(module, 'network_layer_name', None)
- if layer_name is None:
- continue
- present = any([net.modules.get(layer_name, None) for net in loaded_networks]) # noqa: C419
- if present:
- network_apply_weights(module)
- applied += 1
- pbar.update(task, advance=1, visible=(time.time() - task_start) > 1) # progress bar becomes visible if operation takes more than 1sec
- pbar.remove_task(task)
- if debug:
- shared.log.debug(f'Load network: type=LoRA component={component_name} modules={len(modules)} applied={applied}')
+ task = pbar.add_task(description='Apply network: type=LoRA' , total=len(modules), visible=len(loaded_networks) > 0)
+ for _, module in modules:
+ network_apply_weights(module)
+ # pbar.update(task, advance=1) # progress bar becomes visible if operation takes more than 1sec
+ pbar.remove_task(task)
+ if debug:
+ shared.log.debug(f'Load network: type=LoRA modules={len(modules)}')
if shared.opts.diffusers_offload_mode != "none":
sd_models.set_diffuser_offload(sd_model, op="model")
if debug:
- shared.log.debug(f'Load network: type=LoRA total={total_time():.2f} timers={timer}')
+ shared.log.debug(f'Load network: type=LoRA timers{get_timers()}')
def list_available_networks():
diff --git a/modules/processing.py b/modules/processing.py
index 16e7a9213..92faaee8d 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -473,4 +473,13 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
p.scripts.postprocess(p, processed)
timer.process.record('post')
shared.log.info(f'Processed: images={len(output_images)} its={(p.steps * len(output_images)) / (t1 - t0):.2f} time={t1-t0:.2f} timers={timer.process.dct(min_time=0.02)} memory={memstats.memory_stats()}')
+
+ if shared.cmd_opts.malloc:
+ import tracemalloc
+ snapshot = tracemalloc.take_snapshot()
+ stats = snapshot.statistics('lineno')
+ shared.log.debug('Profile malloc:')
+ for stat in stats[:20]:
+ frame = stat.traceback[0]
+ shared.log.debug(f' file="{frame.filename}":{frame.lineno} size={stat.size}')
return processed
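With --malloc set, tracemalloc is started at launch and a snapshot is taken after each processing run, logging the top allocation sites by line. A minimal sketch of the same flow using only the standard library:

    import tracemalloc

    tracemalloc.start()  # normally done once at startup, gated behind the --malloc flag

    data = [bytes(1024) for _ in range(1000)]  # stand-in for real processing work

    snapshot = tracemalloc.take_snapshot()
    for stat in snapshot.statistics('lineno')[:5]:
        frame = stat.traceback[0]
        print(f'file="{frame.filename}":{frame.lineno} size={stat.size}')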
diff --git a/modules/processing_helpers.py b/modules/processing_helpers.py
index 22acf296c..ab08d4cc8 100644
--- a/modules/processing_helpers.py
+++ b/modules/processing_helpers.py
@@ -1,4 +1,5 @@
import os
+import time
import math
import random
import warnings
@@ -9,7 +10,7 @@
from PIL import Image
from skimage import exposure
from blendmodes.blend import blendLayers, BlendType
-from modules import shared, devices, images, sd_models, sd_samplers, sd_hijack_hypertile, processing_vae
+from modules import shared, devices, images, sd_models, sd_samplers, sd_hijack_hypertile, processing_vae, timer
debug = shared.log.trace if os.environ.get('SD_PROCESS_DEBUG', None) is not None else lambda *args, **kwargs: None
@@ -352,6 +353,7 @@ def diffusers_image_conditioning(_source_image, latent_image, _image_mask=None):
def validate_sample(tensor):
+ t0 = time.time()
if not isinstance(tensor, np.ndarray) and not isinstance(tensor, torch.Tensor):
return tensor
dtype = tensor.dtype
@@ -377,6 +379,8 @@ def validate_sample(tensor):
if upcast is not None and not upcast:
setattr(shared.sd_model.vae.config, 'force_upcast', True) # noqa: B010
shared.log.warning('Decode: upcast=True set, retry operation')
+ t1 = time.time()
+ timer.process.add('validate', t1 - t0)
return cast
From eacd4e9357cd44ef1e79640846822ad6d593d48d Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 30 Nov 2024 09:02:47 -0500
Subject: [PATCH 052/162] add stats
Signed-off-by: Vladimir Mandic
---
modules/lora/networks.py | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 23c45ff2a..51ef27a8a 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -28,6 +28,7 @@
available_network_aliases = {}
loaded_networks: List[network.Network] = []
timer = { 'list': 0, 'load': 0, 'backup': 0, 'calc': 0, 'apply': 0, 'restore': 0, 'deactivate': 0 }
+backup_size = 0
lora_cache = {}
diffuser_loaded = []
diffuser_scales = []
@@ -289,6 +290,7 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
devices.torch_gc()
t1 = time.time()
+ backup_size = 0
timer['load'] = t1 - t0
@@ -329,6 +331,7 @@ def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm
def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], wanted_names): # pylint: disable=W0613
+ global backup_size # pylint: disable=W0603
t0 = time.time()
weights_backup = getattr(self, "network_weights_backup", None)
if weights_backup is None and wanted_names != (): # pylint: disable=C1803
@@ -347,6 +350,7 @@ def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
if shared.opts.lora_offload_backup and weights_backup is not None:
weights_backup = weights_backup.to(devices.cpu)
self.network_weights_backup = weights_backup
+ backup_size += weights_backup.numel() * weights_backup.element_size()
bias_backup = getattr(self, "network_bias_backup", None)
if bias_backup is None:
if getattr(self, 'bias', None) is not None:
@@ -356,6 +360,8 @@ def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
if shared.opts.lora_offload_backup and bias_backup is not None:
bias_backup = bias_backup.to(devices.cpu)
self.network_bias_backup = bias_backup
+ if bias_backup is not None:
+ backup_size += bias_backup.numel() * bias_backup.element_size()
t1 = time.time()
timer['backup'] += t1 - t0
@@ -431,14 +437,15 @@ def network_load(): # called from processing
task = pbar.add_task(description='Apply network: type=LoRA' , total=len(modules), visible=len(loaded_networks) > 0)
for _, module in modules:
network_apply_weights(module)
- # pbar.update(task, advance=1) # progress bar becomes visible if operation takes more than 1sec
+ pbar.update(task, advance=1) # progress bar becomes visible if operation takes more than 1sec
pbar.remove_task(task)
+ modules.clear()
if debug:
shared.log.debug(f'Load network: type=LoRA modules={len(modules)}')
if shared.opts.diffusers_offload_mode != "none":
sd_models.set_diffuser_offload(sd_model, op="model")
if debug:
- shared.log.debug(f'Load network: type=LoRA timers{get_timers()}')
+ shared.log.debug(f'Load network: type=LoRA time={get_timers()} backup={backup_size}')
def list_available_networks():
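backup_size accumulates numel() * element_size() for every weight and bias copy kept for LoRA restore, i.e. the bytes held by the backups. A small sketch of that bookkeeping on a plain torch module:

    import torch

    def backup_bytes(module: torch.nn.Module) -> int:
        total = 0
        for tensor in (getattr(module, 'weight', None), getattr(module, 'bias', None)):
            if tensor is not None:
                total += tensor.numel() * tensor.element_size()  # bytes for one backup copy
        return total

    layer = torch.nn.Linear(4096, 4096)
    print(f'backup={backup_bytes(layer)} bytes')  # 4096*4096*4 + 4096*4 for fp32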
From 6ec93f2d4609f36d3f66a8f4ac4b03bca32e5d12 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Sat, 30 Nov 2024 17:04:35 +0300
Subject: [PATCH 053/162] Disable load lora gpu with medvram too
---
modules/sd_models.py | 5 ++++-
modules/shared.py | 2 +-
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 361f6375b..ccba0bfb5 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -413,11 +413,11 @@ def apply_balanced_offload_to_module(pipe):
if checkpoint_name is None:
checkpoint_name = pipe.__class__.__name__
offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
+ network_layer_name = getattr(module, "network_layer_name", None)
module = remove_hook_from_module(module, recurse=True)
try:
module = module.to("cpu")
module.offload_dir = offload_dir
- network_layer_name = getattr(module, "network_layer_name", None)
module = add_hook_to_module(module, dispatch_from_cpu_hook(), append=True)
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
if network_layer_name:
@@ -1455,7 +1455,10 @@ def disable_offload(sd_model):
for module_name in keys: # pylint: disable=protected-access
module = getattr(sd_model, module_name, None)
if isinstance(module, torch.nn.Module):
+ network_layer_name = getattr(module, "network_layer_name", None)
module = remove_hook_from_module(module, recurse=True)
+ if network_layer_name:
+ module.network_layer_name = network_layer_name
sd_model.has_accelerate = False
diff --git a/modules/shared.py b/modules/shared.py
index e213997c7..bcb506cee 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -910,7 +910,7 @@ def get_default_modes():
"lora_apply_tags": OptionInfo(0, "LoRA auto-apply tags", gr.Slider, {"minimum": -1, "maximum": 32, "step": 1}),
"lora_in_memory_limit": OptionInfo(0, "LoRA memory cache", gr.Slider, {"minimum": 0, "maximum": 24, "step": 1}),
"lora_quant": OptionInfo("NF4","LoRA precision in quantized models", gr.Radio, {"choices": ["NF4", "FP4"]}),
- "lora_load_gpu": OptionInfo(True if not cmd_opts.lowvram else False, "Load LoRA directly to GPU"),
+ "lora_load_gpu": OptionInfo(True if not (cmd_opts.lowvram or cmd_opts.medvram) else False, "Load LoRA directly to GPU"),
"lora_offload_backup": OptionInfo(True, "Offload LoRA Backup Weights"),
}))
From eee85e5a4ed24af589bc9ee488cc4c496d747417 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 30 Nov 2024 12:29:58 -0500
Subject: [PATCH 054/162] lora refactor in progress
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 5 +-
modules/lora/extra_networks_lora.py | 9 ++-
modules/lora/networks.py | 73 ++++++++++++-------------
modules/processing_callbacks.py | 2 -
modules/processing_diffusers.py | 21 ++++---
modules/prompt_parser_diffusers.py | 9 ++-
modules/sd_models.py | 85 ++++++++++++++++-------------
7 files changed, 109 insertions(+), 95 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 919041bde..dcb88bcf3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-11-28
+## Update for 2024-11-30
### New models and integrations
@@ -67,7 +67,8 @@
- fix xyz-grid with lora
- fix api script callbacks
- fix gpu memory monitoring
-- simplify img2img/inpaint/sketch canvas handling
+- simplify img2img/inpaint/sketch canvas handling
+- fix prompt caching
## Update for 2024-11-21
diff --git a/modules/lora/extra_networks_lora.py b/modules/lora/extra_networks_lora.py
index 3aea659d9..c875ba0d5 100644
--- a/modules/lora/extra_networks_lora.py
+++ b/modules/lora/extra_networks_lora.py
@@ -113,22 +113,21 @@ def __init__(self):
self.errors = {}
def activate(self, p, params_list, step=0):
- t0 = time.time()
self.errors.clear()
if self.active:
if self.model != shared.opts.sd_model_checkpoint: # reset if model changed
self.active = False
if len(params_list) > 0 and not self.active: # activate patches once
- shared.log.debug(f'Activate network: type=LoRA model="{shared.opts.sd_model_checkpoint}"')
+ # shared.log.debug(f'Activate network: type=LoRA model="{shared.opts.sd_model_checkpoint}"')
self.active = True
self.model = shared.opts.sd_model_checkpoint
names, te_multipliers, unet_multipliers, dyn_dims = parse(p, params_list, step)
- networks.load_networks(names, te_multipliers, unet_multipliers, dyn_dims)
- t1 = time.time()
+ networks.load_networks(names, te_multipliers, unet_multipliers, dyn_dims) # load
+ networks.network_load() # backup/apply
if len(networks.loaded_networks) > 0 and step == 0:
infotext(p)
prompt(p)
- shared.log.info(f'Load network: type=LoRA apply={[n.name for n in networks.loaded_networks]} te={te_multipliers} unet={unet_multipliers} dims={dyn_dims} load={t1-t0:.2f}')
+ shared.log.info(f'Load network: type=LoRA apply={[n.name for n in networks.loaded_networks]} te={te_multipliers} unet={unet_multipliers} time={networks.get_timers()}')
def deactivate(self, p):
t0 = time.time()
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 51ef27a8a..86c6e5ed0 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -54,7 +54,8 @@ def total_time():
def get_timers():
t = { 'total': round(sum(timer.values()), 2) }
for k, v in timer.items():
- t[k] = round(v, 2)
+ if v > 0.1:
+ t[k] = round(v, 2)
return t
@@ -216,6 +217,7 @@ def maybe_recompile_model(names, te_multipliers):
def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
+ global backup_size # pylint: disable=global-statement
networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names]
if any(x is None for x in networks_on_disk):
list_available_networks()
@@ -304,10 +306,9 @@ def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm
with devices.inference_context():
if weights_backup is not None:
if updown is not None:
- if len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9:
- # inpainting model. zero pad updown to make channel[1] 4 to 9
+ if len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
- weights_backup = weights_backup.clone().to(device)
+ weights_backup = weights_backup.clone().to(self.weight.device)
weights_backup += updown.to(weights_backup)
if getattr(self, "quant_type", None) in ['nf4', 'fp4']:
bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
@@ -375,18 +376,18 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
network_layer_name = getattr(self, 'network_layer_name', None)
current_names = getattr(self, "network_current_names", ())
wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks)
- if network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419
- maybe_backup_weights(self, wanted_names)
- if current_names != wanted_names:
- batch_updown = None
- batch_ex_bias = None
- t0 = time.time()
- for net in loaded_networks:
- # default workflow where module is known and has weights
- module = net.modules.get(network_layer_name, None)
- if module is not None and hasattr(self, 'weight'):
- try:
- with devices.inference_context():
+ with devices.inference_context():
+ if network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419
+ maybe_backup_weights(self, wanted_names)
+ if current_names != wanted_names:
+ batch_updown = None
+ batch_ex_bias = None
+ t0 = time.time()
+ for net in loaded_networks:
+ # default workflow where module is known and has weights
+ module = net.modules.get(network_layer_name, None)
+ if module is not None and hasattr(self, 'weight'):
+ try:
weight = self.weight.to(devices.device) # calculate quant weights once
updown, ex_bias = module.calc_updown(weight)
if batch_updown is not None and updown is not None:
@@ -402,22 +403,22 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
batch_updown = batch_updown.to(devices.cpu)
if batch_ex_bias is not None:
batch_ex_bias = batch_ex_bias.to(devices.cpu)
- except RuntimeError as e:
- extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
- if debug:
- module_name = net.modules.get(network_layer_name, None)
- shared.log.error(f'LoRA apply weight name="{net.name}" module="{module_name}" layer="{network_layer_name}" {e}')
- errors.display(e, 'LoRA')
- raise RuntimeError('LoRA apply weight') from e
- continue
- if module is None:
- continue
- shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
- extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
- t1 = time.time()
- timer['calc'] += t1 - t0
- set_weights(self, batch_updown, batch_ex_bias) # Set or restore weights from backup
- self.network_current_names = wanted_names
+ except RuntimeError as e:
+ extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
+ if debug:
+ module_name = net.modules.get(network_layer_name, None)
+ shared.log.error(f'LoRA apply weight name="{net.name}" module="{module_name}" layer="{network_layer_name}" {e}')
+ errors.display(e, 'LoRA')
+ raise RuntimeError('LoRA apply weight') from e
+ continue
+ if module is None:
+ continue
+ shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
+ extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
+ t1 = time.time()
+ timer['calc'] += t1 - t0
+ set_weights(self, batch_updown, batch_ex_bias) # Set or restore weights from backup
+ self.network_current_names = wanted_names
def network_load(): # called from processing
@@ -425,7 +426,7 @@ def network_load(): # called from processing
timer['calc'] = 0
timer['apply'] = 0
    sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
- if shared.opts.diffusers_offload_mode != "none":
+ if shared.opts.diffusers_offload_mode == "sequential":
sd_models.disable_offload(sd_model)
sd_models.move_model(sd_model, device=devices.cpu)
modules = []
@@ -441,11 +442,9 @@ def network_load(): # called from processing
pbar.remove_task(task)
modules.clear()
if debug:
- shared.log.debug(f'Load network: type=LoRA modules={len(modules)}')
- if shared.opts.diffusers_offload_mode != "none":
+ shared.log.debug(f'Load network: type=LoRA modules={len(modules)} backup={backup_size} time={get_timers()}')
+ if shared.opts.diffusers_offload_mode == "sequential":
sd_models.set_diffuser_offload(sd_model, op="model")
- if debug:
- shared.log.debug(f'Load network: type=LoRA time={get_timers()} backup={backup_size}')
def list_available_networks():
diff --git a/modules/processing_callbacks.py b/modules/processing_callbacks.py
index e1bf723cc..f3eb0bc37 100644
--- a/modules/processing_callbacks.py
+++ b/modules/processing_callbacks.py
@@ -4,7 +4,6 @@
import torch
import numpy as np
from modules import shared, processing_correction, extra_networks, timer, prompt_parser_diffusers
-from modules.lora.networks import network_load
p = None
@@ -69,7 +68,6 @@ def diffusers_callback(pipe, step: int = 0, timestep: int = 0, kwargs: dict = {}
time.sleep(0.1)
if hasattr(p, "stepwise_lora") and shared.native:
extra_networks.activate(p, p.extra_network_data, step=step)
- network_load()
if latents is None:
return kwargs
elif shared.opts.nan_skip:
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index ae24f5f80..463a15280 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -199,11 +199,6 @@ def process_hires(p: processing.StableDiffusionProcessing, output):
if hasattr(shared.sd_model, "vae") and output.images is not None and len(output.images) > 0:
        output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.hr_upscale_to_x, height=p.hr_upscale_to_y) # controlnet cannot deal with latent input
p.task_args['image'] = output.images # replace so hires uses new output
- sd_models.move_model(shared.sd_model, devices.device)
- if hasattr(shared.sd_model, 'unet'):
- sd_models.move_model(shared.sd_model.unet, devices.device)
- if hasattr(shared.sd_model, 'transformer'):
- sd_models.move_model(shared.sd_model.transformer, devices.device)
update_sampler(p, shared.sd_model, second_pass=True)
orig_denoise = p.denoising_strength
p.denoising_strength = strength
@@ -227,6 +222,11 @@ def process_hires(p: processing.StableDiffusionProcessing, output):
shared.state.job = 'HiRes'
shared.state.sampling_steps = hires_args.get('prior_num_inference_steps', None) or p.steps or hires_args.get('num_inference_steps', None)
try:
+ sd_models.move_model(shared.sd_model, devices.device)
+ if hasattr(shared.sd_model, 'unet'):
+ sd_models.move_model(shared.sd_model.unet, devices.device)
+ if hasattr(shared.sd_model, 'transformer'):
+ sd_models.move_model(shared.sd_model.transformer, devices.device)
sd_models_compile.check_deepcache(enable=True)
output = shared.sd_model(**hires_args) # pylint: disable=not-callable
if isinstance(output, dict):
@@ -405,6 +405,9 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
shared.sd_model = orig_pipeline
return results
+ if shared.opts.diffusers_offload_mode == "balanced":
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+
# sanitize init_images
if hasattr(p, 'init_images') and getattr(p, 'init_images', None) is None:
del p.init_images
@@ -427,10 +430,6 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
if p.negative_prompts is None or len(p.negative_prompts) == 0:
p.negative_prompts = p.all_negative_prompts[p.iteration * p.batch_size:(p.iteration+1) * p.batch_size]
- # load loras
- networks.network_load()
-
- sd_models.move_model(shared.sd_model, devices.device)
sd_models_compile.openvino_recompile_model(p, hires=False, refiner=False) # recompile if a parameter changes
if 'base' not in p.skip:
@@ -461,6 +460,10 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
timer.process.add('lora', networks.total_time())
shared.sd_model = orig_pipeline
+
+ if shared.opts.diffusers_offload_mode == "balanced":
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+
if p.state == '':
global last_p # pylint: disable=global-statement
last_p = p
diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py
index 2edef4bf5..c74731c6d 100644
--- a/modules/prompt_parser_diffusers.py
+++ b/modules/prompt_parser_diffusers.py
@@ -16,6 +16,7 @@
token_dict = None # used by helper get_tokens
token_type = None # used by helper get_tokens
cache = OrderedDict()
+last_attention = None
embedder = None
@@ -52,7 +53,7 @@ def __init__(self, prompts, negative_prompts, steps, clip_skip, p):
self.prompts = prompts
self.negative_prompts = negative_prompts
self.batchsize = len(self.prompts)
- self.attention = None
+ self.attention = last_attention
self.allsame = self.compare_prompts() # collapses batched prompts to single prompt if possible
self.steps = steps
self.clip_skip = clip_skip
@@ -78,6 +79,8 @@ def __init__(self, prompts, negative_prompts, steps, clip_skip, p):
self.scheduled_encode(pipe, batchidx)
else:
self.encode(pipe, prompt, negative_prompt, batchidx)
+ if shared.opts.diffusers_offload_mode == "balanced":
+ pipe = sd_models.apply_balanced_offload(pipe)
self.checkcache(p)
debug(f"Prompt encode: time={(time.time() - t0):.3f}")
@@ -113,6 +116,7 @@ def flatten(xss):
debug(f"Prompt cache: add={key}")
while len(cache) > int(shared.opts.sd_textencoder_cache_size):
cache.popitem(last=False)
+ return True
if item:
self.__dict__.update(cache[key])
cache.move_to_end(key)
@@ -161,7 +165,9 @@ def extend_embeds(self, batchidx, idx): # Extends scheduled prompt via index
self.negative_pooleds[batchidx].append(self.negative_pooleds[batchidx][idx])
def encode(self, pipe, positive_prompt, negative_prompt, batchidx):
+ global last_attention # pylint: disable=global-statement
self.attention = shared.opts.prompt_attention
+ last_attention = self.attention
if self.attention == "xhinker":
prompt_embed, positive_pooled, negative_embed, negative_pooled = get_xhinker_text_embeddings(pipe, positive_prompt, negative_prompt, self.clip_skip)
else:
@@ -178,7 +184,6 @@ def encode(self, pipe, positive_prompt, negative_prompt, batchidx):
if debug_enabled:
get_tokens(pipe, 'positive', positive_prompt)
get_tokens(pipe, 'negative', negative_prompt)
- pipe = prepare_model()
def __call__(self, key, step=0):
batch = getattr(self, key)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index ccba0bfb5..2cf7b3931 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -13,6 +13,7 @@
from rich import progress # pylint: disable=redefined-builtin
import torch
import safetensors.torch
+import accelerate
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config
from modules import paths, shared, shared_state, modelloader, devices, script_callbacks, sd_vae, sd_unet, errors, sd_models_config, sd_models_compile, sd_hijack_accelerate, sd_detect
@@ -310,6 +311,7 @@ def set_accelerate(sd_model):
def set_diffuser_offload(sd_model, op: str = 'model'):
+ t0 = time.time()
if not shared.native:
shared.log.warning('Attempting to use offload with backend=original')
return
@@ -363,41 +365,50 @@ def set_diffuser_offload(sd_model, op: str = 'model'):
sd_model = apply_balanced_offload(sd_model)
except Exception as e:
shared.log.error(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} {e}')
+ process_timer.add('offload', time.time() - t0)
+
+
+class OffloadHook(accelerate.hooks.ModelHook):
+ def init_hook(self, module):
+ return module
+
+ def pre_forward(self, module, *args, **kwargs):
+ if devices.normalize_device(module.device) != devices.normalize_device(devices.device):
+ device_index = torch.device(devices.device).index
+ if device_index is None:
+ device_index = 0
+ max_memory = {
+ device_index: f"{shared.opts.diffusers_offload_max_gpu_memory}GiB",
+ "cpu": f"{shared.opts.diffusers_offload_max_cpu_memory}GiB",
+ }
+ device_map = accelerate.infer_auto_device_map(module, max_memory=max_memory)
+ module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
+ offload_dir = getattr(module, "offload_dir", os.path.join(shared.opts.accelerate_offload_path, module.__class__.__name__))
+ module = accelerate.dispatch_model(module, device_map=device_map, offload_dir=offload_dir)
+ module = accelerate.hooks.add_hook_to_module(module, OffloadHook(), append=True)
+ module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
+ return args, kwargs
+
+ def post_forward(self, module, output):
+ return output
+
+ def detach_hook(self, module):
+ return module
+
+
+offload_hook_instance = OffloadHook()
def apply_balanced_offload(sd_model):
- from accelerate import infer_auto_device_map, dispatch_model
- from accelerate.hooks import add_hook_to_module, remove_hook_from_module, ModelHook
+ t0 = time.time()
excluded = ['OmniGenPipeline']
if sd_model.__class__.__name__ in excluded:
return sd_model
-
- class dispatch_from_cpu_hook(ModelHook):
- def init_hook(self, module):
- return module
-
- def pre_forward(self, module, *args, **kwargs):
- if devices.normalize_device(module.device) != devices.normalize_device(devices.device):
- device_index = torch.device(devices.device).index
- if device_index is None:
- device_index = 0
- max_memory = {
- device_index: f"{shared.opts.diffusers_offload_max_gpu_memory}GiB",
- "cpu": f"{shared.opts.diffusers_offload_max_cpu_memory}GiB",
- }
- device_map = infer_auto_device_map(module, max_memory=max_memory)
- module = remove_hook_from_module(module, recurse=True)
- offload_dir = getattr(module, "offload_dir", os.path.join(shared.opts.accelerate_offload_path, module.__class__.__name__))
- module = dispatch_model(module, device_map=device_map, offload_dir=offload_dir)
- module = add_hook_to_module(module, dispatch_from_cpu_hook(), append=True)
- module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
- return args, kwargs
-
- def post_forward(self, module, output):
- return output
-
- def detach_hook(self, module):
- return module
+ fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
+ debug_move(f'Apply offload: type=balanced fn={fn}')
+ checkpoint_name = sd_model.sd_checkpoint_info.name if getattr(sd_model, "sd_checkpoint_info", None) is not None else None
+ if checkpoint_name is None:
+ checkpoint_name = sd_model.__class__.__name__
def apply_balanced_offload_to_module(pipe):
if hasattr(pipe, "pipe"):
@@ -409,23 +420,19 @@ def apply_balanced_offload_to_module(pipe):
for module_name in keys: # pylint: disable=protected-access
module = getattr(pipe, module_name, None)
if isinstance(module, torch.nn.Module):
- checkpoint_name = pipe.sd_checkpoint_info.name if getattr(pipe, "sd_checkpoint_info", None) is not None else None
- if checkpoint_name is None:
- checkpoint_name = pipe.__class__.__name__
- offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
network_layer_name = getattr(module, "network_layer_name", None)
- module = remove_hook_from_module(module, recurse=True)
+ module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
try:
- module = module.to("cpu")
- module.offload_dir = offload_dir
- module = add_hook_to_module(module, dispatch_from_cpu_hook(), append=True)
+ module = module.to(devices.cpu, non_blocking=True)
+ module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
+ # module = accelerate.hooks.add_hook_to_module(module, OffloadHook(), append=True)
+ module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
if network_layer_name:
module.network_layer_name = network_layer_name
except Exception as e:
if 'bitsandbytes' not in str(e):
shared.log.error(f'Balanced offload: module={module_name} {e}')
- devices.torch_gc(fast=True)
apply_balanced_offload_to_module(sd_model)
if hasattr(sd_model, "pipe"):
@@ -435,6 +442,8 @@ def apply_balanced_offload_to_module(pipe):
if hasattr(sd_model, "decoder_pipe"):
apply_balanced_offload_to_module(sd_model.decoder_pipe)
set_accelerate(sd_model)
+ devices.torch_gc(fast=True)
+ process_timer.add('offload', time.time() - t0)
return sd_model
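
For context on the `OffloadHook` introduced above: accelerate model hooks wrap a module's forward call, and `pre_forward` is the point where the hook can move or re-dispatch the module before it runs. A minimal, self-contained sketch of the attach-and-trigger flow; `LogHook` is a hypothetical example, only the accelerate API calls match what the patch uses.

```python
import torch
import accelerate

class LogHook(accelerate.hooks.ModelHook):
    def pre_forward(self, module, *args, **kwargs):
        # runs right before the module's forward; OffloadHook uses this point
        # to re-dispatch an offloaded module back onto the execution device
        print(f"forward on {module.__class__.__name__} device={next(module.parameters()).device}")
        return args, kwargs

m = torch.nn.Linear(4, 4)
m = accelerate.hooks.add_hook_to_module(m, LogHook(), append=True)
m(torch.randn(1, 4))  # LogHook.pre_forward fires here
```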
From b55e746fca34a53effb2219aeef263cbbdb6b03c Mon Sep 17 00:00:00 2001
From: Disty0
Date: Sat, 30 Nov 2024 20:49:57 +0300
Subject: [PATCH 055/162] Improve balanced offload pre forward performance
---
modules/model_stablecascade.py | 8 +++-----
modules/sd_models.py | 18 ++++++++++++------
2 files changed, 15 insertions(+), 11 deletions(-)
diff --git a/modules/model_stablecascade.py b/modules/model_stablecascade.py
index 6c23ea00a..d6f9e4266 100644
--- a/modules/model_stablecascade.py
+++ b/modules/model_stablecascade.py
@@ -330,14 +330,12 @@ def __call__(
elif output_type == "pil":
        images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16 -> numpy doesn't work
images = self.numpy_to_pil(images)
+ if shared.opts.diffusers_offload_mode == "balanced":
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
else:
images = latents
- # Offload all models
- if shared.opts.diffusers_offload_mode == "balanced":
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
- else:
- self.maybe_free_model_hooks()
+ self.maybe_free_model_hooks()
if not return_dict:
return images
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 2cf7b3931..c2c789987 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -378,15 +378,17 @@ def pre_forward(self, module, *args, **kwargs):
if device_index is None:
device_index = 0
max_memory = {
- device_index: f"{shared.opts.diffusers_offload_max_gpu_memory}GiB",
- "cpu": f"{shared.opts.diffusers_offload_max_cpu_memory}GiB",
+ device_index: int(shared.opts.diffusers_offload_max_gpu_memory * 1024*1024*1024),
+ "cpu": int(shared.opts.diffusers_offload_max_cpu_memory * 1024*1024*1024),
}
- device_map = accelerate.infer_auto_device_map(module, max_memory=max_memory)
- module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
+ device_map = getattr(module, "balanced_offload_device_map", None)
+ if device_map is None or max_memory != getattr(module, "balanced_offload_max_memory", None):
+ device_map = accelerate.infer_auto_device_map(module, max_memory=max_memory)
offload_dir = getattr(module, "offload_dir", os.path.join(shared.opts.accelerate_offload_path, module.__class__.__name__))
module = accelerate.dispatch_model(module, device_map=device_map, offload_dir=offload_dir)
- module = accelerate.hooks.add_hook_to_module(module, OffloadHook(), append=True)
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
+ module.balanced_offload_device_map = device_map
+ module.balanced_offload_max_memory = max_memory
return args, kwargs
def post_forward(self, module, output):
@@ -421,15 +423,19 @@ def apply_balanced_offload_to_module(pipe):
module = getattr(pipe, module_name, None)
if isinstance(module, torch.nn.Module):
network_layer_name = getattr(module, "network_layer_name", None)
+ device_map = getattr(module, "balanced_offload_device_map", None)
+ max_memory = getattr(module, "balanced_offload_max_memory", None)
module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
try:
module = module.to(devices.cpu, non_blocking=True)
module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
- # module = accelerate.hooks.add_hook_to_module(module, OffloadHook(), append=True)
module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
if network_layer_name:
module.network_layer_name = network_layer_name
+ if device_map and max_memory:
+ module.balanced_offload_device_map = device_map
+ module.balanced_offload_max_memory = max_memory
except Exception as e:
if 'bitsandbytes' not in str(e):
shared.log.error(f'Balanced offload: module={module_name} {e}')
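
The performance gain above comes from memoizing the device map on the module: `accelerate.infer_auto_device_map` is recomputed only when the memory budget changes. A condensed sketch of that caching, restated outside the hook for clarity (`get_device_map` is a hypothetical helper name).

```python
import accelerate

def get_device_map(module, max_memory):
    # reuse the map stored on the module unless the memory budget changed
    cached = getattr(module, "balanced_offload_device_map", None)
    if cached is None or getattr(module, "balanced_offload_max_memory", None) != max_memory:
        cached = accelerate.infer_auto_device_map(module, max_memory=max_memory)
        module.balanced_offload_device_map = cached
        module.balanced_offload_max_memory = max_memory
    return cached
```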
From 81a95d04cf6db49433c9920efabb90cca3165734 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Sat, 30 Nov 2024 21:12:26 +0300
Subject: [PATCH 056/162] Skip apply_balanced_offload if not needed
---
modules/sd_models.py | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index c2c789987..6c3ddc6b5 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -389,6 +389,7 @@ def pre_forward(self, module, *args, **kwargs):
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
module.balanced_offload_device_map = device_map
module.balanced_offload_max_memory = max_memory
+ module.balanced_offload_active = True
return args, kwargs
def post_forward(self, module, output):
@@ -421,7 +422,8 @@ def apply_balanced_offload_to_module(pipe):
keys = get_signature(pipe).keys()
for module_name in keys: # pylint: disable=protected-access
module = getattr(pipe, module_name, None)
- if isinstance(module, torch.nn.Module):
+ balanced_offload_active = getattr(module, "balanced_offload_active", None)
+ if isinstance(module, torch.nn.Module) and (balanced_offload_active is None or balanced_offload_active):
network_layer_name = getattr(module, "network_layer_name", None)
device_map = getattr(module, "balanced_offload_device_map", None)
max_memory = getattr(module, "balanced_offload_max_memory", None)
@@ -431,6 +433,7 @@ def apply_balanced_offload_to_module(pipe):
module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
+ module.balanced_offload_active = False
if network_layer_name:
module.network_layer_name = network_layer_name
if device_map and max_memory:
@@ -1471,6 +1474,8 @@ def disable_offload(sd_model):
module = getattr(sd_model, module_name, None)
if isinstance(module, torch.nn.Module):
network_layer_name = getattr(module, "network_layer_name", None)
+ if getattr(module, "balanced_offload_active", None) is not None:
+ module.balanced_offload_active = None
module = remove_hook_from_module(module, recurse=True)
if network_layer_name:
module.network_layer_name = network_layer_name
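
The skip added here relies on a tri-state `balanced_offload_active` attribute tracked per module; a short sketch of how the flag is read (`should_offload` is a hypothetical helper, the condition mirrors the diff).

```python
def should_offload(module) -> bool:
    active = getattr(module, "balanced_offload_active", None)
    # None  -> module has never been through balanced offload yet
    # True  -> pre_forward dispatched it to the GPU, so it needs offloading again
    # False -> it is already offloaded; skip the redundant CPU move
    return active is None or active
```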
From bccb277dcb15b29f696500f854e59a31694ac235 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 30 Nov 2024 17:26:49 -0500
Subject: [PATCH 057/162] update lora apply weights and xyz
Signed-off-by: Vladimir Mandic
---
modules/lora/networks.py | 81 ++++++++++++++++++++++------------------
modules/processing.py | 3 +-
scripts/xyz_grid_on.py | 1 +
3 files changed, 48 insertions(+), 37 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 86c6e5ed0..b06a0c81f 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -27,8 +27,9 @@
available_networks = {}
available_network_aliases = {}
loaded_networks: List[network.Network] = []
-timer = { 'list': 0, 'load': 0, 'backup': 0, 'calc': 0, 'apply': 0, 'restore': 0, 'deactivate': 0 }
+timer = { 'list': 0, 'load': 0, 'backup': 0, 'calc': 0, 'apply': 0, 'move': 0, 'restore': 0, 'deactivate': 0 }
backup_size = 0
+bnb = None
lora_cache = {}
diffuser_loaded = []
diffuser_scales = []
@@ -302,42 +303,41 @@ def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm
bias_backup = getattr(self, "network_bias_backup", None)
if weights_backup is None and bias_backup is None:
return
- device = self.weight.device
- with devices.inference_context():
- if weights_backup is not None:
- if updown is not None:
- if len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
- updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
- weights_backup = weights_backup.clone().to(self.weight.device)
- weights_backup += updown.to(weights_backup)
- if getattr(self, "quant_type", None) in ['nf4', 'fp4']:
- bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
- if bnb is not None:
- self.weight = bnb.nn.Params4bit(weights_backup, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
- else:
- self.weight.copy_(weights_backup, non_blocking=True)
+ if weights_backup is not None:
+ if updown is not None and len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
+ updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
+ if updown is not None:
+ new_weight = updown.to(devices.device) + weights_backup.to(devices.device)
+ if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
+ self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
else:
- self.weight.copy_(weights_backup, non_blocking=True)
- if hasattr(self, "qweight") and hasattr(self, "freeze"):
- self.freeze()
- if bias_backup is not None:
- if ex_bias is not None:
- bias_backup = bias_backup.clone() + ex_bias.to(weights_backup)
- self.bias.copy_(bias_backup)
+ self.weight.copy_(new_weight, non_blocking=True)
+ del new_weight
else:
- self.bias = None
- self.to(device)
+ self.weight.copy_(weights_backup, non_blocking=True)
+ if hasattr(self, "qweight") and hasattr(self, "freeze"):
+ self.freeze()
+ if bias_backup is not None:
+ if ex_bias is not None:
+ new_weight = ex_bias.to(self.bias.device) + bias_backup.to(self.device)
+ self.bias.copy_(new_weight, non_blocking=True)
+ del new_weight
+ else:
+ self.bias.copy_(bias_backup, non_blocking=True)
+ else:
+ self.bias = None
t1 = time.time()
timer['apply'] += t1 - t0
def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], wanted_names): # pylint: disable=W0613
- global backup_size # pylint: disable=W0603
+ global bnb, backup_size # pylint: disable=W0603
t0 = time.time()
weights_backup = getattr(self, "network_weights_backup", None)
if weights_backup is None and wanted_names != (): # pylint: disable=C1803
if getattr(self.weight, "quant_type", None) in ['nf4', 'fp4']:
- bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
+ if bnb is None:
+ bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
if bnb is not None:
with devices.inference_context():
weights_backup = bnb.functional.dequantize_4bit(self.weight, quant_state=self.weight.quant_state, quant_type=self.weight.quant_type, blocksize=self.weight.blocksize,)
@@ -375,21 +375,27 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
"""
network_layer_name = getattr(self, 'network_layer_name', None)
current_names = getattr(self, "network_current_names", ())
- wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks)
+ wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks) if len(loaded_networks) > 0 else ()
with devices.inference_context():
- if network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419
+ if len(loaded_networks) > 0 and network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419
maybe_backup_weights(self, wanted_names)
if current_names != wanted_names:
+ if shared.opts.diffusers_offload_mode == "none":
+ self.to(devices.device, non_blocking=True)
batch_updown = None
batch_ex_bias = None
- t0 = time.time()
for net in loaded_networks:
- # default workflow where module is known and has weights
module = net.modules.get(network_layer_name, None)
if module is not None and hasattr(self, 'weight'):
try:
- weight = self.weight.to(devices.device) # calculate quant weights once
+ t0 = time.time()
+ weight = self.weight.to(devices.device, non_blocking=True) # calculate quant weights once
+ t1 = time.time()
updown, ex_bias = module.calc_updown(weight)
+ del weight
+ t2 = time.time()
+ timer['move'] += t1 - t0
+ timer['calc'] += t2 - t1
if batch_updown is not None and updown is not None:
batch_updown += updown
else:
@@ -399,10 +405,13 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
else:
batch_ex_bias = ex_bias
if shared.opts.diffusers_offload_mode != "none":
+ t0 = time.time()
if batch_updown is not None:
- batch_updown = batch_updown.to(devices.cpu)
+ batch_updown = batch_updown.to(devices.cpu, non_blocking=True)
if batch_ex_bias is not None:
- batch_ex_bias = batch_ex_bias.to(devices.cpu)
+ batch_ex_bias = batch_ex_bias.to(devices.cpu, non_blocking=True)
+ t1 = time.time()
+ timer['move'] += t1 - t0
except RuntimeError as e:
extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
if debug:
@@ -415,16 +424,16 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
continue
shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
- t1 = time.time()
- timer['calc'] += t1 - t0
set_weights(self, batch_updown, batch_ex_bias) # Set or restore weights from backup
self.network_current_names = wanted_names
+ # self.to(devices.cpu)
-def network_load(): # called from processing
+def network_load():
timer['backup'] = 0
timer['calc'] = 0
timer['apply'] = 0
+ timer['move'] = 0
    sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.disable_offload(sd_model)
diff --git a/modules/processing.py b/modules/processing.py
index 92faaee8d..ebbaf7272 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -472,7 +472,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
if p.scripts is not None and isinstance(p.scripts, scripts.ScriptRunner) and not (shared.state.interrupted or shared.state.skipped):
p.scripts.postprocess(p, processed)
timer.process.record('post')
- shared.log.info(f'Processed: images={len(output_images)} its={(p.steps * len(output_images)) / (t1 - t0):.2f} time={t1-t0:.2f} timers={timer.process.dct(min_time=0.02)} memory={memstats.memory_stats()}')
+ if not p.disable_extra_networks:
+ shared.log.info(f'Processed: images={len(output_images)} its={(p.steps * len(output_images)) / (t1 - t0):.2f} time={t1-t0:.2f} timers={timer.process.dct(min_time=0.02)} memory={memstats.memory_stats()}')
if shared.cmd_opts.malloc:
import tracemalloc
diff --git a/scripts/xyz_grid_on.py b/scripts/xyz_grid_on.py
index 202a2cfc4..aa0897442 100644
--- a/scripts/xyz_grid_on.py
+++ b/scripts/xyz_grid_on.py
@@ -413,6 +413,7 @@ def cell(x, y, z, ix, iy, iz):
p.do_not_save_grid = True
p.do_not_save_samples = True
+ p.disable_extra_networks = True
active = False
cache = processed
return processed
From 22982f3126ecd829b037ed7359556383f93fcffe Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 30 Nov 2024 18:10:10 -0500
Subject: [PATCH 058/162] update stats
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 3 +++
modules/lora/networks.py | 42 +++++++++++++++++++++++++--------------
modules/processing_vae.py | 2 +-
3 files changed, 31 insertions(+), 16 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index dcb88bcf3..9ab3bcbd1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -40,6 +40,8 @@
- Flux: all-in-one safetensors
example:
- Flux: do not recast quants
+- **Offload** improvements:
+ - faster and more compatible *balanced* mode
- **UI**:
- improved stats on generate completion
- improved live preview display and performance
@@ -69,6 +71,7 @@
- fix gpu memory monitoring
- simplify img2img/inpaint/sketch canvas handling
- fix prompt caching
+- fix xyz grid skip final pass
## Update for 2024-11-21
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index b06a0c81f..604e591a9 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -3,9 +3,10 @@
import re
import time
import concurrent
+from contextlib import nullcontext
import torch
import diffusers.models.lora
-import rich.progress as p
+import rich.progress as rp
import modules.lora.network as network
import modules.lora.network_lora as network_lora
@@ -22,7 +23,6 @@
debug = os.environ.get('SD_LORA_DEBUG', None) is not None
-pbar = p.Progress(p.TextColumn('[cyan]{task.description}'), p.BarColumn(), p.TaskProgressColumn(), p.TimeRemainingColumn(), p.TimeElapsedColumn(), console=shared.console)
extra_network_lora = None
available_networks = {}
available_network_aliases = {}
@@ -307,7 +307,7 @@ def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm
if updown is not None and len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
if updown is not None:
- new_weight = updown.to(devices.device) + weights_backup.to(devices.device)
+ new_weight = updown.to(devices.device, non_blocking=True) + weights_backup.to(devices.device, non_blocking=True)
if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
else:
@@ -319,7 +319,7 @@ def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm
self.freeze()
if bias_backup is not None:
if ex_bias is not None:
- new_weight = ex_bias.to(self.bias.device) + bias_backup.to(self.device)
+ new_weight = ex_bias.to(devices.device, non_blocking=True) + bias_backup.to(devices.device, non_blocking=True)
self.bias.copy_(new_weight, non_blocking=True)
del new_weight
else:
@@ -351,7 +351,6 @@ def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
if shared.opts.lora_offload_backup and weights_backup is not None:
weights_backup = weights_backup.to(devices.cpu)
self.network_weights_backup = weights_backup
- backup_size += weights_backup.numel() * weights_backup.element_size()
bias_backup = getattr(self, "network_bias_backup", None)
if bias_backup is None:
if getattr(self, 'bias', None) is not None:
@@ -361,8 +360,10 @@ def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
if shared.opts.lora_offload_backup and bias_backup is not None:
bias_backup = bias_backup.to(devices.cpu)
self.network_bias_backup = bias_backup
- if bias_backup is not None:
- backup_size += bias_backup.numel() * bias_backup.element_size()
+ if getattr(self, 'network_weights_backup', None) is not None:
+ backup_size += self.network_weights_backup.numel() * self.network_weights_backup.element_size()
+ if getattr(self, 'network_bias_backup', None) is not None:
+ backup_size += self.network_bias_backup.numel() * self.network_bias_backup.element_size()
t1 = time.time()
timer['backup'] += t1 - t0
@@ -424,9 +425,11 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
continue
shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
- set_weights(self, batch_updown, batch_ex_bias) # Set or restore weights from backup
self.network_current_names = wanted_names
- # self.to(devices.cpu)
+ set_weights(self, batch_updown, batch_ex_bias) # Set or restore weights from backup
+ if batch_updown is not None or batch_ex_bias is not None:
+ return self.weight.device
+ return None
def network_load():
@@ -443,15 +446,24 @@ def network_load():
component = getattr(sd_model, component_name, None)
if component is not None and hasattr(component, 'named_modules'):
modules += list(component.named_modules())
+ devices_used = []
+ if len(loaded_networks) > 0:
+ pbar = rp.Progress(rp.TextColumn('[cyan]{task.description}'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), console=shared.console)
+ task = pbar.add_task(description='Apply network: type=LoRA' , total=len(modules))
+ else:
+ task = None
+ pbar = nullcontext()
with pbar:
- task = pbar.add_task(description='Apply network: type=LoRA' , total=len(modules), visible=len(loaded_networks) > 0)
for _, module in modules:
- network_apply_weights(module)
- pbar.update(task, advance=1) # progress bar becomes visible if operation takes more than 1sec
- pbar.remove_task(task)
- modules.clear()
+ devices_used.append(network_apply_weights(module))
+ if task is not None:
+ pbar.update(task, advance=1) # progress bar becomes visible if operation takes more than 1sec
+ # pbar.remove_task(task)
if debug:
- shared.log.debug(f'Load network: type=LoRA modules={len(modules)} backup={backup_size} time={get_timers()}')
+ devices_used = [d for d in devices_used if d is not None]
+ devices_set = list(set(devices_used))
+ shared.log.debug(f'Load network: type=LoRA modules={len(modules)} apply={len(devices_used)} device={devices_set} backup={backup_size} time={get_timers()}')
+ modules.clear()
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.set_diffuser_offload(sd_model, op="model")
diff --git a/modules/processing_vae.py b/modules/processing_vae.py
index 1c4a45f07..b114e01d3 100644
--- a/modules/processing_vae.py
+++ b/modules/processing_vae.py
@@ -117,7 +117,7 @@ def full_vae_decode(latents, model):
model.vae.orig_dtype = model.vae.dtype
model.vae = model.vae.to(dtype=torch.float32)
latents = latents.to(torch.float32)
- latents = latents.to(devices.device)
+ latents = latents.to(devices.device, non_blocking=True)
if getattr(model.vae, "post_quant_conv", None) is not None:
latents = latents.to(next(iter(model.vae.post_quant_conv.parameters())).dtype)
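
The `backup=` figure added to the debug log above is plain tensor bookkeeping: a tensor occupies `numel() * element_size()` bytes. A self-contained sketch with made-up shapes.

```python
import torch

def tensor_bytes(t: torch.Tensor) -> int:
    return t.numel() * t.element_size()

w = torch.randn(320, 320, dtype=torch.float16)  # hypothetical weight backup
b = torch.randn(320, dtype=torch.float16)       # hypothetical bias backup
backup_size = tensor_bytes(w) + tensor_bytes(b)
print(f"backup={backup_size} bytes")            # reported in the LoRA load debug line
```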
From b7aff134a2f50f41b0371489506408a7ca600e85 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 30 Nov 2024 19:03:51 -0500
Subject: [PATCH 059/162] add low/high threshold to balanced offload
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 2 ++
modules/devices.py | 3 ++-
modules/lora/networks.py | 1 +
modules/processing_helpers.py | 5 ++---
modules/sd_models.py | 27 ++++++++++++++++++++-------
modules/shared.py | 5 +++--
6 files changed, 30 insertions(+), 13 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9ab3bcbd1..c62b6b917 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -42,6 +42,8 @@
- Flux: do not recast quants
- **Offload** improvements:
- faster and more compatible *balanced* mode
+ - balanced offload: units are now in percentage instead of bytes
+ - balanced offload: add both high and low watermark
- **UI**:
- improved stats on generate completion
- improved live preview display and performance
diff --git a/modules/devices.py b/modules/devices.py
index 9ca1863a5..64968a30c 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -224,7 +224,7 @@ def torch_gc(force=False, fast=False):
timer.process.records['gc'] = 0
timer.process.records['gc'] += t1 - t0
if not force or collected == 0:
- return
+ return used_gpu
mem = memstats.memory_stats()
saved = round(gpu.get('used', 0) - mem.get('gpu', {}).get('used', 0), 2)
before = { 'gpu': gpu.get('used', 0), 'ram': ram.get('used', 0) }
@@ -233,6 +233,7 @@ def torch_gc(force=False, fast=False):
results = { 'collected': collected, 'saved': saved }
fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
log.debug(f'GC: utilization={utilization} gc={results} before={before} after={after} device={torch.device(get_optimal_device_name())} fn={fn} time={round(t1 - t0, 2)}') # pylint: disable=protected-access
+ return used_gpu
def set_cuda_sync_mode(mode):
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 604e591a9..69db5fce3 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -218,6 +218,7 @@ def maybe_recompile_model(names, te_multipliers):
def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
+ timer['list'] = 0
global backup_size # pylint: disable=global-statement
networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names]
if any(x is None for x in networks_on_disk):
diff --git a/modules/processing_helpers.py b/modules/processing_helpers.py
index ab08d4cc8..5d2661cc2 100644
--- a/modules/processing_helpers.py
+++ b/modules/processing_helpers.py
@@ -368,14 +368,13 @@ def validate_sample(tensor):
sample = 255.0 * np.moveaxis(sample, 0, 2) if not shared.native else 255.0 * sample
with warnings.catch_warnings(record=True) as w:
cast = sample.astype(np.uint8)
- minimum, maximum, mean = np.min(cast), np.max(cast), np.mean(cast)
- if len(w) > 0 or minimum == maximum:
+ if len(w) > 0:
nans = np.isnan(sample).sum()
cast = np.nan_to_num(sample)
cast = cast.astype(np.uint8)
vae = shared.sd_model.vae.dtype if hasattr(shared.sd_model, 'vae') else None
upcast = getattr(shared.sd_model.vae.config, 'force_upcast', None) if hasattr(shared.sd_model, 'vae') and hasattr(shared.sd_model.vae, 'config') else None
- shared.log.error(f'Decode: sample={sample.shape} invalid={nans} mean={mean} dtype={dtype} vae={vae} upcast={upcast} failed to validate')
+ shared.log.error(f'Decode: sample={sample.shape} invalid={nans} dtype={dtype} vae={vae} upcast={upcast} failed to validate')
if upcast is not None and not upcast:
setattr(shared.sd_model.vae.config, 'force_upcast', True) # noqa: B010
shared.log.warning('Decode: upcast=True set, retry operation')
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 6c3ddc6b5..42bd33d82 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -361,7 +361,7 @@ def set_diffuser_offload(sd_model, op: str = 'model'):
shared.log.error(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} {e}')
if shared.opts.diffusers_offload_mode == "balanced":
try:
- shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} threshold={shared.opts.diffusers_offload_max_gpu_memory} limit={shared.opts.cuda_mem_fraction}')
+ shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} watermarks low={shared.opts.diffusers_offload_min_gpu_memory} high={shared.opts.diffusers_offload_max_gpu_memory} limit={shared.opts.cuda_mem_fraction:.2f}')
sd_model = apply_balanced_offload(sd_model)
except Exception as e:
shared.log.error(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} {e}')
@@ -369,6 +369,16 @@ def set_diffuser_offload(sd_model, op: str = 'model'):
class OffloadHook(accelerate.hooks.ModelHook):
+ def __init__(self):
+ if shared.opts.diffusers_offload_max_gpu_memory > 1:
+ shared.opts.diffusers_offload_max_gpu_memory = 0.75
+ if shared.opts.diffusers_offload_max_cpu_memory > 1:
+ shared.opts.diffusers_offload_max_cpu_memory = 0.75
+ self.gpu = int(shared.gpu_memory * shared.opts.diffusers_offload_max_gpu_memory * 1024*1024*1024)
+ self.cpu = int(shared.cpu_memory * shared.opts.diffusers_offload_max_cpu_memory * 1024*1024*1024)
+ shared.log.info(f'Init offload: type=balanced gpu={self.gpu} cpu={self.cpu}')
+ super().__init__()
+
def init_hook(self, module):
return module
@@ -377,10 +387,7 @@ def pre_forward(self, module, *args, **kwargs):
device_index = torch.device(devices.device).index
if device_index is None:
device_index = 0
- max_memory = {
- device_index: int(shared.opts.diffusers_offload_max_gpu_memory * 1024*1024*1024),
- "cpu": int(shared.opts.diffusers_offload_max_cpu_memory * 1024*1024*1024),
- }
+ max_memory = { device_index: self.gpu, "cpu": self.cpu }
device_map = getattr(module, "balanced_offload_device_map", None)
if device_map is None or max_memory != getattr(module, "balanced_offload_max_memory", None):
device_map = accelerate.infer_auto_device_map(module, max_memory=max_memory)
@@ -399,10 +406,13 @@ def detach_hook(self, module):
return module
-offload_hook_instance = OffloadHook()
+offload_hook_instance = None
def apply_balanced_offload(sd_model):
+ global offload_hook_instance # pylint: disable=global-statement
+ if offload_hook_instance is None:
+ offload_hook_instance = OffloadHook()
t0 = time.time()
excluded = ['OmniGenPipeline']
if sd_model.__class__.__name__ in excluded:
@@ -414,6 +424,7 @@ def apply_balanced_offload(sd_model):
checkpoint_name = sd_model.__class__.__name__
def apply_balanced_offload_to_module(pipe):
+ used_gpu = devices.torch_gc(fast=True)
if hasattr(pipe, "pipe"):
apply_balanced_offload_to_module(pipe.pipe)
if hasattr(pipe, "_internal_dict"):
@@ -429,7 +440,9 @@ def apply_balanced_offload_to_module(pipe):
max_memory = getattr(module, "balanced_offload_max_memory", None)
module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
try:
- module = module.to(devices.cpu, non_blocking=True)
+ if used_gpu > 100 * shared.opts.diffusers_offload_min_gpu_memory:
+ module = module.to(devices.cpu, non_blocking=True)
+ used_gpu = devices.torch_gc(fast=True)
module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
diff --git a/modules/shared.py b/modules/shared.py
index bcb506cee..21a70fea1 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -560,8 +560,9 @@ def get_default_modes():
"diffusers_extract_ema": OptionInfo(False, "Use model EMA weights when possible"),
"diffusers_generator_device": OptionInfo("GPU", "Generator device", gr.Radio, {"choices": ["GPU", "CPU", "Unset"]}),
"diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'model', 'sequential']}),
- "diffusers_offload_max_gpu_memory": OptionInfo(round(gpu_memory * 0.75, 1), "Max GPU memory before balanced offload", gr.Slider, {"minimum": 0, "maximum": gpu_memory, "step": 0.01, "visible": True }),
- "diffusers_offload_max_cpu_memory": OptionInfo(round(cpu_memory * 0.75, 1), "Max CPU memory before balanced offload", gr.Slider, {"minimum": 0, "maximum": cpu_memory, "step": 0.01, "visible": False }),
+ "diffusers_offload_min_gpu_memory": OptionInfo(0.25, "Balanced offload GPU low watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
+ "diffusers_offload_max_gpu_memory": OptionInfo(0.75, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
+ "diffusers_offload_max_cpu_memory": OptionInfo(0.75, "Balanced offload CPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
"diffusers_vae_upcast": OptionInfo("default", "VAE upcasting", gr.Radio, {"choices": ['default', 'true', 'false']}),
"diffusers_vae_slicing": OptionInfo(True, "VAE slicing"),
"diffusers_vae_tiling": OptionInfo(cmd_opts.lowvram or cmd_opts.medvram, "VAE tiling"),
From 05639ca238857604c5516a38334ea0a950fab01c Mon Sep 17 00:00:00 2001
From: Pablo Hellmann
Date: Sun, 1 Dec 2024 01:12:28 +0100
Subject: [PATCH 060/162] prompt token counter fix
---
javascript/black-teal-reimagined.css | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index b7567ce75..0a985b786 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -985,9 +985,10 @@ svg.feather.feather-image,
#txt2img_token_counter, #txt2img_negative_token_counter {
display: flex;
- flex-direction: column;
- justify-content: space-evenly;
- padding: 5px;
+ flex-direction: row;
+ padding-top: 1px;
+ opacity: 0.6;
+ z-index: 99;
}
#txt2img_prompt_container {
From c5cd3cb623e1e7999a161024dfc97b01d34d9531 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 30 Nov 2024 19:15:50 -0500
Subject: [PATCH 061/162] reinit offload instance on change
Signed-off-by: Vladimir Mandic
---
modules/sd_models.py | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 42bd33d82..24ceff5ee 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -374,9 +374,14 @@ def __init__(self):
shared.opts.diffusers_offload_max_gpu_memory = 0.75
if shared.opts.diffusers_offload_max_cpu_memory > 1:
shared.opts.diffusers_offload_max_cpu_memory = 0.75
+ self.min_watermark = shared.opts.diffusers_offload_min_gpu_memory
+ self.max_watermark = shared.opts.diffusers_offload_max_gpu_memory
+ self.cpu_watermark = shared.opts.diffusers_offload_max_cpu_memory
self.gpu = int(shared.gpu_memory * shared.opts.diffusers_offload_max_gpu_memory * 1024*1024*1024)
self.cpu = int(shared.cpu_memory * shared.opts.diffusers_offload_max_cpu_memory * 1024*1024*1024)
- shared.log.info(f'Init offload: type=balanced gpu={self.gpu} cpu={self.cpu}')
+ gpu_dict = { "min": self.min_watermark, "max": self.max_watermark, "bytes": self.gpu }
+ cpu_dict = { "max": self.cpu_watermark, "bytes": self.cpu }
+ shared.log.info(f'Init offload: type=balanced gpu={gpu_dict} cpu={cpu_dict}')
super().__init__()
def init_hook(self, module):
@@ -411,7 +416,7 @@ def detach_hook(self, module):
def apply_balanced_offload(sd_model):
global offload_hook_instance # pylint: disable=global-statement
- if offload_hook_instance is None:
+ if offload_hook_instance is None or offload_hook_instance.min_watermark != shared.opts.diffusers_offload_min_gpu_memory or offload_hook_instance.max_watermark != shared.opts.diffusers_offload_max_gpu_memory:
offload_hook_instance = OffloadHook()
t0 = time.time()
excluded = ['OmniGenPipeline']
From 03b362e799f7934c792eafceaa9c8367ffceb0aa Mon Sep 17 00:00:00 2001
From: Pablo Hellmann
Date: Sun, 1 Dec 2024 01:23:13 +0100
Subject: [PATCH 062/162] small changes and removed useless css
---
javascript/black-teal-reimagined.css | 15 ++-------------
1 file changed, 2 insertions(+), 13 deletions(-)
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index 0a985b786..25f4a3f2c 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -392,13 +392,13 @@ input[type='range']::-moz-range-track {
}
/* Form Styles */
-div.form {
+div.form, #txt2img_seed_row, #txt2img_subseed_row {
border-width: 0;
box-shadow: var(--shadow-md);
background: var(--background-fill-primary);
border-bottom: 3px solid var(--highlight-color);
padding: 3px;
- border-radius: var(--radius-md);
+ border-radius: var(--radius-lg);
margin: 1px;
}
@@ -700,17 +700,6 @@ svg.feather.feather-image,
height: 2.4em;
}
-#footer,
-#style_pos_col,
-#style_neg_col,
-#roll_col,
-#extras_upscaler_2,
-#extras_upscaler_2_visibility,
-#txt2img_seed_resize_from_w,
-#txt2img_seed_resize_from_h {
- display: none;
-}
-
#save-animation {
border-radius: var(--radius-sm) !important;
margin-bottom: 16px;
From 4089af7c032fe06d98fbc5b3801ccd5fc7d5396b Mon Sep 17 00:00:00 2001
From: Disty0
Date: Sun, 1 Dec 2024 03:52:57 +0300
Subject: [PATCH 063/162] Fix NaNs on Intel with Lora + Offloading
---
modules/lora/networks.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 69db5fce3..4312f3405 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -412,6 +412,9 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
batch_updown = batch_updown.to(devices.cpu, non_blocking=True)
if batch_ex_bias is not None:
batch_ex_bias = batch_ex_bias.to(devices.cpu, non_blocking=True)
+ if devices.backend == "ipex":
+ # using non_blocking=True here causes NaNs on Intel
+ torch.xpu.synchronize(devices.device)
t1 = time.time()
timer['move'] += t1 - t0
except RuntimeError as e:
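
The NaN fix above follows the usual rule for asynchronous copies: a tensor moved with `non_blocking=True` must be synchronized before its contents are consumed, and on Intel XPU that means an explicit `torch.xpu.synchronize`. A minimal sketch of the pattern, assuming SD.Next's `modules.devices` helper is importable and an IPEX backend is active.

```python
import torch
from modules import devices  # SD.Next helper, as used in the patch

updown = torch.randn(1024, 1024, device=devices.device)
updown = updown.to(devices.cpu, non_blocking=True)  # async copy, may still be in flight
if devices.backend == "ipex":
    torch.xpu.synchronize(devices.device)           # wait for the copy before reading the data
```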
From e74c038f6405a33753fb12a4e516367cf808b551 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 30 Nov 2024 19:53:08 -0500
Subject: [PATCH 064/162] interruptible lora apply
Signed-off-by: Vladimir Mandic
---
modules/lora/networks.py | 2 ++
scripts/xyz_grid_shared.py | 2 +-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 4312f3405..83a62eb5a 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -459,6 +459,8 @@ def network_load():
pbar = nullcontext()
with pbar:
for _, module in modules:
+ if shared.state.interrupted:
+ continue
devices_used.append(network_apply_weights(module))
if task is not None:
pbar.update(task, advance=1) # progress bar becomes visible if operation takes more than 1sec
diff --git a/scripts/xyz_grid_shared.py b/scripts/xyz_grid_shared.py
index 82387fab8..3fc8d32c8 100644
--- a/scripts/xyz_grid_shared.py
+++ b/scripts/xyz_grid_shared.py
@@ -192,7 +192,7 @@ def apply_vae(p, x, xs):
def list_lora():
import sys
- lora = [v for k, v in sys.modules.items() if k == 'networks'][0]
+ lora = [v for k, v in sys.modules.items() if k == 'networks' or k == 'modules.lora.networks'][0]
loras = [v.fullname for v in lora.available_networks.values()]
return ['None'] + loras
From 04c82501d503075edf7afd68157dce450491dc10 Mon Sep 17 00:00:00 2001
From: Pablo Hellmann
Date: Sun, 1 Dec 2024 02:19:47 +0100
Subject: [PATCH 065/162] darker colors and fancy live preview
---
javascript/black-teal-reimagined.css | 28 ++++++++++++++++++++++++++--
1 file changed, 26 insertions(+), 2 deletions(-)
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index 25f4a3f2c..7b3a281ee 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -54,8 +54,8 @@ html {
/* Background Colors */
--background-color: var(--neutral-950);
- --background-fill-primary: var(--neutral-700);
- --input-background-fill: var(--neutral-800);
+ --background-fill-primary: var(--neutral-800);
+ --input-background-fill: var(--neutral-900);
/* Padding and Borders */
--input-padding: 4px;
@@ -402,6 +402,30 @@ div.form, #txt2img_seed_row, #txt2img_subseed_row {
margin: 1px;
}
+/* Image preview styling */
+#txt2img_gallery {
+ background: var(--background-fill-primary);
+ padding: 5px;
+ margin: 0px;
+}
+
+@keyframes colorChange {
+ 0% {
+ background-color: var(--neutral-800);
+ }
+ 50% {
+ background-color: var(--neutral-700);
+ }
+ 100% {
+ background-color: var(--neutral-800);
+ }
+}
+
+.livePreview {
+ animation: colorChange 3s ease-in-out infinite; /* Adjust the duration as needed */
+ padding: 5px;
+}
+
/* Gradio Style Classes */
fieldset .gr-block.gr-box,
label.block span {
From 75462dd21e71c87d0b619e7898df52d6a6dba434 Mon Sep 17 00:00:00 2001
From: Pablo Hellmann
Date: Sun, 1 Dec 2024 02:39:56 +0100
Subject: [PATCH 066/162] Fancy loader
---
javascript/black-teal-reimagined.css | 111 ++++++++++++++++++++++++++-
1 file changed, 109 insertions(+), 2 deletions(-)
diff --git a/javascript/black-teal-reimagined.css b/javascript/black-teal-reimagined.css
index 7b3a281ee..28176d247 100644
--- a/javascript/black-teal-reimagined.css
+++ b/javascript/black-teal-reimagined.css
@@ -994,8 +994,116 @@ svg.feather.feather-image,
height: 100%;
}
-/* Token counters styling */
+/* loader */
+.splash {
+ position: fixed;
+ top: 0;
+ left: 0;
+ width: 100vw;
+ height: 100vh;
+ z-index: 1000;
+ display: flex;
+ flex-direction: column;
+ align-items: center;
+ justify-content: center;
+ background-color: rgba(0, 0, 0, 0.8);
+}
+
+.motd {
+ margin-top: 1em;
+ color: var(--body-text-color-subdued);
+ font-family: monospace;
+ font-variant: all-petite-caps;
+ font-size: 1.2em;
+}
+
+.splash-img {
+ margin: 0;
+ width: 512px;
+ height: 512px;
+ background-repeat: no-repeat;
+ animation: color 8s infinite alternate, move 3s infinite alternate;
+}
+
+.loading {
+ color: white;
+ position: border-box;
+ top: 85%;
+ font-size: 1.5em;
+}
+
+.loader {
+ width: 100px;
+ height: 100px;
+ border: var(--spacing-md) solid transparent;
+ border-radius: 50%;
+ border-top: var(--spacing-md) solid var(--primary-600);
+ animation: spin 2s linear infinite, pulse 1.5s ease-in-out infinite;
+ position: border-box;
+}
+
+.loader::before,
+.loader::after {
+ content: "";
+ position: absolute;
+ top: 6px;
+ bottom: 6px;
+ left: 6px;
+ right: 6px;
+ border-radius: 50%;
+ border: var(--spacing-md) solid transparent;
+}
+
+.loader::before {
+ border-top-color: var(--primary-900);
+ animation: spin 3s linear infinite;
+}
+
+.loader::after {
+ border-top-color: var(--primary-300);
+ animation: spin 1.5s linear infinite;
+}
+@keyframes move {
+ 0% {
+ transform: translateY(0);
+ }
+ 50% {
+ transform: translateY(-10px);
+ }
+ 100% {
+ transform: translateY(0);
+ }
+}
+
+@keyframes spin {
+ from {
+ transform: rotate(0deg);
+ }
+ to {
+ transform: rotate(360deg);
+ }
+}
+
+@keyframes pulse {
+ 0%, 100% {
+ transform: scale(1);
+ }
+ 50% {
+ transform: scale(1.1);
+ }
+}
+
+@keyframes color {
+ 0% {
+ filter: hue-rotate(0deg);
+ }
+ 100% {
+ filter: hue-rotate(360deg);
+ }
+}
+
+/* Token counters styling */
#txt2img_token_counter, #txt2img_negative_token_counter {
display: flex;
flex-direction: row;
@@ -1063,7 +1171,6 @@ svg.feather.feather-image,
--input-radius: var(--radius-lg);
--input-text-size: var(--text-md);
--input-text-weight: 400;
- --loader-color: var(--color-accent);
--prose-text-size: var(--text-md);
--prose-text-weight: 400;
--prose-header-text-weight: 400;
From 507636d0a15385af8abfebf0c8145e2a5356bd0b Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 1 Dec 2024 10:54:51 -0500
Subject: [PATCH 067/162] lora-refactor
Signed-off-by: Vladimir Mandic
---
modules/extra_networks.py | 5 +-
modules/lora/extra_networks_lora.py | 5 +-
modules/lora/lora_convert.py | 31 +++
modules/lora/lyco_helpers.py | 4 +-
modules/lora/networks.py | 364 +++++++++++++---------------
scripts/xyz_grid.py | 1 +
6 files changed, 208 insertions(+), 202 deletions(-)
diff --git a/modules/extra_networks.py b/modules/extra_networks.py
index 010157af9..fca48e21c 100644
--- a/modules/extra_networks.py
+++ b/modules/extra_networks.py
@@ -17,8 +17,9 @@ def register_extra_network(extra_network):
def register_default_extra_networks():
from modules.ui_extra_networks_styles import ExtraNetworkStyles
register_extra_network(ExtraNetworkStyles())
- from modules.lora.extra_networks_lora import ExtraNetworkLora
- register_extra_network(ExtraNetworkLora())
+ if shared.native:
+ from modules.lora.networks import extra_network_lora
+ register_extra_network(extra_network_lora)
if shared.opts.hypernetwork_enabled:
from modules.ui_extra_networks_hypernet import ExtraNetworkHypernet
register_extra_network(ExtraNetworkHypernet())
diff --git a/modules/lora/extra_networks_lora.py b/modules/lora/extra_networks_lora.py
index c875ba0d5..d58cebd8f 100644
--- a/modules/lora/extra_networks_lora.py
+++ b/modules/lora/extra_networks_lora.py
@@ -4,6 +4,7 @@
import modules.lora.networks as networks
from modules import extra_networks, shared
+
# from https://github.com/cheald/sd-webui-loractl/blob/master/loractl/lib/utils.py
def get_stepwise(param, step, steps):
def sorted_positions(raw_steps):
@@ -122,8 +123,8 @@ def activate(self, p, params_list, step=0):
self.active = True
self.model = shared.opts.sd_model_checkpoint
names, te_multipliers, unet_multipliers, dyn_dims = parse(p, params_list, step)
- networks.load_networks(names, te_multipliers, unet_multipliers, dyn_dims) # load
- networks.network_load() # backup/apply
+ networks.network_load(names, te_multipliers, unet_multipliers, dyn_dims) # load
+ networks.network_process()
if len(networks.loaded_networks) > 0 and step == 0:
infotext(p)
prompt(p)
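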
diff --git a/modules/lora/lora_convert.py b/modules/lora/lora_convert.py
index dc86a24cf..032ffa5a3 100644
--- a/modules/lora/lora_convert.py
+++ b/modules/lora/lora_convert.py
@@ -476,3 +476,34 @@ def _convert_sd_scripts_to_ai_toolkit(sds_sd):
return new_state_dict
return _convert_sd_scripts_to_ai_toolkit(state_dict)
+
+
+def assign_network_names_to_compvis_modules(sd_model):
+ if sd_model is None:
+ return
+ sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
+ network_layer_mapping = {}
+ if hasattr(sd_model, 'text_encoder') and sd_model.text_encoder is not None:
+ for name, module in sd_model.text_encoder.named_modules():
+ prefix = "lora_te1_" if hasattr(sd_model, 'text_encoder_2') else "lora_te_"
+ network_name = prefix + name.replace(".", "_")
+ network_layer_mapping[network_name] = module
+ module.network_layer_name = network_name
+ if hasattr(sd_model, 'text_encoder_2'):
+ for name, module in sd_model.text_encoder_2.named_modules():
+ network_name = "lora_te2_" + name.replace(".", "_")
+ network_layer_mapping[network_name] = module
+ module.network_layer_name = network_name
+ if hasattr(sd_model, 'unet'):
+ for name, module in sd_model.unet.named_modules():
+ network_name = "lora_unet_" + name.replace(".", "_")
+ network_layer_mapping[network_name] = module
+ module.network_layer_name = network_name
+ if hasattr(sd_model, 'transformer'):
+ for name, module in sd_model.transformer.named_modules():
+ network_name = "lora_transformer_" + name.replace(".", "_")
+ network_layer_mapping[network_name] = module
+ if "norm" in network_name and "linear" not in network_name and shared.sd_model_type != "sd3":
+ continue
+ module.network_layer_name = network_name
+ shared.sd_model.network_layer_mapping = network_layer_mapping
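The relocated helper above derives LoRA layer keys from module paths: a component prefix plus the path with dots replaced by underscores. A small self-check of that convention (illustrative only):

    def to_network_name(prefix: str, module_path: str) -> str:
        # e.g. "down_blocks.0.attentions.0" -> "lora_unet_down_blocks_0_attentions_0"
        return prefix + module_path.replace('.', '_')

    assert to_network_name('lora_unet_', 'down_blocks.0.attentions.0') == 'lora_unet_down_blocks_0_attentions_0'
    assert to_network_name('lora_te2_', 'text_model.encoder.layers.0') == 'lora_te2_text_model_encoder_layers_0'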
diff --git a/modules/lora/lyco_helpers.py b/modules/lora/lyco_helpers.py
index 9a16d25ab..ac4f2419f 100644
--- a/modules/lora/lyco_helpers.py
+++ b/modules/lora/lyco_helpers.py
@@ -12,13 +12,13 @@ def rebuild_conventional(up, down, shape, dyn_dim=None):
if dyn_dim is not None:
up = up[:, :dyn_dim]
down = down[:dyn_dim, :]
- return (up @ down).reshape(shape)
+ return (up @ down).reshape(shape).to(up.dtype)
def rebuild_cp_decomposition(up, down, mid):
up = up.reshape(up.size(0), -1)
down = down.reshape(down.size(0), -1)
- return torch.einsum('n m k l, i n, m j -> i j k l', mid, up, down)
+ return torch.einsum('n m k l, i n, m j -> i j k l', mid, up, down).to(up.dtype)
# copied from https://github.com/KohakuBlueleaf/LyCORIS/blob/dev/lycoris/modules/lokr.py
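The `.to(up.dtype)` casts added above keep the reconstructed delta in the dtype of the incoming factors even when the matmul is internally upcast. For reference, a minimal sketch of the low-rank reconstruction itself:

    import torch

    def lora_delta(up: torch.Tensor, down: torch.Tensor, shape) -> torch.Tensor:
        # up: (out, rank), down: (rank, in); the product is the dense weight delta
        return (up @ down).reshape(shape).to(up.dtype)

    weight = torch.randn(8, 16)
    up, down = torch.randn(8, 4), torch.randn(4, 16)
    patched = weight + 0.8 * lora_delta(up, down, weight.shape)  # multiplier-scaled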
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 83a62eb5a..e0f2134c9 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -19,16 +19,16 @@
import modules.lora.network_glora as network_glora
import modules.lora.network_overrides as network_overrides
import modules.lora.lora_convert as lora_convert
+from modules.lora.extra_networks_lora import ExtraNetworkLora
from modules import shared, devices, sd_models, sd_models_compile, errors, files_cache, model_quant
debug = os.environ.get('SD_LORA_DEBUG', None) is not None
-extra_network_lora = None
+extra_network_lora = ExtraNetworkLora()
available_networks = {}
available_network_aliases = {}
loaded_networks: List[network.Network] = []
timer = { 'list': 0, 'load': 0, 'backup': 0, 'calc': 0, 'apply': 0, 'move': 0, 'restore': 0, 'deactivate': 0 }
-backup_size = 0
bnb = None
lora_cache = {}
diffuser_loaded = []
@@ -60,36 +60,7 @@ def get_timers():
return t
-def assign_network_names_to_compvis_modules(sd_model):
- if sd_model is None:
- return
- sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatiblility
- network_layer_mapping = {}
- if hasattr(sd_model, 'text_encoder') and sd_model.text_encoder is not None:
- for name, module in sd_model.text_encoder.named_modules():
- prefix = "lora_te1_" if hasattr(sd_model, 'text_encoder_2') else "lora_te_"
- network_name = prefix + name.replace(".", "_")
- network_layer_mapping[network_name] = module
- module.network_layer_name = network_name
- if hasattr(sd_model, 'text_encoder_2'):
- for name, module in sd_model.text_encoder_2.named_modules():
- network_name = "lora_te2_" + name.replace(".", "_")
- network_layer_mapping[network_name] = module
- module.network_layer_name = network_name
- if hasattr(sd_model, 'unet'):
- for name, module in sd_model.unet.named_modules():
- network_name = "lora_unet_" + name.replace(".", "_")
- network_layer_mapping[network_name] = module
- module.network_layer_name = network_name
- if hasattr(sd_model, 'transformer'):
- for name, module in sd_model.transformer.named_modules():
- network_name = "lora_transformer_" + name.replace(".", "_")
- network_layer_mapping[network_name] = module
- if "norm" in network_name and "linear" not in network_name and shared.sd_model_type != "sd3":
- continue
- module.network_layer_name = network_name
- shared.sd_model.network_layer_mapping = network_layer_mapping
-
+# section: load networks from disk
def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_default_multiplier) -> Union[network.Network, None]:
name = name.replace(".", "_")
@@ -120,7 +91,7 @@ def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_
return net
-def load_network(name, network_on_disk) -> Union[network.Network, None]:
+def load_safetensors(name, network_on_disk) -> Union[network.Network, None]:
if not shared.sd_loaded:
return None
@@ -139,7 +110,7 @@ def load_network(name, network_on_disk) -> Union[network.Network, None]:
sd = lora_convert._convert_kohya_sd3_lora_to_diffusers(sd) or sd # pylint: disable=protected-access
except ValueError: # EAFP for diffusers PEFT keys
pass
- assign_network_names_to_compvis_modules(shared.sd_model)
+ lora_convert.assign_network_names_to_compvis_modules(shared.sd_model)
keys_failed_to_match = {}
matched_networks = {}
bundle_embeddings = {}
@@ -217,9 +188,46 @@ def maybe_recompile_model(names, te_multipliers):
return recompile_model
-def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
+def list_available_networks():
+ t0 = time.time()
+ available_networks.clear()
+ available_network_aliases.clear()
+ forbidden_network_aliases.clear()
+ available_network_hash_lookup.clear()
+ forbidden_network_aliases.update({"none": 1, "Addams": 1})
+ if not os.path.exists(shared.cmd_opts.lora_dir):
+ shared.log.warning(f'LoRA directory not found: path="{shared.cmd_opts.lora_dir}"')
+
+ def add_network(filename):
+ if not os.path.isfile(filename):
+ return
+ name = os.path.splitext(os.path.basename(filename))[0]
+ name = name.replace('.', '_')
+ try:
+ entry = network.NetworkOnDisk(name, filename)
+ available_networks[entry.name] = entry
+ if entry.alias in available_network_aliases:
+ forbidden_network_aliases[entry.alias.lower()] = 1
+ if shared.opts.lora_preferred_name == 'filename':
+ available_network_aliases[entry.name] = entry
+ else:
+ available_network_aliases[entry.alias] = entry
+ if entry.shorthash:
+ available_network_hash_lookup[entry.shorthash] = entry
+ except OSError as e: # should catch FileNotFoundError and PermissionError etc.
+ shared.log.error(f'LoRA: filename="{filename}" {e}')
+
+ candidates = list(files_cache.list_files(shared.cmd_opts.lora_dir, ext_filter=[".pt", ".ckpt", ".safetensors"]))
+ with concurrent.futures.ThreadPoolExecutor(max_workers=shared.max_workers) as executor:
+ for fn in candidates:
+ executor.submit(add_network, fn)
+ t1 = time.time()
+ timer['list'] = t1 - t0
+ shared.log.info(f'Available LoRAs: path="{shared.cmd_opts.lora_dir}" items={len(available_networks)} folders={len(forbidden_network_aliases)} time={t1 - t0:.2f}')
+
+
+def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
timer['list'] = 0
- global backup_size # pylint: disable=global-statement
networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names]
if any(x is None for x in networks_on_disk):
list_available_networks()
@@ -244,7 +252,7 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
if shared.opts.lora_force_diffusers or network_overrides.check_override(shorthash): # OpenVINO only works with Diffusers LoRa loading
net = load_diffusers(name, network_on_disk, lora_scale=te_multipliers[i] if te_multipliers else shared.opts.extra_networks_default_multiplier)
else:
- net = load_network(name, network_on_disk)
+ net = load_safetensors(name, network_on_disk)
if net is not None:
net.mentioned_name = name
network_on_disk.read_hash()
@@ -294,17 +302,108 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
devices.torch_gc()
t1 = time.time()
- backup_size = 0
timer['load'] = t1 - t0
-def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias):
+# section: process loaded networks
+
+def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight, network_layer_name, wanted_names):
+ global bnb # pylint: disable=W0603
+ backup_size = 0
+ if len(loaded_networks) > 0 and network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419
+ t0 = time.time()
+ weights_backup = getattr(self, "network_weights_backup", None)
+ if weights_backup is None and wanted_names != (): # pylint: disable=C1803
+ self.network_weights_backup = None
+ if getattr(weight, "quant_type", None) in ['nf4', 'fp4']:
+ if bnb is None:
+ bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
+ if bnb is not None:
+ with devices.inference_context():
+ weights_backup = bnb.functional.dequantize_4bit(weight, quant_state=weight.quant_state, quant_type=weight.quant_type, blocksize=weight.blocksize,)
+ self.quant_state = weight.quant_state
+ self.quant_type = weight.quant_type
+ self.blocksize = weight.blocksize
+ else:
+ weights_backup = weight.clone()
+ else:
+ weights_backup = weight.clone()
+ if shared.opts.lora_offload_backup and weights_backup is not None:
+ weights_backup = weights_backup.to(devices.cpu)
+ self.network_weights_backup = weights_backup
+ bias_backup = getattr(self, "network_bias_backup", None)
+ if bias_backup is None:
+ if getattr(self, 'bias', None) is not None:
+ bias_backup = self.bias.clone()
+ else:
+ bias_backup = None
+ if shared.opts.lora_offload_backup and bias_backup is not None:
+ bias_backup = bias_backup.to(devices.cpu)
+ self.network_bias_backup = bias_backup
+ if getattr(self, 'network_weights_backup', None) is not None:
+ backup_size += self.network_weights_backup.numel() * self.network_weights_backup.element_size()
+ if getattr(self, 'network_bias_backup', None) is not None:
+ backup_size += self.network_bias_backup.numel() * self.network_bias_backup.element_size()
+ t1 = time.time()
+ timer['backup'] += t1 - t0
+ return backup_size
+
+
+def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight, network_layer_name):
+ if shared.opts.diffusers_offload_mode == "none":
+ self.to(devices.device, non_blocking=True)
+ batch_updown = None
+ batch_ex_bias = None
+ for net in loaded_networks:
+ module = net.modules.get(network_layer_name, None)
+ if module is not None and hasattr(self, 'weight'):
+ try:
+ t0 = time.time()
+ updown, ex_bias = module.calc_updown(weight)
+ t1 = time.time()
+ if batch_updown is not None and updown is not None:
+ batch_updown += updown
+ else:
+ batch_updown = updown
+ if batch_ex_bias is not None and ex_bias is not None:
+ batch_ex_bias += ex_bias
+ else:
+ batch_ex_bias = ex_bias
+ timer['calc'] += t1 - t0
+ if shared.opts.diffusers_offload_mode != "none":
+ t0 = time.time()
+ if batch_updown is not None:
+ batch_updown = batch_updown.to(devices.cpu, non_blocking=True)
+ if batch_ex_bias is not None:
+ batch_ex_bias = batch_ex_bias.to(devices.cpu, non_blocking=True)
+ if devices.backend == "ipex":
+ # using non_blocking=True here causes NaNs on Intel
+ torch.xpu.synchronize(devices.device)
+ t1 = time.time()
+ timer['move'] += t1 - t0
+ except RuntimeError as e:
+ extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
+ if debug:
+ module_name = net.modules.get(network_layer_name, None)
+ shared.log.error(f'LoRA apply weight name="{net.name}" module="{module_name}" layer="{network_layer_name}" {e}')
+ errors.display(e, 'LoRA')
+ raise RuntimeError('LoRA apply weight') from e
+ continue
+ if module is None:
+ continue
+ shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
+ extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
+ return batch_updown, batch_ex_bias
+
+
+def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias):
t0 = time.time()
weights_backup = getattr(self, "network_weights_backup", None)
bias_backup = getattr(self, "network_bias_backup", None)
if weights_backup is None and bias_backup is None:
- return
+ return None, None
if weights_backup is not None:
+ self.weight = None
if updown is not None and len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
if updown is not None:
@@ -312,131 +411,28 @@ def set_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm
if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
else:
- self.weight.copy_(new_weight, non_blocking=True)
+ self.weight = torch.nn.Parameter(new_weight, requires_grad=False)
del new_weight
else:
- self.weight.copy_(weights_backup, non_blocking=True)
+ self.weight = torch.nn.Parameter(weights_backup, requires_grad=False)
if hasattr(self, "qweight") and hasattr(self, "freeze"):
self.freeze()
if bias_backup is not None:
+ self.bias = None
if ex_bias is not None:
new_weight = ex_bias.to(devices.device, non_blocking=True) + bias_backup.to(devices.device, non_blocking=True)
- self.bias.copy_(new_weight, non_blocking=True)
+ self.bias = torch.nn.Parameter(new_weight, requires_grad=False)
del new_weight
else:
- self.bias.copy_(bias_backup, non_blocking=True)
+ self.bias = torch.nn.Parameter(bias_backup, requires_grad=False)
else:
self.bias = None
t1 = time.time()
timer['apply'] += t1 - t0
+ return self.weight.device, self.weight.dtype
-def maybe_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], wanted_names): # pylint: disable=W0613
- global bnb, backup_size # pylint: disable=W0603
- t0 = time.time()
- weights_backup = getattr(self, "network_weights_backup", None)
- if weights_backup is None and wanted_names != (): # pylint: disable=C1803
- if getattr(self.weight, "quant_type", None) in ['nf4', 'fp4']:
- if bnb is None:
- bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
- if bnb is not None:
- with devices.inference_context():
- weights_backup = bnb.functional.dequantize_4bit(self.weight, quant_state=self.weight.quant_state, quant_type=self.weight.quant_type, blocksize=self.weight.blocksize,)
- self.quant_state = self.weight.quant_state
- self.quant_type = self.weight.quant_type
- self.blocksize = self.weight.blocksize
- else:
- weights_backup = self.weight.clone()
- else:
- weights_backup = self.weight.clone()
- if shared.opts.lora_offload_backup and weights_backup is not None:
- weights_backup = weights_backup.to(devices.cpu)
- self.network_weights_backup = weights_backup
- bias_backup = getattr(self, "network_bias_backup", None)
- if bias_backup is None:
- if getattr(self, 'bias', None) is not None:
- bias_backup = self.bias.clone()
- else:
- bias_backup = None
- if shared.opts.lora_offload_backup and bias_backup is not None:
- bias_backup = bias_backup.to(devices.cpu)
- self.network_bias_backup = bias_backup
- if getattr(self, 'network_weights_backup', None) is not None:
- backup_size += self.network_weights_backup.numel() * self.network_weights_backup.element_size()
- if getattr(self, 'network_bias_backup', None) is not None:
- backup_size += self.network_bias_backup.numel() * self.network_bias_backup.element_size()
- t1 = time.time()
- timer['backup'] += t1 - t0
-
-
-def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv]):
- """
- Applies the currently selected set of networks to the weights of torch layer self.
- If weights already have this particular set of networks applied, does nothing.
- If not, restores orginal weights from backup and alters weights according to networks.
- """
- network_layer_name = getattr(self, 'network_layer_name', None)
- current_names = getattr(self, "network_current_names", ())
- wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks) if len(loaded_networks) > 0 else ()
- with devices.inference_context():
- if len(loaded_networks) > 0 and network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419
- maybe_backup_weights(self, wanted_names)
- if current_names != wanted_names:
- if shared.opts.diffusers_offload_mode == "none":
- self.to(devices.device, non_blocking=True)
- batch_updown = None
- batch_ex_bias = None
- for net in loaded_networks:
- module = net.modules.get(network_layer_name, None)
- if module is not None and hasattr(self, 'weight'):
- try:
- t0 = time.time()
- weight = self.weight.to(devices.device, non_blocking=True) # calculate quant weights once
- t1 = time.time()
- updown, ex_bias = module.calc_updown(weight)
- del weight
- t2 = time.time()
- timer['move'] += t1 - t0
- timer['calc'] += t2 - t1
- if batch_updown is not None and updown is not None:
- batch_updown += updown
- else:
- batch_updown = updown
- if batch_ex_bias is not None and ex_bias is not None:
- batch_ex_bias += ex_bias
- else:
- batch_ex_bias = ex_bias
- if shared.opts.diffusers_offload_mode != "none":
- t0 = time.time()
- if batch_updown is not None:
- batch_updown = batch_updown.to(devices.cpu, non_blocking=True)
- if batch_ex_bias is not None:
- batch_ex_bias = batch_ex_bias.to(devices.cpu, non_blocking=True)
- if devices.backend == "ipex":
- # using non_blocking=True here causes NaNs on Intel
- torch.xpu.synchronize(devices.device)
- t1 = time.time()
- timer['move'] += t1 - t0
- except RuntimeError as e:
- extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
- if debug:
- module_name = net.modules.get(network_layer_name, None)
- shared.log.error(f'LoRA apply weight name="{net.name}" module="{module_name}" layer="{network_layer_name}" {e}')
- errors.display(e, 'LoRA')
- raise RuntimeError('LoRA apply weight') from e
- continue
- if module is None:
- continue
- shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
- extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
- self.network_current_names = wanted_names
- set_weights(self, batch_updown, batch_ex_bias) # Set or restore weights from backup
- if batch_updown is not None or batch_ex_bias is not None:
- return self.weight.device
- return None
-
-
-def network_load():
+def network_process():
timer['backup'] = 0
timer['calc'] = 0
timer['apply'] = 0
@@ -450,63 +446,39 @@ def network_load():
component = getattr(sd_model, component_name, None)
if component is not None and hasattr(component, 'named_modules'):
modules += list(component.named_modules())
- devices_used = []
if len(loaded_networks) > 0:
pbar = rp.Progress(rp.TextColumn('[cyan]{task.description}'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), console=shared.console)
task = pbar.add_task(description='Apply network: type=LoRA' , total=len(modules))
else:
task = None
pbar = nullcontext()
- with pbar:
+ with devices.inference_context(), pbar:
+ wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks) if len(loaded_networks) > 0 else ()
+ applied = 0
+ backup_size = 0
+ weights_devices = []
+ weights_dtypes = []
for _, module in modules:
- if shared.state.interrupted:
+ network_layer_name = getattr(module, 'network_layer_name', None)
+ current_names = getattr(module, "network_current_names", ())
+ if shared.state.interrupted or network_layer_name is None or current_names == wanted_names:
continue
- devices_used.append(network_apply_weights(module))
+ weight = module.weight.to(devices.device, non_blocking=True) if hasattr(module, 'weight') else None
+ backup_size += network_backup_weights(module, weight, network_layer_name, wanted_names)
+ batch_updown, batch_ex_bias = network_calc_weights(module, weight, network_layer_name)
+ del weight
+ weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias)
+ weights_devices.append(weights_device)
+ weights_dtypes.append(weights_dtype)
+ module.network_current_names = wanted_names
if task is not None:
pbar.update(task, advance=1) # progress bar becomes visible if operation takes more than 1sec
+ if batch_updown is not None or batch_ex_bias is not None:
+ applied += 1
# pbar.remove_task(task)
- if debug:
- devices_used = [d for d in devices_used if d is not None]
- devices_set = list(set(devices_used))
- shared.log.debug(f'Load network: type=LoRA modules={len(modules)} apply={len(devices_used)} device={devices_set} backup={backup_size} time={get_timers()}')
+ weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403
+ if debug and len(loaded_networks) > 0:
+ shared.log.debug(f'Load network: type=LoRA modules={len(modules)} networks={len(loaded_networks)} apply={applied} device={weights_devices} dtype={weights_dtypes} backup={backup_size} time={get_timers()}')
modules.clear()
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.set_diffuser_offload(sd_model, op="model")
-
-
-def list_available_networks():
- t0 = time.time()
- available_networks.clear()
- available_network_aliases.clear()
- forbidden_network_aliases.clear()
- available_network_hash_lookup.clear()
- forbidden_network_aliases.update({"none": 1, "Addams": 1})
- if not os.path.exists(shared.cmd_opts.lora_dir):
- shared.log.warning(f'LoRA directory not found: path="{shared.cmd_opts.lora_dir}"')
-
- def add_network(filename):
- if not os.path.isfile(filename):
- return
- name = os.path.splitext(os.path.basename(filename))[0]
- name = name.replace('.', '_')
- try:
- entry = network.NetworkOnDisk(name, filename)
- available_networks[entry.name] = entry
- if entry.alias in available_network_aliases:
- forbidden_network_aliases[entry.alias.lower()] = 1
- if shared.opts.lora_preferred_name == 'filename':
- available_network_aliases[entry.name] = entry
- else:
- available_network_aliases[entry.alias] = entry
- if entry.shorthash:
- available_network_hash_lookup[entry.shorthash] = entry
- except OSError as e: # should catch FileNotFoundError and PermissionError etc.
- shared.log.error(f'LoRA: filename="{filename}" {e}')
-
- candidates = list(files_cache.list_files(shared.cmd_opts.lora_dir, ext_filter=[".pt", ".ckpt", ".safetensors"]))
- with concurrent.futures.ThreadPoolExecutor(max_workers=shared.max_workers) as executor:
- for fn in candidates:
- executor.submit(add_network, fn)
- t1 = time.time()
- timer['list'] = t1 - t0
- shared.log.info(f'Available LoRAs: path="{shared.cmd_opts.lora_dir}" items={len(available_networks)} folders={len(forbidden_network_aliases)} time={t1 - t0:.2f}')
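Taken together, the networks.py changes above replace the single monolithic apply with a per-module pipeline: back up the original weights once, sum the deltas from all loaded networks, write the patched weight, and record which network set the module now carries. A reduced, self-contained sketch of that flow (helper names and bodies are illustrative, not the actual functions):

    import torch

    def backup_weights(module):
        if getattr(module, 'weights_backup', None) is None:
            module.weights_backup = module.weight.detach().clone()

    def calc_delta(module):
        # stand-in for summing calc_updown() over every loaded network
        return torch.zeros_like(module.weight), None

    def apply_delta(module, updown, ex_bias):
        base = module.weights_backup
        new_weight = base + updown if updown is not None else base
        module.weight = torch.nn.Parameter(new_weight, requires_grad=False)

    def process_module(module, wanted_names) -> bool:
        if getattr(module, 'network_current_names', ()) == wanted_names:
            return False                      # already carries this LoRA set
        backup_weights(module)                # clone original weights once
        updown, ex_bias = calc_delta(module)  # combined delta from loaded networks
        apply_delta(module, updown, ex_bias)  # patched weight, or restored backup
        module.network_current_names = wanted_names
        return True

    process_module(torch.nn.Linear(16, 8), (('some_lora', 1.0, 1.0, None),))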
diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py
index 60e608c76..0395ce736 100644
--- a/scripts/xyz_grid.py
+++ b/scripts/xyz_grid.py
@@ -12,6 +12,7 @@
from scripts.xyz_grid_shared import str_permutations, list_to_csv_string, re_range # pylint: disable=no-name-in-module
from scripts.xyz_grid_classes import axis_options, AxisOption, SharedSettingsStackHelper # pylint: disable=no-name-in-module
from scripts.xyz_grid_draw import draw_xyz_grid # pylint: disable=no-name-in-module
+from scripts.xyz_grid_shared import apply_field, apply_task_args, apply_setting, apply_prompt, apply_order, apply_sampler, apply_hr_sampler_name, confirm_samplers, apply_checkpoint, apply_refiner, apply_unet, apply_dict, apply_clip_skip, apply_vae, list_lora, apply_lora, apply_lora_strength, apply_te, apply_styles, apply_upscaler, apply_context, apply_detailer, apply_override, apply_processing, apply_options, apply_seed, format_value_add_label, format_value, format_value_join_list, do_nothing, format_nothing, str_permutations # pylint: disable=no-name-in-module, unused-import
from modules import shared, errors, scripts, images, processing
from modules.ui_components import ToolButton
import modules.ui_symbols as symbols
From 023b13b6cb7a2d1bfaed0a55022e29bf2efd2d13 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 1 Dec 2024 15:34:25 -0500
Subject: [PATCH 068/162] balanced offload improvements
Signed-off-by: Vladimir Mandic
---
modules/call_queue.py | 20 +++++++++-------
modules/devices.py | 4 ++--
modules/lora/networks.py | 11 +++++----
modules/memstats.py | 18 +++++++++++---
modules/processing_diffusers.py | 2 ++
modules/prompt_parser_diffusers.py | 38 +++++++++++++++++-------------
modules/sd_models.py | 20 +++++++---------
modules/shared.py | 2 +-
modules/ui_control.py | 19 +++++++++------
9 files changed, 80 insertions(+), 54 deletions(-)
diff --git a/modules/call_queue.py b/modules/call_queue.py
index cdc2fe1f7..11ba7b56e 100644
--- a/modules/call_queue.py
+++ b/modules/call_queue.py
@@ -73,16 +73,20 @@ def f(*args, extra_outputs_array=extra_outputs, **kwargs):
elapsed_m = int(elapsed // 60)
elapsed_s = elapsed % 60
elapsed_text = f"{elapsed_m}m {elapsed_s:.2f}s" if elapsed_m > 0 else f"{elapsed_s:.2f}s"
- summary = timer.process.summary(min_time=0.1, total=False).replace('=', ' ')
- vram_html = ''
+ summary = timer.process.summary(min_time=0.25, total=False).replace('=', ' ')
+ gpu = ''
+ cpu = ''
if not shared.mem_mon.disabled:
vram = {k: -(v//-(1024*1024)) for k, v in shared.mem_mon.read().items()}
- used = round(100 * vram['used'] / (vram['total'] + 0.001))
- if vram.get('active_peak', 0) > 0:
- vram_html = " | "
- vram_html += f"GPU {max(vram['active_peak'], vram['reserved_peak'])} MB {used}%"
- vram_html += f" | retries {vram['retries']} oom {vram['oom']}" if vram.get('retries', 0) > 0 or vram.get('oom', 0) > 0 else ''
+ peak = max(vram['active_peak'], vram['reserved_peak'], vram['used'])
+ used = round(100.0 * peak / vram['total']) if vram['total'] > 0 else 0
+ if used > 0:
+ gpu += f"| GPU {peak} MB {used}%"
+ gpu += f" | retries {vram['retries']} oom {vram['oom']}" if vram.get('retries', 0) > 0 or vram.get('oom', 0) > 0 else ''
+ ram = shared.ram_stats()
+ if ram['used'] > 0:
+ cpu += f"| RAM {ram['used']} GB {round(100.0 * ram['used'] / ram['total'])}%"
if isinstance(res, list):
- res[-1] += f""
+ res[-1] += f""
return tuple(res)
return f
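The stats string above now reports the worst of the peak counters (active peak, reserved peak, current used) instead of instantaneous usage, guards the percentage against a zero total, and adds a RAM line from ram_stats(). The GPU part of that logic in isolation (illustrative):

    def gpu_summary(vram: dict) -> str:
        peak = max(vram.get('active_peak', 0), vram.get('reserved_peak', 0), vram.get('used', 0))
        used = round(100.0 * peak / vram['total']) if vram.get('total', 0) > 0 else 0
        return f'GPU {peak} MB {used}%' if used > 0 else ''

    print(gpu_summary({'active_peak': 6144, 'reserved_peak': 7000, 'used': 5000, 'total': 8192}))  # GPU 7000 MB 85%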
diff --git a/modules/devices.py b/modules/devices.py
index 64968a30c..71eef5726 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -224,7 +224,7 @@ def torch_gc(force=False, fast=False):
timer.process.records['gc'] = 0
timer.process.records['gc'] += t1 - t0
if not force or collected == 0:
- return used_gpu
+ return used_gpu, used_ram
mem = memstats.memory_stats()
saved = round(gpu.get('used', 0) - mem.get('gpu', {}).get('used', 0), 2)
before = { 'gpu': gpu.get('used', 0), 'ram': ram.get('used', 0) }
@@ -233,7 +233,7 @@ def torch_gc(force=False, fast=False):
results = { 'collected': collected, 'saved': saved }
fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
log.debug(f'GC: utilization={utilization} gc={results} before={before} after={after} device={torch.device(get_optimal_device_name())} fn={fn} time={round(t1 - t0, 2)}') # pylint: disable=protected-access
- return used_gpu
+ return used_gpu, used_ram
def set_cuda_sync_mode(mode):
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index e0f2134c9..21d641af6 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -447,8 +447,8 @@ def network_process():
if component is not None and hasattr(component, 'named_modules'):
modules += list(component.named_modules())
if len(loaded_networks) > 0:
- pbar = rp.Progress(rp.TextColumn('[cyan]{task.description}'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), console=shared.console)
- task = pbar.add_task(description='Apply network: type=LoRA' , total=len(modules))
+ pbar = rp.Progress(rp.TextColumn('[cyan]Apply network: type=LoRA'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
+ task = pbar.add_task(description='' , total=len(modules))
else:
task = None
pbar = nullcontext()
@@ -463,7 +463,8 @@ def network_process():
current_names = getattr(module, "network_current_names", ())
if shared.state.interrupted or network_layer_name is None or current_names == wanted_names:
continue
- weight = module.weight.to(devices.device, non_blocking=True) if hasattr(module, 'weight') else None
+ weight = getattr(module, 'weight', None)
+ weight = weight.to(devices.device, non_blocking=True) if weight is not None else None
backup_size += network_backup_weights(module, weight, network_layer_name, wanted_names)
batch_updown, batch_ex_bias = network_calc_weights(module, weight, network_layer_name)
del weight
@@ -472,13 +473,13 @@ def network_process():
weights_dtypes.append(weights_dtype)
module.network_current_names = wanted_names
if task is not None:
- pbar.update(task, advance=1) # progress bar becomes visible if operation takes more than 1sec
+ pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} modules={len(modules)} apply={applied} backup={backup_size}')
if batch_updown is not None or batch_ex_bias is not None:
applied += 1
# pbar.remove_task(task)
weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403
if debug and len(loaded_networks) > 0:
- shared.log.debug(f'Load network: type=LoRA modules={len(modules)} networks={len(loaded_networks)} apply={applied} device={weights_devices} dtype={weights_dtypes} backup={backup_size} time={get_timers()}')
+ shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} modules={len(modules)} apply={applied} device={weights_devices} dtype={weights_dtypes} backup={backup_size} time={get_timers()}')
modules.clear()
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.set_diffuser_offload(sd_model, op="model")
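The progress-bar change above moves the static label into a fixed text column and refreshes the task description on every step, so the bar shows live counts instead of a constant string. A standalone rich example of the same idea (not the SD.Next wiring):

    import time
    from rich.progress import Progress, TextColumn, BarColumn, TaskProgressColumn

    columns = [TextColumn('[cyan]Apply network: type=LoRA'), BarColumn(), TaskProgressColumn(), TextColumn('[cyan]{task.description}')]
    with Progress(*columns) as pbar:
        task = pbar.add_task(description='', total=20)
        for step in range(20):
            time.sleep(0.05)  # stand-in for per-module work
            pbar.update(task, advance=1, description=f'modules=20 apply={step + 1}')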
diff --git a/modules/memstats.py b/modules/memstats.py
index c417165a2..7836f7636 100644
--- a/modules/memstats.py
+++ b/modules/memstats.py
@@ -5,11 +5,12 @@
fail_once = False
+def gb(val: float):
+ return round(val / 1024 / 1024 / 1024, 2)
+
+
def memory_stats():
global fail_once # pylint: disable=global-statement
- def gb(val: float):
- return round(val / 1024 / 1024 / 1024, 2)
-
mem = {}
try:
process = psutil.Process(os.getpid())
@@ -38,3 +39,14 @@ def gb(val: float):
except Exception:
pass
return mem
+
+
+def ram_stats():
+ try:
+ process = psutil.Process(os.getpid())
+ res = process.memory_info()
+ ram_total = 100 * res.rss / process.memory_percent()
+ ram = { 'used': gb(res.rss), 'total': gb(ram_total) }
+ return ram
+ except Exception:
+ return { 'used': 0, 'total': 0 }
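ram_stats() above derives the machine total from the process's own share: if the process resident set is rss bytes and that equals memory_percent() of the system, the total is roughly rss / (memory_percent / 100). A standalone sketch (the committed code additionally wraps this in try/except and rounds to GB):

    import os
    import psutil

    proc = psutil.Process(os.getpid())
    rss = proc.memory_info().rss               # resident set size in bytes
    total = 100 * rss / proc.memory_percent()  # implied total system RAM
    print(f'used={rss / 1024**3:.2f} GB total={total / 1024**3:.2f} GB')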
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 463a15280..c605a761c 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -83,6 +83,8 @@ def process_base(p: processing.StableDiffusionProcessing):
try:
t0 = time.time()
sd_models_compile.check_deepcache(enable=True)
+ if shared.opts.diffusers_offload_mode == "balanced":
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
sd_models.move_model(shared.sd_model, devices.device)
if hasattr(shared.sd_model, 'unet'):
sd_models.move_model(shared.sd_model.unet, devices.device)
diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py
index c74731c6d..d2093351a 100644
--- a/modules/prompt_parser_diffusers.py
+++ b/modules/prompt_parser_diffusers.py
@@ -39,8 +39,8 @@ def prepare_model(pipe = None):
pipe = pipe.pipe
if not hasattr(pipe, "text_encoder"):
return None
- if shared.opts.diffusers_offload_mode == "balanced":
- pipe = sd_models.apply_balanced_offload(pipe)
+ # if shared.opts.diffusers_offload_mode == "balanced":
+ # pipe = sd_models.apply_balanced_offload(pipe)
elif hasattr(pipe, "maybe_free_model_hooks"):
pipe.maybe_free_model_hooks()
devices.torch_gc()
@@ -79,8 +79,8 @@ def __init__(self, prompts, negative_prompts, steps, clip_skip, p):
self.scheduled_encode(pipe, batchidx)
else:
self.encode(pipe, prompt, negative_prompt, batchidx)
- if shared.opts.diffusers_offload_mode == "balanced":
- pipe = sd_models.apply_balanced_offload(pipe)
+ # if shared.opts.diffusers_offload_mode == "balanced":
+ # pipe = sd_models.apply_balanced_offload(pipe)
self.checkcache(p)
debug(f"Prompt encode: time={(time.time() - t0):.3f}")
@@ -199,8 +199,6 @@ def __call__(self, key, step=0):
def compel_hijack(self, token_ids: torch.Tensor, attention_mask: typing.Optional[torch.Tensor] = None) -> torch.Tensor:
- if not devices.same_device(self.text_encoder.device, devices.device):
- sd_models.move_model(self.text_encoder, devices.device)
needs_hidden_states = self.returned_embeddings_type != 1
text_encoder_output = self.text_encoder(token_ids, attention_mask, output_hidden_states=needs_hidden_states, return_dict=True)
@@ -377,25 +375,31 @@ def prepare_embedding_providers(pipe, clip_skip) -> list[EmbeddingsProvider]:
embedding_type = -(clip_skip + 1)
else:
embedding_type = clip_skip
+ embedding_args = {
+ 'truncate': False,
+ 'returned_embeddings_type': embedding_type,
+ 'device': device,
+ 'dtype_for_device_getter': lambda device: devices.dtype,
+ }
if getattr(pipe, "prior_pipe", None) is not None and getattr(pipe.prior_pipe, "tokenizer", None) is not None and getattr(pipe.prior_pipe, "text_encoder", None) is not None:
- provider = EmbeddingsProvider(padding_attention_mask_value=0, tokenizer=pipe.prior_pipe.tokenizer, text_encoder=pipe.prior_pipe.text_encoder, truncate=False, returned_embeddings_type=embedding_type, device=device)
+ provider = EmbeddingsProvider(padding_attention_mask_value=0, tokenizer=pipe.prior_pipe.tokenizer, text_encoder=pipe.prior_pipe.text_encoder, **embedding_args)
embeddings_providers.append(provider)
- no_mask_provider = EmbeddingsProvider(padding_attention_mask_value=1 if "sote" in pipe.sd_checkpoint_info.name.lower() else 0, tokenizer=pipe.prior_pipe.tokenizer, text_encoder=pipe.prior_pipe.text_encoder, truncate=False, returned_embeddings_type=embedding_type, device=device)
+ no_mask_provider = EmbeddingsProvider(padding_attention_mask_value=1 if "sote" in pipe.sd_checkpoint_info.name.lower() else 0, tokenizer=pipe.prior_pipe.tokenizer, text_encoder=pipe.prior_pipe.text_encoder, **embedding_args)
embeddings_providers.append(no_mask_provider)
elif getattr(pipe, "tokenizer", None) is not None and getattr(pipe, "text_encoder", None) is not None:
- if not devices.same_device(pipe.text_encoder.device, devices.device):
- sd_models.move_model(pipe.text_encoder, devices.device)
- provider = EmbeddingsProvider(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder, truncate=False, returned_embeddings_type=embedding_type, device=device)
+ if pipe.text_encoder.__class__.__name__.startswith('CLIP'):
+ sd_models.move_model(pipe.text_encoder, devices.device, force=True)
+ provider = EmbeddingsProvider(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder, **embedding_args)
embeddings_providers.append(provider)
if getattr(pipe, "tokenizer_2", None) is not None and getattr(pipe, "text_encoder_2", None) is not None:
- if not devices.same_device(pipe.text_encoder_2.device, devices.device):
- sd_models.move_model(pipe.text_encoder_2, devices.device)
- provider = EmbeddingsProvider(tokenizer=pipe.tokenizer_2, text_encoder=pipe.text_encoder_2, truncate=False, returned_embeddings_type=embedding_type, device=device)
+ if pipe.text_encoder_2.__class__.__name__.startswith('CLIP'):
+ sd_models.move_model(pipe.text_encoder_2, devices.device, force=True)
+ provider = EmbeddingsProvider(tokenizer=pipe.tokenizer_2, text_encoder=pipe.text_encoder_2, **embedding_args)
embeddings_providers.append(provider)
if getattr(pipe, "tokenizer_3", None) is not None and getattr(pipe, "text_encoder_3", None) is not None:
- if not devices.same_device(pipe.text_encoder_3.device, devices.device):
- sd_models.move_model(pipe.text_encoder_3, devices.device)
- provider = EmbeddingsProvider(tokenizer=pipe.tokenizer_3, text_encoder=pipe.text_encoder_3, truncate=False, returned_embeddings_type=embedding_type, device=device)
+ if pipe.text_encoder_3.__class__.__name__.startswith('CLIP'):
+ sd_models.move_model(pipe.text_encoder_3, devices.device, force=True)
+ provider = EmbeddingsProvider(tokenizer=pipe.tokenizer_3, text_encoder=pipe.text_encoder_3, **embedding_args)
embeddings_providers.append(provider)
return embeddings_providers
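The provider changes above hoist the keyword arguments shared by every EmbeddingsProvider into a single embedding_args dict and splat it at each call site, so a new common argument (such as dtype_for_device_getter) only has to be added once. A generic sketch of the pattern with stand-in objects:

    class Provider:
        def __init__(self, tokenizer, text_encoder, truncate=True, returned_embeddings_type=1, device='cpu'):
            self.tokenizer, self.text_encoder = tokenizer, text_encoder
            self.truncate, self.embeddings_type, self.device = truncate, returned_embeddings_type, device

    embedding_args = {'truncate': False, 'returned_embeddings_type': -2, 'device': 'cuda'}
    providers = [Provider('tok1', 'te1', **embedding_args), Provider('tok2', 'te2', **embedding_args)]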
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 24ceff5ee..83bf6f994 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -401,7 +401,6 @@ def pre_forward(self, module, *args, **kwargs):
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
module.balanced_offload_device_map = device_map
module.balanced_offload_max_memory = max_memory
- module.balanced_offload_active = True
return args, kwargs
def post_forward(self, module, output):
@@ -429,7 +428,7 @@ def apply_balanced_offload(sd_model):
checkpoint_name = sd_model.__class__.__name__
def apply_balanced_offload_to_module(pipe):
- used_gpu = devices.torch_gc(fast=True)
+ used_gpu, used_ram = devices.torch_gc(fast=True)
if hasattr(pipe, "pipe"):
apply_balanced_offload_to_module(pipe.pipe)
if hasattr(pipe, "_internal_dict"):
@@ -438,20 +437,21 @@ def apply_balanced_offload_to_module(pipe):
keys = get_signature(pipe).keys()
for module_name in keys: # pylint: disable=protected-access
module = getattr(pipe, module_name, None)
- balanced_offload_active = getattr(module, "balanced_offload_active", None)
- if isinstance(module, torch.nn.Module) and (balanced_offload_active is None or balanced_offload_active):
+ if isinstance(module, torch.nn.Module):
network_layer_name = getattr(module, "network_layer_name", None)
device_map = getattr(module, "balanced_offload_device_map", None)
max_memory = getattr(module, "balanced_offload_max_memory", None)
module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
try:
if used_gpu > 100 * shared.opts.diffusers_offload_min_gpu_memory:
+ debug_move(f'Balanced offload: gpu={used_gpu} ram={used_ram} current={module.device} target={devices.cpu} component={module.__class__.__name__}')
module = module.to(devices.cpu, non_blocking=True)
- used_gpu = devices.torch_gc(fast=True)
+ used_gpu, used_ram = devices.torch_gc(fast=True)
+ else:
+ debug_move(f'Balanced offload: gpu={used_gpu} ram={used_ram} current={module.device} target={devices.cpu} component={module.__class__.__name__}')
module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
- module.balanced_offload_active = False
if network_layer_name:
module.network_layer_name = network_layer_name
if device_map and max_memory:
@@ -515,13 +515,13 @@ def move_model(model, device=None, force=False):
shared.log.error(f'Model move execution device: device={device} {e}')
if getattr(model, 'has_accelerate', False) and not force:
return
- if hasattr(model, "device") and devices.normalize_device(model.device) == devices.normalize_device(device):
+ if hasattr(model, "device") and devices.normalize_device(model.device) == devices.normalize_device(device) and not force:
return
try:
t0 = time.time()
try:
if hasattr(model, 'to'):
- model.to(device)
+ model.to(device, non_blocking=True)
if hasattr(model, "prior_pipe"):
model.prior_pipe.to(device)
except Exception as e0:
@@ -551,7 +551,7 @@ def move_model(model, device=None, force=False):
if 'move' not in process_timer.records:
process_timer.records['move'] = 0
process_timer.records['move'] += t1 - t0
- if os.environ.get('SD_MOVE_DEBUG', None) or (t1-t0) > 1:
+ if os.environ.get('SD_MOVE_DEBUG', None) or (t1-t0) > 2:
shared.log.debug(f'Model move: device={device} class={model.__class__.__name__} accelerate={getattr(model, "has_accelerate", False)} fn={fn} time={t1-t0:.2f}') # pylint: disable=protected-access
devices.torch_gc()
@@ -1492,8 +1492,6 @@ def disable_offload(sd_model):
module = getattr(sd_model, module_name, None)
if isinstance(module, torch.nn.Module):
network_layer_name = getattr(module, "network_layer_name", None)
- if getattr(module, "balanced_offload_active", None) is not None:
- module.balanced_offload_active = None
module = remove_hook_from_module(module, recurse=True)
if network_layer_name:
module.network_layer_name = network_layer_name
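The restructured block above always re-attaches the offload hook and only conditionally moves a component to CPU, based on a GPU usage watermark (the 100 * multiplier suggests used_gpu is a percentage while the option is stored as a 0..1 fraction). A reduced sketch of just that decision, with illustrative names:

    import torch

    def maybe_offload(module: torch.nn.Module, used_gpu_pct: float, min_gpu_watermark: float) -> torch.nn.Module:
        # offload only when measured GPU usage exceeds the configured low watermark
        if used_gpu_pct > 100 * min_gpu_watermark:
            module = module.to('cpu')
        return module

    unet = maybe_offload(torch.nn.Linear(4, 4), used_gpu_pct=82.0, min_gpu_watermark=0.25)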
diff --git a/modules/shared.py b/modules/shared.py
index 21a70fea1..4b7f34e83 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -20,7 +20,7 @@
from modules.paths import models_path, script_path, data_path, sd_configs_path, sd_default_config, sd_model_file, default_sd_model_file, extensions_dir, extensions_builtin_dir # pylint: disable=W0611
from modules.dml import memory_providers, default_memory_provider, directml_do_hijack
from modules.onnx_impl import initialize_onnx, execution_providers
-from modules.memstats import memory_stats
+from modules.memstats import memory_stats, ram_stats
from modules.ui_components import DropdownEditable
import modules.interrogate
import modules.memmon
diff --git a/modules/ui_control.py b/modules/ui_control.py
index 072d9b9c9..59db12fc5 100644
--- a/modules/ui_control.py
+++ b/modules/ui_control.py
@@ -29,15 +29,20 @@ def return_stats(t: float = None):
elapsed_m = int(elapsed // 60)
elapsed_s = elapsed % 60
elapsed_text = f"Time: {elapsed_m}m {elapsed_s:.2f}s |" if elapsed_m > 0 else f"Time: {elapsed_s:.2f}s |"
- summary = timer.process.summary(min_time=0.1, total=False).replace('=', ' ')
- vram_html = ''
+ summary = timer.process.summary(min_time=0.25, total=False).replace('=', ' ')
+ gpu = ''
+ cpu = ''
if not shared.mem_mon.disabled:
vram = {k: -(v//-(1024*1024)) for k, v in shared.mem_mon.read().items()}
- used = round(100 * vram['used'] / (vram['total'] + 0.001))
- if vram.get('active_peak', 0) > 0:
- vram_html += f"| GPU {max(vram['active_peak'], vram['reserved_peak'])} MB {used}%"
- vram_html += f" | retries {vram['retries']} oom {vram['oom']}" if vram.get('retries', 0) > 0 or vram.get('oom', 0) > 0 else ''
- return f""
+ peak = max(vram['active_peak'], vram['reserved_peak'], vram['used'])
+ used = round(100.0 * peak / vram['total']) if vram['total'] > 0 else 0
+ if used > 0:
+ gpu += f"| GPU {peak} MB {used}%"
+ gpu += f" | retries {vram['retries']} oom {vram['oom']}" if vram.get('retries', 0) > 0 or vram.get('oom', 0) > 0 else ''
+ ram = shared.ram_stats()
+ if ram['used'] > 0:
+ cpu += f"| RAM {ram['used']} GB {round(100.0 * ram['used'] / ram['total'])}%"
+ return f""
def return_controls(res, t: float = None):
From 82eb9244865e50526002bd7cf952fe1cabbb422a Mon Sep 17 00:00:00 2001
From: Disty0
Date: Mon, 2 Dec 2024 00:29:01 +0300
Subject: [PATCH 069/162] Reduce balanced offload max gpu memory to 0.70
---
modules/shared.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/modules/shared.py b/modules/shared.py
index 4b7f34e83..387b6ca40 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -561,7 +561,7 @@ def get_default_modes():
"diffusers_generator_device": OptionInfo("GPU", "Generator device", gr.Radio, {"choices": ["GPU", "CPU", "Unset"]}),
"diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'model', 'sequential']}),
"diffusers_offload_min_gpu_memory": OptionInfo(0.25, "Balanced offload GPU low watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
- "diffusers_offload_max_gpu_memory": OptionInfo(0.75, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
+ "diffusers_offload_max_gpu_memory": OptionInfo(0.70, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
"diffusers_offload_max_cpu_memory": OptionInfo(0.75, "Balanced offload CPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
"diffusers_vae_upcast": OptionInfo("default", "VAE upcasting", gr.Radio, {"choices": ['default', 'true', 'false']}),
"diffusers_vae_slicing": OptionInfo(True, "VAE slicing"),
From 106f93f07963667de96399cfccc1d5e396e95c41 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 1 Dec 2024 17:13:53 -0500
Subject: [PATCH 070/162] memory optimizations
Signed-off-by: Vladimir Mandic
---
modules/processing.py | 1 +
modules/processing_args.py | 24 ++++++++++++++++--------
modules/sd_models.py | 29 +++++++++++++++--------------
3 files changed, 32 insertions(+), 22 deletions(-)
diff --git a/modules/processing.py b/modules/processing.py
index ebbaf7272..095eba54c 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -483,4 +483,5 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
for stat in stats[:20]:
frame = stat.traceback[0]
shared.log.debug(f' file="{frame.filename}":{frame.lineno} size={stat.size}')
+ devices.torch_gc(force=True)
return processed
diff --git a/modules/processing_args.py b/modules/processing_args.py
index a716b685e..4ce552825 100644
--- a/modules/processing_args.py
+++ b/modules/processing_args.py
@@ -12,7 +12,8 @@
from modules.api import helpers
-debug = shared.log.trace if os.environ.get('SD_DIFFUSERS_DEBUG', None) is not None else lambda *args, **kwargs: None
+debug_enabled = os.environ.get('SD_DIFFUSERS_DEBUG', None)
+debug_log = shared.log.trace if os.environ.get('SD_DIFFUSERS_DEBUG', None) is not None else lambda *args, **kwargs: None
def task_specific_kwargs(p, model):
@@ -93,7 +94,8 @@ def task_specific_kwargs(p, model):
'target_subject_category': getattr(p, 'prompt', '').split()[-1],
'output_type': 'pil',
}
- debug(f'Diffusers task specific args: {task_args}')
+ if debug_enabled:
+ debug_log(f'Diffusers task specific args: {task_args}')
return task_args
@@ -108,7 +110,8 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2
signature = inspect.signature(type(model).__call__, follow_wrapped=True)
possible = list(signature.parameters)
- debug(f'Diffusers pipeline possible: {possible}')
+ if debug_enabled:
+ debug_log(f'Diffusers pipeline possible: {possible}')
prompts, negative_prompts, prompts_2, negative_prompts_2 = fix_prompts(prompts, negative_prompts, prompts_2, negative_prompts_2)
steps = kwargs.get("num_inference_steps", None) or len(getattr(p, 'timesteps', ['1']))
clip_skip = kwargs.pop("clip_skip", 1)
@@ -159,6 +162,8 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2
args['negative_prompt'] = negative_prompts[0]
else:
args['negative_prompt'] = negative_prompts
+ if prompt_parser_diffusers.embedder is not None and not prompt_parser_diffusers.embedder.scheduled_prompt: # not scheduled so we dont need it anymore
+ prompt_parser_diffusers.embedder = None
if 'clip_skip' in possible and parser == 'fixed':
if clip_skip == 1:
@@ -248,14 +253,16 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2
if arg in possible:
args[arg] = task_kwargs[arg]
task_args = getattr(p, 'task_args', {})
- debug(f'Diffusers task args: {task_args}')
+ if debug_enabled:
+ debug_log(f'Diffusers task args: {task_args}')
for k, v in task_args.items():
if k in possible:
args[k] = v
else:
- debug(f'Diffusers unknown task args: {k}={v}')
+ debug_log(f'Diffusers unknown task args: {k}={v}')
cross_attention_args = getattr(p, 'cross_attention_kwargs', {})
- debug(f'Diffusers cross-attention args: {cross_attention_args}')
+ if debug_enabled:
+ debug_log(f'Diffusers cross-attention args: {cross_attention_args}')
for k, v in cross_attention_args.items():
if args.get('cross_attention_kwargs', None) is None:
args['cross_attention_kwargs'] = {}
@@ -273,7 +280,7 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2
# handle implicit controlnet
if 'control_image' in possible and 'control_image' not in args and 'image' in args:
- debug('Diffusers: set control image')
+ debug_log('Diffusers: set control image')
args['control_image'] = args['image']
sd_hijack_hypertile.hypertile_set(p, hr=len(getattr(p, 'init_images', [])) > 0)
@@ -309,5 +316,6 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2
if shared.cmd_opts.profile:
t1 = time.time()
shared.log.debug(f'Profile: pipeline args: {t1-t0:.2f}')
- debug(f'Diffusers pipeline args: {args}')
+ if debug_enabled:
+ debug_log(f'Diffusers pipeline args: {args}')
return args
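The logging changes above avoid paying for f-string construction when tracing is disabled: even when debug_log is a no-op lambda, its argument would still be formatted, so calls are additionally guarded by a flag resolved once from the environment. A small sketch of the pattern:

    import os

    debug_enabled = os.environ.get('SD_DIFFUSERS_DEBUG', None) is not None
    debug_log = print if debug_enabled else (lambda *args, **kwargs: None)

    args = {'prompt': 'x' * 100_000}
    if debug_enabled:  # the f-string below is never built when debugging is off
        debug_log(f'Diffusers pipeline args: {args}')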
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 83bf6f994..37567962c 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -443,23 +443,24 @@ def apply_balanced_offload_to_module(pipe):
max_memory = getattr(module, "balanced_offload_max_memory", None)
module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
try:
- if used_gpu > 100 * shared.opts.diffusers_offload_min_gpu_memory:
- debug_move(f'Balanced offload: gpu={used_gpu} ram={used_ram} current={module.device} target={devices.cpu} component={module.__class__.__name__}')
- module = module.to(devices.cpu, non_blocking=True)
- used_gpu, used_ram = devices.torch_gc(fast=True)
- else:
- debug_move(f'Balanced offload: gpu={used_gpu} ram={used_ram} current={module.device} target={devices.cpu} component={module.__class__.__name__}')
- module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
- module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
- module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
- if network_layer_name:
- module.network_layer_name = network_layer_name
- if device_map and max_memory:
- module.balanced_offload_device_map = device_map
- module.balanced_offload_max_memory = max_memory
+ do_offload = used_gpu > 100 * shared.opts.diffusers_offload_min_gpu_memory
+ debug_move(f'Balanced offload: gpu={used_gpu} ram={used_ram} current={module.device} dtype={module.dtype} op={"move" if do_offload else "skip"} component={module.__class__.__name__}')
+ if do_offload:
+ module = module.to(devices.cpu)
+ used_gpu, used_ram = devices.torch_gc(fast=True, force=True)
except Exception as e:
if 'bitsandbytes' not in str(e):
shared.log.error(f'Balanced offload: module={module_name} {e}')
+ if os.environ.get('SD_MOVE_DEBUG', None):
+ errors.display(e, f'Balanced offload: module={module_name}')
+ module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
+ module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
+ module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
+ if network_layer_name:
+ module.network_layer_name = network_layer_name
+ if device_map and max_memory:
+ module.balanced_offload_device_map = device_map
+ module.balanced_offload_max_memory = max_memory
apply_balanced_offload_to_module(sd_model)
if hasattr(sd_model, "pipe"):
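The refactored hunk above makes the offload decision explicit: a module is moved to CPU only when current GPU usage exceeds the configured minimum, and in every case the offload hook is re-attached afterwards. A hedged sketch of just the threshold check, assuming used_gpu is a percentage as returned by devices.torch_gc and the option is a 0..1 fraction (names and numbers are illustrative):

def should_offload(used_gpu: float, min_gpu_memory: float) -> bool:
    # diffusers_offload_min_gpu_memory is a fraction, hence the factor of 100
    return used_gpu > 100 * min_gpu_memory

print(should_offload(used_gpu=35.0, min_gpu_memory=0.2))  # True  -> module.to(cpu)
print(should_offload(used_gpu=10.0, min_gpu_memory=0.2))  # False -> keep on device, re-hook only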
From f3761032cfecc59305b927af69bbf2699bb8f17e Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 1 Dec 2024 17:26:34 -0500
Subject: [PATCH 071/162] cleanup
Signed-off-by: Vladimir Mandic
---
modules/sd_checkpoint.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/modules/sd_checkpoint.py b/modules/sd_checkpoint.py
index 2f6533ef0..6ab396329 100644
--- a/modules/sd_checkpoint.py
+++ b/modules/sd_checkpoint.py
@@ -253,6 +253,8 @@ def select_checkpoint(op='model'):
model_checkpoint = shared.opts.data.get('sd_model_refiner', None)
else:
model_checkpoint = shared.opts.sd_model_checkpoint
+    if model_checkpoint is None or len(model_checkpoint) < 3:
+        return None
if model_checkpoint is None or model_checkpoint == 'None':
return None
checkpoint_info = get_closet_checkpoint_match(model_checkpoint)
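The added guard treats very short checkpoint names the same as an unset selection; a tiny standalone illustration of the intended behaviour (the helper name is hypothetical, not part of the patch):

def is_valid_checkpoint_name(name) -> bool:
    # hypothetical helper mirroring the select_checkpoint guard above
    if name is None or name == 'None':
        return False
    return len(name) >= 3  # names shorter than 3 characters are treated as unset

for candidate in (None, 'None', '', 'ab', 'sdxl-base.safetensors'):
    print(candidate, is_valid_checkpoint_name(candidate))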
From 7e2034c4ffdb8c61fab47f36568a04a1391cba42 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 2 Dec 2024 10:51:41 -0500
Subject: [PATCH 072/162] lora add fuse
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 5 +-
extensions-builtin/Lora/network_overrides.py | 1 -
modules/lora/extra_networks_lora.py | 3 +-
modules/lora/networks.py | 61 ++++++++++++++------
modules/shared.py | 10 ++--
scripts/xyz_grid.py | 2 +-
6 files changed, 54 insertions(+), 28 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c62b6b917..a9109341d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-11-30
+## Update for 2024-12-02
### New models and integrations
@@ -32,6 +32,9 @@
### UI and workflow improvements
+- **LoRA** handler rewrite
+ - LoRA weights are no longer calculated on-the-fly during model execution, but are pre-calculated at the start
+    this adds perceived overhead at generate startup, but results in overall faster execution as LoRA does not need to be processed on each step
- **Model loader** improvements:
- detect model components on model load fail
- allow passing absolute path to model loader
diff --git a/extensions-builtin/Lora/network_overrides.py b/extensions-builtin/Lora/network_overrides.py
index 5334f3c1b..b5c28b718 100644
--- a/extensions-builtin/Lora/network_overrides.py
+++ b/extensions-builtin/Lora/network_overrides.py
@@ -26,7 +26,6 @@
force_models = [ # forced always
'sc',
- # 'sd3',
'kandinsky',
'hunyuandit',
'auraflow',
diff --git a/modules/lora/extra_networks_lora.py b/modules/lora/extra_networks_lora.py
index d58cebd8f..57966550a 100644
--- a/modules/lora/extra_networks_lora.py
+++ b/modules/lora/extra_networks_lora.py
@@ -124,7 +124,7 @@ def activate(self, p, params_list, step=0):
self.model = shared.opts.sd_model_checkpoint
names, te_multipliers, unet_multipliers, dyn_dims = parse(p, params_list, step)
networks.network_load(names, te_multipliers, unet_multipliers, dyn_dims) # load
- networks.network_process()
+ networks.network_activate()
if len(networks.loaded_networks) > 0 and step == 0:
infotext(p)
prompt(p)
@@ -141,6 +141,7 @@ def deactivate(self, p):
shared.sd_model.unload_lora_weights() # fails for non-CLIP models
except Exception:
pass
+ networks.network_deactivate()
t1 = time.time()
networks.timer['restore'] += t1 - t0
if self.active and networks.debug:
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 21d641af6..48073774c 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -310,12 +310,15 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=Non
def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight, network_layer_name, wanted_names):
global bnb # pylint: disable=W0603
backup_size = 0
- if len(loaded_networks) > 0 and network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419
+ if len(loaded_networks) > 0 and network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419 # pylint: disable=R1729
t0 = time.time()
+
weights_backup = getattr(self, "network_weights_backup", None)
if weights_backup is None and wanted_names != (): # pylint: disable=C1803
self.network_weights_backup = None
- if getattr(weight, "quant_type", None) in ['nf4', 'fp4']:
+ if shared.opts.lora_fuse_diffusers:
+ weights_backup = True
+ elif getattr(weight, "quant_type", None) in ['nf4', 'fp4']:
if bnb is None:
bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
if bnb is not None:
@@ -328,22 +331,26 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
weights_backup = weight.clone()
else:
weights_backup = weight.clone()
- if shared.opts.lora_offload_backup and weights_backup is not None:
+ if shared.opts.lora_offload_backup and weights_backup is not None and isinstance(weights_backup, torch.Tensor):
weights_backup = weights_backup.to(devices.cpu)
self.network_weights_backup = weights_backup
bias_backup = getattr(self, "network_bias_backup", None)
if bias_backup is None:
if getattr(self, 'bias', None) is not None:
- bias_backup = self.bias.clone()
+ if shared.opts.lora_fuse_diffusers:
+ bias_backup = True
+ else:
+ bias_backup = self.bias.clone()
else:
bias_backup = None
- if shared.opts.lora_offload_backup and bias_backup is not None:
+ if shared.opts.lora_offload_backup and bias_backup is not None and isinstance(bias_backup, torch.Tensor):
bias_backup = bias_backup.to(devices.cpu)
self.network_bias_backup = bias_backup
+
if getattr(self, 'network_weights_backup', None) is not None:
- backup_size += self.network_weights_backup.numel() * self.network_weights_backup.element_size()
+ backup_size += self.network_weights_backup.numel() * self.network_weights_backup.element_size() if isinstance(self.network_weights_backup, torch.Tensor) else 0
if getattr(self, 'network_bias_backup', None) is not None:
- backup_size += self.network_bias_backup.numel() * self.network_bias_backup.element_size()
+ backup_size += self.network_bias_backup.numel() * self.network_bias_backup.element_size() if isinstance(self.network_bias_backup, torch.Tensor) else 0
t1 = time.time()
timer['backup'] += t1 - t0
return backup_size
@@ -396,18 +403,24 @@ def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
return batch_updown, batch_ex_bias
-def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias):
+def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias, apply: bool = True):
t0 = time.time()
weights_backup = getattr(self, "network_weights_backup", None)
bias_backup = getattr(self, "network_bias_backup", None)
if weights_backup is None and bias_backup is None:
return None, None
if weights_backup is not None:
- self.weight = None
+ if isinstance(weights_backup, bool):
+ weights_backup = self.weight
+ else:
+ self.weight = None
if updown is not None and len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
if updown is not None:
- new_weight = updown.to(devices.device, non_blocking=True) + weights_backup.to(devices.device, non_blocking=True)
+ if apply:
+ new_weight = weights_backup.to(devices.device, non_blocking=True) + updown.to(devices.device, non_blocking=True)
+ else:
+ new_weight = weights_backup.to(devices.device, non_blocking=True) - updown.to(devices.device, non_blocking=True)
if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
else:
@@ -418,9 +431,15 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
if hasattr(self, "qweight") and hasattr(self, "freeze"):
self.freeze()
if bias_backup is not None:
- self.bias = None
+ if isinstance(bias_backup, bool):
+ bias_backup = self.bias
+ else:
+ self.bias = None
if ex_bias is not None:
- new_weight = ex_bias.to(devices.device, non_blocking=True) + bias_backup.to(devices.device, non_blocking=True)
+ if apply:
+ new_weight = bias_backup.to(devices.device, non_blocking=True) + ex_bias.to(devices.device, non_blocking=True)
+ else:
+ new_weight = bias_backup.to(devices.device, non_blocking=True) - ex_bias.to(devices.device, non_blocking=True)
self.bias = torch.nn.Parameter(new_weight, requires_grad=False)
del new_weight
else:
@@ -432,7 +451,10 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
return self.weight.device, self.weight.dtype
-def network_process():
+def network_deactivate():
+ pass
+
+def network_activate():
timer['backup'] = 0
timer['calc'] = 0
timer['apply'] = 0
@@ -462,24 +484,25 @@ def network_process():
network_layer_name = getattr(module, 'network_layer_name', None)
current_names = getattr(module, "network_current_names", ())
if shared.state.interrupted or network_layer_name is None or current_names == wanted_names:
+ if task is not None:
+ pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} skip')
continue
weight = getattr(module, 'weight', None)
weight = weight.to(devices.device, non_blocking=True) if weight is not None else None
backup_size += network_backup_weights(module, weight, network_layer_name, wanted_names)
batch_updown, batch_ex_bias = network_calc_weights(module, weight, network_layer_name)
- del weight
weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias)
weights_devices.append(weights_device)
weights_dtypes.append(weights_dtype)
+ if batch_updown is not None or batch_ex_bias is not None:
+ applied += 1
+ del weight, batch_updown, batch_ex_bias
module.network_current_names = wanted_names
if task is not None:
pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} modules={len(modules)} apply={applied} backup={backup_size}')
- if batch_updown is not None or batch_ex_bias is not None:
- applied += 1
- # pbar.remove_task(task)
- weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403
+ weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403 # pylint: disable=R1718
if debug and len(loaded_networks) > 0:
- shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} modules={len(modules)} apply={applied} device={weights_devices} dtype={weights_dtypes} backup={backup_size} time={get_timers()}')
+ shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} modules={len(modules)} apply={applied} device={weights_devices} dtype={weights_dtypes} backup={backup_size} fuse={shared.opts.lora_fuse_diffusers} time={get_timers()}')
modules.clear()
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.set_diffuser_offload(sd_model, op="model")
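A simplified sketch of the backup-then-apply flow implemented above: with lora_fuse_diffusers enabled the backup is only a boolean marker and the LoRA delta is fused into the live weight (lower memory, undo requires a model reload); otherwise a CPU clone of the original weight is kept so the delta can be removed later. Shapes and names below are illustrative, not the module's actual API:

import torch

def apply_lora_delta(module: torch.nn.Linear, updown: torch.Tensor, fuse: bool):
    backup = getattr(module, 'network_weights_backup', None)
    if backup is None:
        # fuse: remember only that a LoRA was applied; otherwise keep a CPU copy for undo
        module.network_weights_backup = True if fuse else module.weight.detach().clone().cpu()
    base = module.weight if fuse else module.network_weights_backup.to(module.weight.device)
    module.weight = torch.nn.Parameter(base + updown, requires_grad=False)

layer = torch.nn.Linear(4, 4)
delta = 0.01 * torch.randn(4, 4)
apply_lora_delta(layer, delta, fuse=False)  # restorable: original weight kept on CPU
apply_lora_delta(layer, delta, fuse=True)   # fused: applied in place on the current weight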
diff --git a/modules/shared.py b/modules/shared.py
index 387b6ca40..10167c809 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -20,7 +20,7 @@
from modules.paths import models_path, script_path, data_path, sd_configs_path, sd_default_config, sd_model_file, default_sd_model_file, extensions_dir, extensions_builtin_dir # pylint: disable=W0611
from modules.dml import memory_providers, default_memory_provider, directml_do_hijack
from modules.onnx_impl import initialize_onnx, execution_providers
-from modules.memstats import memory_stats, ram_stats
+from modules.memstats import memory_stats
from modules.ui_components import DropdownEditable
import modules.interrogate
import modules.memmon
@@ -903,16 +903,16 @@ def get_default_modes():
"wildcards_enabled": OptionInfo(True, "Enable file wildcards support"),
"extra_networks_lora_sep": OptionInfo("LoRA
", "", gr.HTML),
"extra_networks_default_multiplier": OptionInfo(1.0, "Default strength", gr.Slider, {"minimum": 0.0, "maximum": 2.0, "step": 0.01}),
- "lora_preferred_name": OptionInfo("filename", "LoRA preferred name", gr.Radio, {"choices": ["filename", "alias"]}),
+ "lora_preferred_name": OptionInfo("filename", "LoRA preferred name", gr.Radio, {"choices": ["filename", "alias"], "visible": False}),
"lora_add_hashes_to_infotext": OptionInfo(False, "LoRA add hash info"),
+ "lora_fuse_diffusers": OptionInfo(False if not cmd_opts.use_openvino else True, "LoRA fuse directly to model"),
+ "lora_load_gpu": OptionInfo(True if not (cmd_opts.lowvram or cmd_opts.medvram) else False, "LoRA load directly to GPU"),
+ "lora_offload_backup": OptionInfo(True, "LoRA offload backup weights"),
"lora_force_diffusers": OptionInfo(False if not cmd_opts.use_openvino else True, "LoRA force loading of all models using Diffusers"),
"lora_maybe_diffusers": OptionInfo(False, "LoRA force loading of specific models using Diffusers"),
- "lora_fuse_diffusers": OptionInfo(False if not cmd_opts.use_openvino else True, "LoRA use fuse when possible"),
"lora_apply_tags": OptionInfo(0, "LoRA auto-apply tags", gr.Slider, {"minimum": -1, "maximum": 32, "step": 1}),
"lora_in_memory_limit": OptionInfo(0, "LoRA memory cache", gr.Slider, {"minimum": 0, "maximum": 24, "step": 1}),
"lora_quant": OptionInfo("NF4","LoRA precision in quantized models", gr.Radio, {"choices": ["NF4", "FP4"]}),
- "lora_load_gpu": OptionInfo(True if not (cmd_opts.lowvram or cmd_opts.medvram) else False, "Load LoRA directly to GPU"),
- "lora_offload_backup": OptionInfo(True, "Offload LoRA Backup Weights"),
}))
options_templates.update(options_section((None, "Internal options"), {
diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py
index 0395ce736..bb067ea21 100644
--- a/scripts/xyz_grid.py
+++ b/scripts/xyz_grid.py
@@ -12,7 +12,7 @@
from scripts.xyz_grid_shared import str_permutations, list_to_csv_string, re_range # pylint: disable=no-name-in-module
from scripts.xyz_grid_classes import axis_options, AxisOption, SharedSettingsStackHelper # pylint: disable=no-name-in-module
from scripts.xyz_grid_draw import draw_xyz_grid # pylint: disable=no-name-in-module
-from scripts.xyz_grid_shared import apply_field, apply_task_args, apply_setting, apply_prompt, apply_order, apply_sampler, apply_hr_sampler_name, confirm_samplers, apply_checkpoint, apply_refiner, apply_unet, apply_dict, apply_clip_skip, apply_vae, list_lora, apply_lora, apply_lora_strength, apply_te, apply_styles, apply_upscaler, apply_context, apply_detailer, apply_override, apply_processing, apply_options, apply_seed, format_value_add_label, format_value, format_value_join_list, do_nothing, format_nothing, str_permutations # pylint: disable=no-name-in-module, unused-import
+from scripts.xyz_grid_shared import apply_field, apply_task_args, apply_setting, apply_prompt, apply_order, apply_sampler, apply_hr_sampler_name, confirm_samplers, apply_checkpoint, apply_refiner, apply_unet, apply_dict, apply_clip_skip, apply_vae, list_lora, apply_lora, apply_lora_strength, apply_te, apply_styles, apply_upscaler, apply_context, apply_detailer, apply_override, apply_processing, apply_options, apply_seed, format_value_add_label, format_value, format_value_join_list, do_nothing, format_nothing # pylint: disable=no-name-in-module, unused-import
from modules import shared, errors, scripts, images, processing
from modules.ui_components import ToolButton
import modules.ui_symbols as symbols
From 4eac263055b280e948d1a22d8987353efe99d446 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 2 Dec 2024 11:21:39 -0500
Subject: [PATCH 073/162] add bdia sampler
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 6 +-
modules/lora/networks.py | 12 +-
modules/schedulers/scheduler_bdia.py | 551 +++++++++++++++++++++++++++
modules/sd_samplers_diffusers.py | 3 +
modules/shared.py | 2 +-
5 files changed, 563 insertions(+), 11 deletions(-)
create mode 100644 modules/schedulers/scheduler_bdia.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a9109341d..57b08fbe7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -32,9 +32,12 @@
### UI and workflow improvements
-- **LoRA** handler rewrite
+- **LoRA** handler rewrite:
- LoRA weights are no longer calculated on-the-fly during model execution, but are pre-calculated at the start
this adds perceived overhead at generate startup, but results in overall faster execution as LoRA does not need to be processed on each step
+  - *note*: LoRA weight backups are required so LoRA can be unapplied, but they can take quite a lot of system memory
+    if you know you will not need to unapply LoRA, you can disable backups in *settings -> networks -> lora fuse*
+    in which case, you need to reload the model to unapply LoRA
- **Model loader** improvements:
- detect model components on model load fail
- allow passing absolute path to model loader
@@ -60,6 +63,7 @@
- **Sampler** improvements
- Euler FlowMatch: add sigma methods (*karras/exponential/betas*)
- DPM FlowMatch: update all and add sigma methods
+ - BDIA-DDIM: *experimental*
### Fixes
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 48073774c..14fce760a 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -403,7 +403,7 @@ def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
return batch_updown, batch_ex_bias
-def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias, apply: bool = True):
+def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias):
t0 = time.time()
weights_backup = getattr(self, "network_weights_backup", None)
bias_backup = getattr(self, "network_bias_backup", None)
@@ -417,10 +417,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
if updown is not None and len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
if updown is not None:
- if apply:
- new_weight = weights_backup.to(devices.device, non_blocking=True) + updown.to(devices.device, non_blocking=True)
- else:
- new_weight = weights_backup.to(devices.device, non_blocking=True) - updown.to(devices.device, non_blocking=True)
+ new_weight = weights_backup.to(devices.device, non_blocking=True) + updown.to(devices.device, non_blocking=True)
if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
else:
@@ -436,10 +433,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
else:
self.bias = None
if ex_bias is not None:
- if apply:
- new_weight = bias_backup.to(devices.device, non_blocking=True) + ex_bias.to(devices.device, non_blocking=True)
- else:
- new_weight = bias_backup.to(devices.device, non_blocking=True) - ex_bias.to(devices.device, non_blocking=True)
+ new_weight = bias_backup.to(devices.device, non_blocking=True) + ex_bias.to(devices.device, non_blocking=True)
self.bias = torch.nn.Parameter(new_weight, requires_grad=False)
del new_weight
else:
diff --git a/modules/schedulers/scheduler_bdia.py b/modules/schedulers/scheduler_bdia.py
new file mode 100644
index 000000000..bb3e7f9b2
--- /dev/null
+++ b/modules/schedulers/scheduler_bdia.py
@@ -0,0 +1,551 @@
+# Copyright 2024 Stanford University Team and The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# DISCLAIMER: This code is strongly influenced by https://github.com/pesser/pytorch_diffusion
+# and https://github.com/hojonathanho/diffusion
+
+import math
+from dataclasses import dataclass
+from typing import List, Optional, Tuple, Union
+
+import numpy as np
+import torch
+
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.utils import BaseOutput
+from diffusers.utils.torch_utils import randn_tensor
+from diffusers.schedulers.scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
+
+
+@dataclass
+# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->DDIM
+class DDIMSchedulerOutput(BaseOutput):
+ """
+ Output class for the scheduler's `step` function output.
+
+ Args:
+ prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
+ Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
+ denoising loop.
+ pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
+ The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
+ `pred_original_sample` can be used to preview progress or for guidance.
+ """
+
+ prev_sample: torch.Tensor
+ pred_original_sample: Optional[torch.Tensor] = None
+
+
+# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
+def betas_for_alpha_bar(
+ num_diffusion_timesteps,
+ max_beta=0.999,
+ alpha_transform_type="cosine",
+):
+ """
+ Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
+ (1-beta) over time from t = [0,1].
+
+ Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
+ to that part of the diffusion process.
+
+
+ Args:
+ num_diffusion_timesteps (`int`): the number of betas to produce.
+ max_beta (`float`): the maximum beta to use; use values lower than 1 to
+ prevent singularities.
+ alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
+ Choose from `cosine` or `exp`
+
+ Returns:
+ betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
+ """
+ if alpha_transform_type == "cosine":
+
+ def alpha_bar_fn(t):
+ return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
+
+ elif alpha_transform_type == "exp":
+
+ def alpha_bar_fn(t):
+ return math.exp(t * -12.0)
+
+ else:
+ raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}")
+
+ betas = []
+ for i in range(num_diffusion_timesteps):
+ t1 = i / num_diffusion_timesteps
+ t2 = (i + 1) / num_diffusion_timesteps
+ betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
+ return torch.tensor(betas, dtype=torch.float32)
+
+
+def rescale_zero_terminal_snr(betas):
+ """
+ Rescales betas to have zero terminal SNR Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
+
+
+ Args:
+ betas (`torch.Tensor`):
+ the betas that the scheduler is being initialized with.
+
+ Returns:
+ `torch.Tensor`: rescaled betas with zero terminal SNR
+ """
+ # Convert betas to alphas_bar_sqrt
+ alphas = 1.0 - betas
+ alphas_cumprod = torch.cumprod(alphas, dim=0)
+ alphas_bar_sqrt = alphas_cumprod.sqrt()
+
+ # Store old values.
+ alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
+ alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
+
+ # Shift so the last timestep is zero.
+ alphas_bar_sqrt -= alphas_bar_sqrt_T
+
+ # Scale so the first timestep is back to the old value.
+ alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)
+
+ # Convert alphas_bar_sqrt to betas
+ alphas_bar = alphas_bar_sqrt**2 # Revert sqrt
+ alphas = alphas_bar[1:] / alphas_bar[:-1] # Revert cumprod
+ alphas = torch.cat([alphas_bar[0:1], alphas])
+ betas = 1 - alphas
+
+ return betas
+
+class BDIA_DDIMScheduler(SchedulerMixin, ConfigMixin):
+ """
+ `DDIMScheduler` extends the denoising procedure introduced in denoising diffusion probabilistic models (DDPMs) with
+ non-Markovian guidance.
+
+ This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
+ methods the library implements for all schedulers such as loading and saving.
+
+ Args:
+ num_train_timesteps (`int`, defaults to 1000):
+ The number of diffusion steps to train the model.
+ beta_start (`float`, defaults to 0.0001):
+ The starting `beta` value of inference.
+ beta_end (`float`, defaults to 0.02):
+ The final `beta` value.
+ beta_schedule (`str`, defaults to `"linear"`):
+ The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
+ `linear`, `scaled_linear`, or `squaredcos_cap_v2`.
+ trained_betas (`np.ndarray`, *optional*):
+ Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
+ clip_sample (`bool`, defaults to `True`):
+ Clip the predicted sample for numerical stability.
+ clip_sample_range (`float`, defaults to 1.0):
+ The maximum magnitude for sample clipping. Valid only when `clip_sample=True`.
+ set_alpha_to_one (`bool`, defaults to `True`):
+ Each diffusion step uses the alphas product value at that step and at the previous one. For the final step
+ there is no previous alpha. When this option is `True` the previous alpha product is fixed to `1`,
+ otherwise it uses the alpha value at step 0.
+ steps_offset (`int`, defaults to 0):
+ An offset added to the inference steps, as required by some model families.
+ prediction_type (`str`, defaults to `epsilon`, *optional*):
+ Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
+ `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
+ Video](https://imagen.research.google/video/paper.pdf) paper).
+ thresholding (`bool`, defaults to `False`):
+ Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such
+ as Stable Diffusion.
+ dynamic_thresholding_ratio (`float`, defaults to 0.995):
+ The ratio for the dynamic thresholding method. Valid only when `thresholding=True`.
+ sample_max_value (`float`, defaults to 1.0):
+ The threshold value for dynamic thresholding. Valid only when `thresholding=True`.
+ timestep_spacing (`str`, defaults to `"leading"`):
+ The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
+ Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
+ rescale_betas_zero_snr (`bool`, defaults to `False`):
+ Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
+ dark samples instead of limiting it to samples with medium brightness. Loosely related to
+ [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
+ """
+
+ _compatibles = [e.name for e in KarrasDiffusionSchedulers]
+ order = 1
+
+ @register_to_config
+ def __init__(
+ self,
+ num_train_timesteps: int = 1000,
+ beta_start: float = 0.0001,
+ beta_end: float = 0.02,
+ beta_schedule: str = "linear",
+ trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
+ clip_sample: bool = True,
+ set_alpha_to_one: bool = True, #was True
+ steps_offset: int = 0,
+ prediction_type: str = "epsilon",
+ thresholding: bool = False,
+ dynamic_thresholding_ratio: float = 0.995,
+ clip_sample_range: float = 1.0,
+ sample_max_value: float = 1.0,
+ timestep_spacing: str = "leading", #leading
+ rescale_betas_zero_snr: bool = False,
+ gamma: float = 1.0,
+
+ ):
+ if trained_betas is not None:
+ self.betas = torch.tensor(trained_betas, dtype=torch.float32)
+ elif beta_schedule == "linear":
+ self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
+ elif beta_schedule == "scaled_linear":
+ # this schedule is very specific to the latent diffusion model.
+ self.betas = torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2
+ elif beta_schedule == "squaredcos_cap_v2":
+ # Glide cosine schedule
+ self.betas = betas_for_alpha_bar(num_train_timesteps)
+ else:
+ raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}")
+
+ # Rescale for zero SNR
+ if rescale_betas_zero_snr:
+ self.betas = rescale_zero_terminal_snr(self.betas)
+
+ self.alphas = 1.0 - self.betas #may have to add something for last step
+
+ self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
+ # At every step in ddim, we are looking into the previous alphas_cumprod
+ # For the final step, there is no previous alphas_cumprod because we are already at 0
+ # `set_alpha_to_one` decides whether we set this parameter simply to one or
+ # whether we use the final alpha of the "non-previous" one.
+ self.final_alpha_cumprod = torch.tensor(1.0) if set_alpha_to_one else self.alphas_cumprod[0]
+
+ # standard deviation of the initial noise distribution
+ self.init_noise_sigma = 1.0
+
+ # setable values
+ self.num_inference_steps = None
+ self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64))
+ self.next_sample = []
+ self.BDIA = False
+
+
+ def scale_model_input(self, sample: torch.Tensor, timestep: Optional[int] = None) -> torch.Tensor:
+ """
+ Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
+ current timestep.
+
+ Args:
+ sample (`torch.Tensor`):
+ The input sample.
+ timestep (`int`, *optional*):
+ The current timestep in the diffusion chain.
+
+ Returns:
+ `torch.Tensor`:
+ A scaled input sample.
+ """
+ return sample
+
+ def _get_variance(self, timestep, prev_timestep):
+ alpha_prod_t = self.alphas_cumprod[timestep]
+ alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
+ beta_prod_t = 1 - alpha_prod_t
+ beta_prod_t_prev = 1 - alpha_prod_t_prev
+
+ variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev)
+
+ return variance
+
+ # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
+ def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
+ """
+ "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
+ prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
+ s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
+ pixels from saturation at each step. We find that dynamic thresholding results in significantly better
+ photorealism as well as better image-text alignment, especially when using very large guidance weights."
+
+ https://arxiv.org/abs/2205.11487
+ """
+ dtype = sample.dtype
+ batch_size, channels, *remaining_dims = sample.shape
+
+ if dtype not in (torch.float32, torch.float64):
+ sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half
+
+ # Flatten sample for doing quantile calculation along each image
+ sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))
+
+ abs_sample = sample.abs() # "a certain percentile absolute pixel value"
+
+ s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1)
+ s = torch.clamp(
+ s, min=1, max=self.config.sample_max_value
+ ) # When clamped to min=1, equivalent to standard clipping to [-1, 1]
+ s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0
+ sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s"
+
+ sample = sample.reshape(batch_size, channels, *remaining_dims)
+ sample = sample.to(dtype)
+
+ return sample
+
+ def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
+ """
+ Sets the discrete timesteps used for the diffusion chain (to be run before inference).
+
+ Args:
+ num_inference_steps (`int`):
+ The number of diffusion steps used when generating samples with a pre-trained model.
+ """
+
+ if num_inference_steps > self.config.num_train_timesteps:
+ raise ValueError(
+ f"`num_inference_steps`: {num_inference_steps} cannot be larger than `self.config.train_timesteps`:"
+ f" {self.config.num_train_timesteps} as the unet model trained with this scheduler can only handle"
+ f" maximal {self.config.num_train_timesteps} timesteps."
+ )
+
+ self.num_inference_steps = num_inference_steps
+
+ # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
+ if self.config.timestep_spacing == "linspace":
+ timesteps = (
+ np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps)
+ .round()[::-1]
+ .copy()
+ .astype(np.int64)
+ )
+ elif self.config.timestep_spacing == "leading":
+ step_ratio = self.config.num_train_timesteps // self.num_inference_steps
+ # creates integer timesteps by multiplying by ratio
+ # casting to int to avoid issues when num_inference_step is power of 3
+ timesteps = (np.arange(0, num_inference_steps) * step_ratio).round()[::-1].copy().astype(np.int64)
+ timesteps += self.config.steps_offset
+ elif self.config.timestep_spacing == "trailing":
+ step_ratio = self.config.num_train_timesteps / self.num_inference_steps
+ # creates integer timesteps by multiplying by ratio
+ # casting to int to avoid issues when num_inference_step is power of 3
+ timesteps = np.round(np.arange(self.config.num_train_timesteps, 0, -step_ratio)).astype(np.int64)
+ timesteps -= 1
+ else:
+ raise ValueError(
+ f"{self.config.timestep_spacing} is not supported. Please make sure to choose one of 'leading' or 'trailing'."
+ )
+
+ self.timesteps = torch.from_numpy(timesteps).to(device)
+
+ def step(
+ self,
+ model_output: torch.Tensor,
+ timestep: int,
+ sample: torch.Tensor,
+ eta: float = 0.0,
+ use_clipped_model_output: bool = False,
+ generator=None,
+ variance_noise: Optional[torch.Tensor] = None,
+ return_dict: bool = True,
+ debug: bool = False,
+ ) -> Union[DDIMSchedulerOutput, Tuple]:
+ """
+ Predict the sample from the previous timestep by reversing the SDE.
+
+ Args:
+ model_output (torch.Tensor): Direct output from learned diffusion model
+ timestep (int): Current discrete timestep in the diffusion chain
+ sample (torch.Tensor): Current instance of sample created by diffusion process
+ eta (float): Weight of noise for added noise in diffusion step
+ use_clipped_model_output (bool): Whether to use clipped model output
+ generator (torch.Generator, optional): Random number generator
+ variance_noise (torch.Tensor, optional): Pre-generated noise for variance
+ return_dict (bool): Whether to return as DDIMSchedulerOutput or tuple
+ debug (bool): Whether to print debug information
+ """
+ if self.num_inference_steps is None:
+ raise ValueError("Number of inference steps is 'None', run 'set_timesteps' first")
+
+ # Calculate timesteps
+ step_size = self.config.num_train_timesteps // self.num_inference_steps
+ prev_timestep = timestep - step_size
+ next_timestep = timestep + step_size
+
+ if debug:
+ print("\n=== Timestep Information ===")
+ print(f"Current timestep: {timestep}")
+ print(f"Previous timestep: {prev_timestep}")
+ print(f"Next timestep: {next_timestep}")
+ print(f"Step size: {step_size}")
+
+ # Pre-compute alpha and variance values
+ alpha_prod_t = self.alphas_cumprod[timestep]
+ alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod
+ variance = self._get_variance(timestep, prev_timestep)
+ std_dev_t = eta * variance ** 0.5
+
+ # Compute required values
+ alpha_i = alpha_prod_t ** 0.5
+ alpha_i_minus_1 = alpha_prod_t_prev ** 0.5
+ sigma_i = (1 - alpha_prod_t) ** 0.5
+ sigma_i_minus_1 = (1 - alpha_prod_t_prev - std_dev_t**2) ** 0.5
+
+ if debug:
+ print("\n=== Alpha Values ===")
+ print(f"alpha_i: {alpha_i}")
+ print(f"alpha_i_minus_1: {alpha_i_minus_1}")
+ print(f"sigma_i: {sigma_i}")
+ print(f"sigma_i_minus_1: {sigma_i_minus_1}")
+
+ # Predict original sample based on prediction type
+ if self.config.prediction_type == "epsilon":
+ pred_original_sample = (sample - sigma_i * model_output) / alpha_i
+ pred_epsilon = model_output
+ if debug:
+ print("\nPrediction type: epsilon")
+ elif self.config.prediction_type == "sample":
+ pred_original_sample = model_output
+ pred_epsilon = (sample - alpha_i * pred_original_sample) / sigma_i
+ if debug:
+ print("\nPrediction type: sample")
+ elif self.config.prediction_type == "v_prediction":
+ pred_original_sample = alpha_i * sample - sigma_i * model_output
+ pred_epsilon = alpha_i * model_output + sigma_i * sample
+ if debug:
+ print("\nPrediction type: v_prediction")
+ else:
+ raise ValueError(
+ f"prediction_type {self.config.prediction_type} must be one of `epsilon`, `sample`, or `v_prediction`"
+ )
+
+ # Apply thresholding or clipping if configured
+ if self.config.thresholding:
+ if debug:
+ print("\nApplying thresholding")
+ pred_original_sample = self._threshold_sample(pred_original_sample)
+ elif self.config.clip_sample:
+ if debug:
+ print("\nApplying clipping")
+ pred_original_sample = pred_original_sample.clamp(
+ -self.config.clip_sample_range, self.config.clip_sample_range
+ )
+
+ # Recompute pred_epsilon if using clipped model output
+ if use_clipped_model_output:
+ if debug:
+ print("\nUsing clipped model output")
+ pred_epsilon = (sample - alpha_i * pred_original_sample) / sigma_i
+
+ # Compute DDIM step
+ ddim_step = alpha_i_minus_1 * pred_original_sample + sigma_i_minus_1 * pred_epsilon
+
+ # Handle initial DDIM step or BDIA steps
+ if len(self.next_sample) == 0:
+ if debug:
+ print("\nFirst iteration (DDIM)")
+ self.update_next_sample_BDIA(sample)
+ self.update_next_sample_BDIA(ddim_step)
+ else:
+ if debug:
+ print("\nBDIA step")
+ # BDIA implementation
+ alpha_prod_t_next = self.alphas_cumprod[next_timestep]
+ alpha_i_plus_1 = alpha_prod_t_next ** 0.5
+ sigma_i_plus_1 = (1 - alpha_prod_t_next) ** 0.5
+
+ if debug:
+ print(f"alpha_i_plus_1: {alpha_i_plus_1}")
+ print(f"sigma_i_plus_1: {sigma_i_plus_1}")
+
+ a = alpha_i_plus_1 * pred_original_sample + sigma_i_plus_1 * pred_epsilon
+ bdia_step = (
+ self.config.gamma * self.next_sample[-2] +
+ ddim_step -
+ (self.config.gamma * a)
+ )
+ self.update_next_sample_BDIA(bdia_step)
+
+ prev_sample = self.next_sample[-1]
+
+ # Apply variance noise if eta > 0
+ if eta > 0:
+ if debug:
+ print(f"\nApplying variance noise with eta: {eta}")
+
+ if variance_noise is not None and generator is not None:
+ raise ValueError(
+ "Cannot pass both generator and variance_noise. Use either `generator` or `variance_noise`."
+ )
+
+ if variance_noise is None:
+ variance_noise = randn_tensor(
+ model_output.shape,
+ generator=generator,
+ device=model_output.device,
+ dtype=model_output.dtype
+ )
+ prev_sample = prev_sample + std_dev_t * variance_noise
+
+ if not return_dict:
+ return (prev_sample,)
+
+ return DDIMSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample)
+
+ def add_noise(
+ self,
+ original_samples: torch.Tensor,
+ noise: torch.Tensor,
+ timesteps: torch.IntTensor,
+ ) -> torch.Tensor:
+ # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
+ # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
+ # for the subsequent add_noise calls
+ self.alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device)
+ alphas_cumprod = self.alphas_cumprod.to(dtype=original_samples.dtype)
+ timesteps = timesteps.to(original_samples.device)
+
+ sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
+ sqrt_alpha_prod = sqrt_alpha_prod.flatten()
+ while len(sqrt_alpha_prod.shape) < len(original_samples.shape):
+ sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
+
+ sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
+ sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten()
+ while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape):
+ sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
+
+ noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise
+ return noisy_samples
+
+ # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
+ def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor:
+ # Make sure alphas_cumprod and timestep have same device and dtype as sample
+ self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
+ alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
+ timesteps = timesteps.to(sample.device)
+
+ sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
+ sqrt_alpha_prod = sqrt_alpha_prod.flatten()
+ while len(sqrt_alpha_prod.shape) < len(sample.shape):
+ sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
+
+ sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
+ sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten()
+ while len(sqrt_one_minus_alpha_prod.shape) < len(sample.shape):
+ sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
+
+ velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample
+ return velocity
+
+ def update_next_sample_BDIA(self, new_value):
+ self.next_sample.append(new_value.clone())
+
+
+ def __len__(self):
+ return self.config.num_train_timesteps
\ No newline at end of file
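As a hedged usage sketch, the new scheduler can be attached to a diffusers pipeline like any other DDIM-style scheduler; inside SD.Next the wiring happens in sd_samplers_diffusers.py below, so the model id and explicit gamma value here are purely illustrative (gamma=0 reduces the BDIA step to plain DDIM):

from diffusers import StableDiffusionPipeline
from modules.schedulers.scheduler_bdia import BDIA_DDIMScheduler

pipe = StableDiffusionPipeline.from_pretrained('runwayml/stable-diffusion-v1-5')
pipe.scheduler = BDIA_DDIMScheduler.from_config(pipe.scheduler.config, gamma=1.0)
image = pipe('a mountain lake at sunset', num_inference_steps=20).images[0]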
diff --git a/modules/sd_samplers_diffusers.py b/modules/sd_samplers_diffusers.py
index 4672df92e..7c23d4342 100644
--- a/modules/sd_samplers_diffusers.py
+++ b/modules/sd_samplers_diffusers.py
@@ -52,6 +52,7 @@
from modules.schedulers.scheduler_dc import DCSolverMultistepScheduler # pylint: disable=ungrouped-imports
from modules.schedulers.scheduler_vdm import VDMScheduler # pylint: disable=ungrouped-imports
from modules.schedulers.scheduler_dpm_flowmatch import FlowMatchDPMSolverMultistepScheduler # pylint: disable=ungrouped-imports
+ from modules.schedulers.scheduler_bdia import BDIA_DDIMScheduler # pylint: disable=ungrouped-imports
except Exception as e:
shared.log.error(f'Diffusers import error: version={diffusers.__version__} error: {e}')
if os.environ.get('SD_SAMPLER_DEBUG', None) is not None:
@@ -97,6 +98,7 @@
'VDM Solver': { 'clip_sample_range': 2.0, },
'LCM': { 'beta_start': 0.00085, 'beta_end': 0.012, 'beta_schedule': "scaled_linear", 'set_alpha_to_one': True, 'rescale_betas_zero_snr': False, 'thresholding': False, 'timestep_spacing': 'linspace' },
'TCD': { 'set_alpha_to_one': True, 'rescale_betas_zero_snr': False, 'beta_schedule': 'scaled_linear' },
+ 'BDIA DDIM': { 'clip_sample': False, 'set_alpha_to_one': True, 'steps_offset': 0, 'clip_sample_range': 1.0, 'sample_max_value': 1.0, 'timestep_spacing': 'leading', 'rescale_betas_zero_snr': False, 'thresholding': False, 'gamma': 1.0 },
'PNDM': { 'skip_prk_steps': False, 'set_alpha_to_one': False, 'steps_offset': 0, 'timestep_spacing': 'linspace' },
'IPNDM': { },
@@ -142,6 +144,7 @@
sd_samplers_common.SamplerData('SA Solver', lambda model: DiffusionSampler('SA Solver', SASolverScheduler, model), [], {}),
sd_samplers_common.SamplerData('DC Solver', lambda model: DiffusionSampler('DC Solver', DCSolverMultistepScheduler, model), [], {}),
sd_samplers_common.SamplerData('VDM Solver', lambda model: DiffusionSampler('VDM Solver', VDMScheduler, model), [], {}),
+ sd_samplers_common.SamplerData('BDIA DDIM', lambda model: DiffusionSampler('BDIA DDIM g=0', BDIA_DDIMScheduler, model), [], {}),
sd_samplers_common.SamplerData('PNDM', lambda model: DiffusionSampler('PNDM', PNDMScheduler, model), [], {}),
sd_samplers_common.SamplerData('IPNDM', lambda model: DiffusionSampler('IPNDM', IPNDMScheduler, model), [], {}),
diff --git a/modules/shared.py b/modules/shared.py
index 10167c809..269510b08 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -20,7 +20,7 @@
from modules.paths import models_path, script_path, data_path, sd_configs_path, sd_default_config, sd_model_file, default_sd_model_file, extensions_dir, extensions_builtin_dir # pylint: disable=W0611
from modules.dml import memory_providers, default_memory_provider, directml_do_hijack
from modules.onnx_impl import initialize_onnx, execution_providers
-from modules.memstats import memory_stats
+from modules.memstats import memory_stats, ram_stats # pylint: disable=unused-import
from modules.ui_components import DropdownEditable
import modules.interrogate
import modules.memmon
From 9acfb4431d8cacceed8425eb9e68e737f7e8913f Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 2 Dec 2024 11:30:30 -0500
Subject: [PATCH 074/162] update light theme
Signed-off-by: Vladimir Mandic
---
extensions-builtin/sdnext-modernui | 2 +-
javascript/light-teal.css | 8 ++++----
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/extensions-builtin/sdnext-modernui b/extensions-builtin/sdnext-modernui
index f083ce41a..3008cee4b 160000
--- a/extensions-builtin/sdnext-modernui
+++ b/extensions-builtin/sdnext-modernui
@@ -1 +1 @@
-Subproject commit f083ce41a9f18b500f26745ea9e86855e509d2cb
+Subproject commit 3008cee4b67bb00f8f1a4fe4510ec27ba92aa418
diff --git a/javascript/light-teal.css b/javascript/light-teal.css
index 28bf03e6f..174622e52 100644
--- a/javascript/light-teal.css
+++ b/javascript/light-teal.css
@@ -20,9 +20,9 @@
--body-text-color: var(--neutral-800);
--body-text-color-subdued: var(--neutral-600);
--background-color: #FFFFFF;
- --background-fill-primary: var(--neutral-400);
+ --background-fill-primary: var(--neutral-300);
--input-padding: 4px;
- --input-background-fill: var(--neutral-300);
+ --input-background-fill: var(--neutral-200);
--input-shadow: 2px 2px 2px 2px var(--neutral-500);
--button-secondary-text-color: black;
--button-secondary-background-fill: linear-gradient(to bottom right, var(--neutral-200), var(--neutral-500));
@@ -291,8 +291,8 @@ svg.feather.feather-image, .feather .feather-image { display: none }
--slider-color: ;
--stat-background-fill: linear-gradient(to right, var(--primary-400), var(--primary-600));
--table-border-color: var(--neutral-700);
- --table-even-background-fill: #222222;
- --table-odd-background-fill: #333333;
+ --table-even-background-fill: #FFFFFF;
+ --table-odd-background-fill: #CCCCCC;
--table-radius: var(--radius-lg);
--table-row-focus: var(--color-accent-soft);
}
From 52ea1813898f0a809e16600ae5980bae8f0887c2 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 2 Dec 2024 12:26:27 -0500
Subject: [PATCH 075/162] update requirements and notes
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 9 +++++++--
TODO.md | 3 ++-
requirements.txt | 2 +-
3 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8091176e8..748ac6f04 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,13 +29,13 @@
style-aligned applies selected attention layers uniformly to all images to achieve consistency
can be used with or without an input image, in which case the first prompt is used to establish the baseline
*note:* all prompts are processed as a single batch, so vram is the limiting factor
-- **OpenVINO**: update to 2024.5.0
### UI and workflow improvements
- **LoRA** handler rewrite:
- LoRA weights are no longer calculated on-the-fly during model execution, but are pre-calculated at the start
this adds perceived overhead at generate startup, but results in overall faster execution as LoRA does not need to be processed on each step
+ thanks @AI-Casanova
- *note*: LoRA weight backups are required so LoRA can be unapplied, but they can take quite a lot of system memory
  if you know you will not need to unapply LoRA, you can disable backups in *settings -> networks -> lora fuse*
  in which case, you need to reload the model to unapply LoRA
@@ -51,6 +51,7 @@
- faster and more compatible *balanced* mode
- balanced offload: units are now in percentage instead of bytes
- balanced offload: add both high and low watermark
+    *note*: balanced offload is the recommended offload method when using any large model such as sd35 or flux
- **UI**:
- improved stats on generate completion
- improved live preview display and performance
@@ -60,7 +61,11 @@
- control: option to hide input column
- control: add stats
- browser -> server logging framework
-  - add additional themes: `black-reimagined`
+  - add additional themes: `black-reimagined`, thanks @Artheriax
+
+### Updates
+
+- **OpenVINO**: update to 2024.5.0
- **Sampler** improvements
- Euler FlowMatch: add sigma methods (*karras/exponential/betas*)
- DPM FlowMatch: update all and add sigma methods
diff --git a/TODO.md b/TODO.md
index 73008039d..90372e41f 100644
--- a/TODO.md
+++ b/TODO.md
@@ -9,7 +9,8 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
- Flux IPAdapter:
- Flux NF4:
- SANA:
-- LTX-Video:
+- LTX-Video:
+- TorchAO:
## Other
diff --git a/requirements.txt b/requirements.txt
index 12a9f85cb..3b1b14c7d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -52,7 +52,7 @@ numba==0.59.1
protobuf==4.25.3
pytorch_lightning==1.9.4
tokenizers==0.20.3
-transformers==4.46.2
+transformers==4.46.3
urllib3==1.26.19
Pillow==10.4.0
timm==0.9.16
From bd192d2991dd8895117e87ed892bc058f84b4cf1 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 2 Dec 2024 16:27:22 -0500
Subject: [PATCH 076/162] remove tracemalloc
Signed-off-by: Vladimir Mandic
---
launch.py | 3 ---
modules/cmd_args.py | 1 -
modules/processing.py | 8 --------
3 files changed, 12 deletions(-)
diff --git a/launch.py b/launch.py
index 5c8a6051a..e00da58c7 100755
--- a/launch.py
+++ b/launch.py
@@ -192,9 +192,6 @@ def main():
global args # pylint: disable=global-statement
installer.ensure_base_requirements()
init_args() # setup argparser and default folders
- if args.malloc:
- import tracemalloc
- tracemalloc.start()
installer.args = args
installer.setup_logging()
installer.log.info('Starting SD.Next')
diff --git a/modules/cmd_args.py b/modules/cmd_args.py
index cb4e5fc16..752ad02c0 100644
--- a/modules/cmd_args.py
+++ b/modules/cmd_args.py
@@ -26,7 +26,6 @@ def main_args():
group_diag.add_argument("--no-hashing", default=os.environ.get("SD_NOHASHING", False), action='store_true', help="Disable hashing of checkpoints, default: %(default)s")
group_diag.add_argument("--no-metadata", default=os.environ.get("SD_NOMETADATA", False), action='store_true', help="Disable reading of metadata from models, default: %(default)s")
group_diag.add_argument("--profile", default=os.environ.get("SD_PROFILE", False), action='store_true', help="Run profiler, default: %(default)s")
- group_diag.add_argument("--malloc", default=os.environ.get("SD_PROFILE", False), action='store_true', help="Trace memory ops, default: %(default)s")
group_diag.add_argument("--disable-queue", default=os.environ.get("SD_DISABLEQUEUE", False), action='store_true', help="Disable queues, default: %(default)s")
group_diag.add_argument('--debug', default=os.environ.get("SD_DEBUG", False), action='store_true', help = "Run installer with debug logging, default: %(default)s")
diff --git a/modules/processing.py b/modules/processing.py
index 095eba54c..57512850a 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -475,13 +475,5 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
if not p.disable_extra_networks:
shared.log.info(f'Processed: images={len(output_images)} its={(p.steps * len(output_images)) / (t1 - t0):.2f} time={t1-t0:.2f} timers={timer.process.dct(min_time=0.02)} memory={memstats.memory_stats()}')
- if shared.cmd_opts.malloc:
- import tracemalloc
- snapshot = tracemalloc.take_snapshot()
- stats = snapshot.statistics('lineno')
- shared.log.debug('Profile malloc:')
- for stat in stats[:20]:
- frame = stat.traceback[0]
- shared.log.debug(f' file="{frame.filename}":{frame.lineno} size={stat.size}')
devices.torch_gc(force=True)
return processed
From e3f06734d04bec232ddf234e9fa550375fde9638 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 3 Dec 2024 08:52:53 -0500
Subject: [PATCH 077/162] samplers add custom sigma
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 1 +
installer.py | 2 +-
modules/processing_args.py | 15 +++++++++++++++
3 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 748ac6f04..51d61aa46 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -68,6 +68,7 @@
- **OpenVINO**: update to 2024.5.0
- **Sampler** improvements
- Euler FlowMatch: add sigma methods (*karras/exponential/betas*)
+ - Euler FlowMatch: allow using timestep presets to set sigmas
- DPM FlowMatch: update all and add sigma methods
- BDIA-DDIM: *experimental*
diff --git a/installer.py b/installer.py
index c849ac5b6..e03512301 100644
--- a/installer.py
+++ b/installer.py
@@ -459,7 +459,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
def check_diffusers():
if args.skip_all or args.skip_requirements:
return
- sha = 'c96bfa5c80eca798d555a79a491043c311d0f608'
+ sha = '63b631f38336f56755fb5cf15d9b0fb70bbf6323' # diffusers commit hash
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
cur = opts.get('diffusers_version', '') if minor > 0 else ''
diff --git a/modules/processing_args.py b/modules/processing_args.py
index 4ce552825..d73762d29 100644
--- a/modules/processing_args.py
+++ b/modules/processing_args.py
@@ -186,6 +186,21 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2
shared.log.error(f'Sampler timesteps: {e}')
else:
shared.log.warning(f'Sampler: sampler={model.scheduler.__class__.__name__} timesteps not supported')
+ if 'sigmas' in possible:
+ sigmas = re.split(',| ', shared.opts.schedulers_timesteps)
+ sigmas = [float(x)/1000.0 for x in sigmas if x.isdigit()]
+ if len(sigmas) > 0:
+ if hasattr(model.scheduler, 'set_timesteps') and "sigmas" in set(inspect.signature(model.scheduler.set_timesteps).parameters.keys()):
+ try:
+ args['sigmas'] = sigmas
+ p.steps = len(sigmas)
+ p.timesteps = sigmas
+ steps = p.steps
+ shared.log.debug(f'Sampler: steps={len(sigmas)} sigmas={sigmas}')
+ except Exception as e:
+ shared.log.error(f'Sampler sigmas: {e}')
+ else:
+ shared.log.warning(f'Sampler: sampler={model.scheduler.__class__.__name__} sigmas not supported')
if hasattr(model, 'scheduler') and hasattr(model.scheduler, 'noise_sampler_seed') and hasattr(model.scheduler, 'noise_sampler'):
model.scheduler.noise_sampler = None # noise needs to be reset instead of using cached values
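The new `sigmas` branch mirrors the existing timesteps handling: the comma/space-separated preset string is split, scaled from the 0-1000 timestep range down to 0-1 sigmas, and only forwarded when the scheduler's `set_timesteps` signature accepts a `sigmas` argument. A rough standalone sketch of that parsing and signature check (the `DummyScheduler` below is a stand-in to illustrate the check, not an SD.Next or diffusers class):

```python
# Sketch of the sigma-preset parsing added above.
import re
import inspect

def parse_sigmas(preset: str) -> list:
    parts = re.split(',| ', preset)
    return [float(x) / 1000.0 for x in parts if x.isdigit()]  # timesteps (0-1000) -> sigmas (0-1)

class DummyScheduler:
    def set_timesteps(self, num_inference_steps=None, sigmas=None, device=None):
        self.sigmas = sigmas

scheduler = DummyScheduler()
sigmas = parse_sigmas('1000, 750, 500, 250')
if sigmas and 'sigmas' in inspect.signature(scheduler.set_timesteps).parameters:
    scheduler.set_timesteps(sigmas=sigmas)
    print(f'steps={len(sigmas)} sigmas={sigmas}')  # step count follows the preset length
```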
From 1edf657247a84922405494365137772069f125e0 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 3 Dec 2024 09:46:23 -0500
Subject: [PATCH 078/162] add nvml charts
Signed-off-by: Vladimir Mandic
---
extensions-builtin/sd-extension-system-info | 2 +-
javascript/nvml.js | 32 +++++++++++++++++++++
2 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/extensions-builtin/sd-extension-system-info b/extensions-builtin/sd-extension-system-info
index 6a2a28a4f..dfa01ce99 160000
--- a/extensions-builtin/sd-extension-system-info
+++ b/extensions-builtin/sd-extension-system-info
@@ -1 +1 @@
-Subproject commit 6a2a28a4f674b85e09824384ad842b801426b491
+Subproject commit dfa01ce99a17d76b45284ef28cef018ff52ac353
diff --git a/javascript/nvml.js b/javascript/nvml.js
index cf0187367..39850c9d8 100644
--- a/javascript/nvml.js
+++ b/javascript/nvml.js
@@ -1,6 +1,32 @@
let nvmlInterval = null; // eslint-disable-line prefer-const
let nvmlEl = null;
let nvmlTable = null;
+const chartData = { mem: [], load: [] };
+
+async function updateNVMLChart(mem, load) {
+ const maxLen = 120;
+ const colorRangeMap = $.range_map({
+ '0:5': '#fffafa',
+ '6:10': '#fff7ed',
+ '11:20': '#fed7aa',
+ '21:30': '#fdba74',
+ '31:40': '#fb923c',
+ '41:50': '#f97316',
+ '51:60': '#ea580c',
+ '61:70': '#c2410c',
+ '71:80': '#9a3412',
+ '81:90': '#7c2d12',
+ '91:100': '#6c2e12',
+ });
+ const sparklineConfigLOAD = { type: 'bar', height: '100px', barWidth: '2px', barSpacing: '1px', chartRangeMin: 0, chartRangeMax: 100, barColor: '#89007D' };
+ const sparklineConfigMEM = { type: 'bar', height: '100px', barWidth: '2px', barSpacing: '1px', chartRangeMin: 0, chartRangeMax: 100, colorMap: colorRangeMap, composite: true };
+ if (chartData.load.length > maxLen) chartData.load.shift();
+ chartData.load.push(load);
+ if (chartData.mem.length > maxLen) chartData.mem.shift();
+ chartData.mem.push(mem);
+ $('#nvmlChart').sparkline(chartData.load, sparklineConfigLOAD);
+ $('#nvmlChart').sparkline(chartData.mem, sparklineConfigMEM);
+}
async function updateNVML() {
try {
@@ -35,6 +61,9 @@ async function updateNVML() {
State | ${gpu.state} |
`;
nvmlTbody.innerHTML = rows;
+ const mem = 100 * (gpu.memory?.used || 0) / (gpu.memory?.total || 1);
+ const load = 100 * (gpu.clock?.gpu?.[0] || 0) / (gpu.clock?.gpu?.[1] || 1);
+ updateNVMLChart(mem, load);
}
nvmlEl.style.display = 'block';
} catch (e) {
@@ -56,7 +85,10 @@ async function initNVML() {
| |
`;
+ const nvmlChart = document.createElement('div');
+ nvmlChart.id = 'nvmlChart';
nvmlEl.appendChild(nvmlTable);
+ nvmlEl.appendChild(nvmlChart);
gradioApp().appendChild(nvmlEl);
log('initNVML');
}
From cd44f4ebf393e87420edefcdff3a046e9d5139eb Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 3 Dec 2024 09:52:15 -0500
Subject: [PATCH 079/162] cleanup nvml
Signed-off-by: Vladimir Mandic
---
javascript/nvml.js | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/javascript/nvml.js b/javascript/nvml.js
index 39850c9d8..0a82cba1b 100644
--- a/javascript/nvml.js
+++ b/javascript/nvml.js
@@ -5,7 +5,7 @@ const chartData = { mem: [], load: [] };
async function updateNVMLChart(mem, load) {
const maxLen = 120;
- const colorRangeMap = $.range_map({
+ const colorRangeMap = $.range_map({ // eslint-disable-line no-undef
'0:5': '#fffafa',
'6:10': '#fff7ed',
'11:20': '#fed7aa',
@@ -24,8 +24,8 @@ async function updateNVMLChart(mem, load) {
chartData.load.push(load);
if (chartData.mem.length > maxLen) chartData.mem.shift();
chartData.mem.push(mem);
- $('#nvmlChart').sparkline(chartData.load, sparklineConfigLOAD);
- $('#nvmlChart').sparkline(chartData.mem, sparklineConfigMEM);
+ $('#nvmlChart').sparkline(chartData.load, sparklineConfigLOAD); // eslint-disable-line no-undef
+ $('#nvmlChart').sparkline(chartData.mem, sparklineConfigMEM); // eslint-disable-line no-undef
}
async function updateNVML() {
@@ -61,9 +61,7 @@ async function updateNVML() {
State | ${gpu.state} |
`;
nvmlTbody.innerHTML = rows;
- const mem = 100 * (gpu.memory?.used || 0) / (gpu.memory?.total || 1);
- const load = 100 * (gpu.clock?.gpu?.[0] || 0) / (gpu.clock?.gpu?.[1] || 1);
- updateNVMLChart(mem, load);
+ updateNVMLChart(gpu.load.memory, gpu.load.gpu);
}
nvmlEl.style.display = 'block';
} catch (e) {
From e41d9f52cdf0fceeecdb456cd38256dc8e57b3e7 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 4 Dec 2024 08:47:22 -0500
Subject: [PATCH 080/162] add offload warning
Signed-off-by: Vladimir Mandic
---
modules/sd_models.py | 17 +++++++++++------
wiki | 2 +-
2 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 37567962c..101ff837b 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -34,6 +34,7 @@
debug_process = shared.log.trace if os.environ.get('SD_PROCESS_DEBUG', None) is not None else lambda *args, **kwargs: None
diffusers_version = int(diffusers.__version__.split('.')[1])
checkpoint_tiles = checkpoint_titles # legacy compatibility
+should_offload = ['sc', 'sd3', 'f1', 'hunyuandit', 'auraflow', 'omnigen']
class NoWatermark:
@@ -320,11 +321,15 @@ def set_diffuser_offload(sd_model, op: str = 'model'):
return
if not (hasattr(sd_model, "has_accelerate") and sd_model.has_accelerate):
sd_model.has_accelerate = False
- if hasattr(sd_model, 'maybe_free_model_hooks') and shared.opts.diffusers_offload_mode == "none":
- shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} limit={shared.opts.cuda_mem_fraction}')
- sd_model.maybe_free_model_hooks()
- sd_model.has_accelerate = False
- if hasattr(sd_model, "enable_model_cpu_offload") and shared.opts.diffusers_offload_mode == "model":
+ if shared.opts.diffusers_offload_mode == "none":
+ if shared.sd_model_type in should_offload:
+ shared.log.warning(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} type={shared.sd_model.__class__.__name__} large model')
+ else:
+ shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} limit={shared.opts.cuda_mem_fraction}')
+ if hasattr(sd_model, 'maybe_free_model_hooks'):
+ sd_model.maybe_free_model_hooks()
+ sd_model.has_accelerate = False
+ if shared.opts.diffusers_offload_mode == "model" and hasattr(sd_model, "enable_model_cpu_offload"):
try:
shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} limit={shared.opts.cuda_mem_fraction}')
if shared.opts.diffusers_move_base or shared.opts.diffusers_move_unet or shared.opts.diffusers_move_refiner:
@@ -339,7 +344,7 @@ def set_diffuser_offload(sd_model, op: str = 'model'):
set_accelerate(sd_model)
except Exception as e:
shared.log.error(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} {e}')
- if hasattr(sd_model, "enable_sequential_cpu_offload") and shared.opts.diffusers_offload_mode == "sequential":
+ if shared.opts.diffusers_offload_mode == "sequential" and hasattr(sd_model, "enable_sequential_cpu_offload"):
try:
shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} limit={shared.opts.cuda_mem_fraction}')
if shared.opts.diffusers_move_base or shared.opts.diffusers_move_unet or shared.opts.diffusers_move_refiner:
diff --git a/wiki b/wiki
index f57cdb49d..2a83f725b 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit f57cdb49d8ca928024b43525897d1c1379eab4c4
+Subproject commit 2a83f725bda6a81399f579ba7102741f71b0be39
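The patch above keeps `offload=none` working but warns when the loaded model belongs to one of the large families listed in `should_offload`, since those are likely to exhaust VRAM without offloading. A simplified sketch of that decision (the `report_offload` helper and plain `logging` logger are illustrative assumptions, not SD.Next code):

```python
# Sketch of the warning logic added to set_diffuser_offload above.
import logging

log = logging.getLogger('sd')
should_offload = ['sc', 'sd3', 'f1', 'hunyuandit', 'auraflow', 'omnigen']  # large model families

def report_offload(offload_mode: str, model_type: str, op: str = 'model') -> None:
    if offload_mode == 'none' and model_type in should_offload:
        # large models without offload are likely to exhaust VRAM, hence a warning
        log.warning(f'Setting {op}: offload={offload_mode} type={model_type} large model')
    else:
        log.debug(f'Setting {op}: offload={offload_mode}')

report_offload('none', 'f1')  # warns: a FLUX-class model with offload disabled
```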
From ca060026020ccd8c1720fedc25fa2bcea0478d46 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 4 Dec 2024 09:25:57 -0500
Subject: [PATCH 081/162] lora maintain device
Signed-off-by: Vladimir Mandic
---
modules/lora/networks.py | 63 ++++++++++++++++--------------
modules/model_stablecascade.py | 6 +--
modules/processing_diffusers.py | 12 ++----
modules/processing_info.py | 1 +
modules/prompt_parser_diffusers.py | 4 --
modules/sd_models.py | 2 +
6 files changed, 42 insertions(+), 46 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 14fce760a..735c00c4c 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -307,7 +307,7 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=Non
# section: process loaded networks
-def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight, network_layer_name, wanted_names):
+def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight: torch.Tensor, network_layer_name: str, wanted_names: tuple):
global bnb # pylint: disable=W0603
backup_size = 0
if len(loaded_networks) > 0 and network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419 # pylint: disable=R1729
@@ -356,7 +356,7 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
return backup_size
-def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight, network_layer_name):
+def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight: torch.Tensor, network_layer_name: str):
if shared.opts.diffusers_offload_mode == "none":
self.to(devices.device, non_blocking=True)
batch_updown = None
@@ -403,7 +403,7 @@ def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
return batch_updown, batch_ex_bias
-def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown, ex_bias):
+def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown: torch.Tensor, ex_bias: torch.Tensor, orig_device: torch.device):
t0 = time.time()
weights_backup = getattr(self, "network_weights_backup", None)
bias_backup = getattr(self, "network_bias_backup", None)
@@ -421,10 +421,10 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
else:
- self.weight = torch.nn.Parameter(new_weight, requires_grad=False)
+ self.weight = torch.nn.Parameter(new_weight.to(device=orig_device, non_blocking=True), requires_grad=False)
del new_weight
else:
- self.weight = torch.nn.Parameter(weights_backup, requires_grad=False)
+ self.weight = torch.nn.Parameter(weights_backup.to(device=orig_device, non_blocking=True), requires_grad=False)
if hasattr(self, "qweight") and hasattr(self, "freeze"):
self.freeze()
if bias_backup is not None:
@@ -434,10 +434,10 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
self.bias = None
if ex_bias is not None:
new_weight = bias_backup.to(devices.device, non_blocking=True) + ex_bias.to(devices.device, non_blocking=True)
- self.bias = torch.nn.Parameter(new_weight, requires_grad=False)
+ self.bias = torch.nn.Parameter(new_weight.to(device=orig_device, non_blocking=True), requires_grad=False)
del new_weight
else:
- self.bias = torch.nn.Parameter(bias_backup, requires_grad=False)
+ self.bias = torch.nn.Parameter(bias_backup.to(device=orig_device, non_blocking=True), requires_grad=False)
else:
self.bias = None
t1 = time.time()
@@ -457,14 +457,15 @@ def network_activate():
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.disable_offload(sd_model)
sd_models.move_model(sd_model, device=devices.cpu)
- modules = []
+ modules = {}
for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
component = getattr(sd_model, component_name, None)
if component is not None and hasattr(component, 'named_modules'):
- modules += list(component.named_modules())
+ modules[component_name] = list(component.named_modules())
+ total = sum(len(x) for x in modules.values())
if len(loaded_networks) > 0:
pbar = rp.Progress(rp.TextColumn('[cyan]Apply network: type=LoRA'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
- task = pbar.add_task(description='' , total=len(modules))
+ task = pbar.add_task(description='' , total=total)
else:
task = None
pbar = nullcontext()
@@ -474,29 +475,31 @@ def network_activate():
backup_size = 0
weights_devices = []
weights_dtypes = []
- for _, module in modules:
- network_layer_name = getattr(module, 'network_layer_name', None)
- current_names = getattr(module, "network_current_names", ())
- if shared.state.interrupted or network_layer_name is None or current_names == wanted_names:
+ for component in modules.keys():
+ orig_device = getattr(sd_model, component, None).device
+ for _, module in modules[component]:
+ network_layer_name = getattr(module, 'network_layer_name', None)
+ current_names = getattr(module, "network_current_names", ())
+ if shared.state.interrupted or network_layer_name is None or current_names == wanted_names:
+ if task is not None:
+ pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} skip')
+ continue
+ weight = getattr(module, 'weight', None)
+ weight = weight.to(devices.device, non_blocking=True) if weight is not None else None
+ backup_size += network_backup_weights(module, weight, network_layer_name, wanted_names)
+ batch_updown, batch_ex_bias = network_calc_weights(module, weight, network_layer_name)
+ weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias, orig_device)
+ weights_devices.append(weights_device)
+ weights_dtypes.append(weights_dtype)
+ if batch_updown is not None or batch_ex_bias is not None:
+ applied += 1
+ del weight, batch_updown, batch_ex_bias
+ module.network_current_names = wanted_names
if task is not None:
- pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} skip')
- continue
- weight = getattr(module, 'weight', None)
- weight = weight.to(devices.device, non_blocking=True) if weight is not None else None
- backup_size += network_backup_weights(module, weight, network_layer_name, wanted_names)
- batch_updown, batch_ex_bias = network_calc_weights(module, weight, network_layer_name)
- weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias)
- weights_devices.append(weights_device)
- weights_dtypes.append(weights_dtype)
- if batch_updown is not None or batch_ex_bias is not None:
- applied += 1
- del weight, batch_updown, batch_ex_bias
- module.network_current_names = wanted_names
- if task is not None:
- pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} modules={len(modules)} apply={applied} backup={backup_size}')
+ pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} modules={len(modules)} apply={applied} backup={backup_size}')
weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403 # pylint: disable=R1718
if debug and len(loaded_networks) > 0:
- shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} modules={len(modules)} apply={applied} device={weights_devices} dtype={weights_dtypes} backup={backup_size} fuse={shared.opts.lora_fuse_diffusers} time={get_timers()}')
+ shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} modules={total} apply={applied} device={weights_devices} dtype={weights_dtypes} backup={backup_size} fuse={shared.opts.lora_fuse_diffusers} time={get_timers()}')
modules.clear()
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.set_diffuser_offload(sd_model, op="model")
diff --git a/modules/model_stablecascade.py b/modules/model_stablecascade.py
index d6f9e4266..2a7739e55 100644
--- a/modules/model_stablecascade.py
+++ b/modules/model_stablecascade.py
@@ -187,8 +187,7 @@ def __call__(
callback_on_step_end=None,
callback_on_step_end_tensor_inputs=["latents"],
):
- if shared.opts.diffusers_offload_mode == "balanced":
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
# 0. Define commonly used variables
self.guidance_scale = guidance_scale
self.do_classifier_free_guidance = self.guidance_scale > 1
@@ -330,8 +329,7 @@ def __call__(
elif output_type == "pil":
images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy doesnt work
images = self.numpy_to_pil(images)
- if shared.opts.diffusers_offload_mode == "balanced":
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
else:
images = latents
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index c605a761c..0341cac4d 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -83,8 +83,7 @@ def process_base(p: processing.StableDiffusionProcessing):
try:
t0 = time.time()
sd_models_compile.check_deepcache(enable=True)
- if shared.opts.diffusers_offload_mode == "balanced":
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
sd_models.move_model(shared.sd_model, devices.device)
if hasattr(shared.sd_model, 'unet'):
sd_models.move_model(shared.sd_model.unet, devices.device)
@@ -266,8 +265,7 @@ def process_refine(p: processing.StableDiffusionProcessing, output):
if shared.state.interrupted or shared.state.skipped:
shared.sd_model = orig_pipeline
return output
- if shared.opts.diffusers_offload_mode == "balanced":
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
if shared.opts.diffusers_move_refiner:
sd_models.move_model(shared.sd_refiner, devices.device)
if hasattr(shared.sd_refiner, 'unet'):
@@ -407,8 +405,7 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
shared.sd_model = orig_pipeline
return results
- if shared.opts.diffusers_offload_mode == "balanced":
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
# sanitize init_images
if hasattr(p, 'init_images') and getattr(p, 'init_images', None) is None:
@@ -463,8 +460,7 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
shared.sd_model = orig_pipeline
- if shared.opts.diffusers_offload_mode == "balanced":
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
if p.state == '':
global last_p # pylint: disable=global-statement
diff --git a/modules/processing_info.py b/modules/processing_info.py
index 714ebf35f..e0fca12ae 100644
--- a/modules/processing_info.py
+++ b/modules/processing_info.py
@@ -140,6 +140,7 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
if sd_hijack is not None and hasattr(sd_hijack.model_hijack, 'embedding_db') and len(sd_hijack.model_hijack.embedding_db.embeddings_used) > 0: # this is for original hijaacked models only, diffusers are handled separately
args["Embeddings"] = ', '.join(sd_hijack.model_hijack.embedding_db.embeddings_used)
# samplers
+
if getattr(p, 'sampler_name', None) is not None:
args["Sampler eta delta"] = shared.opts.eta_noise_seed_delta if shared.opts.eta_noise_seed_delta != 0 and sd_samplers_common.is_sampler_using_eta_noise_seed_delta(p) else None
args["Sampler eta multiplier"] = p.initial_noise_multiplier if getattr(p, 'initial_noise_multiplier', 1.0) != 1.0 else None
diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py
index d2093351a..06c0b6012 100644
--- a/modules/prompt_parser_diffusers.py
+++ b/modules/prompt_parser_diffusers.py
@@ -39,8 +39,6 @@ def prepare_model(pipe = None):
pipe = pipe.pipe
if not hasattr(pipe, "text_encoder"):
return None
- # if shared.opts.diffusers_offload_mode == "balanced":
- # pipe = sd_models.apply_balanced_offload(pipe)
elif hasattr(pipe, "maybe_free_model_hooks"):
pipe.maybe_free_model_hooks()
devices.torch_gc()
@@ -79,8 +77,6 @@ def __init__(self, prompts, negative_prompts, steps, clip_skip, p):
self.scheduled_encode(pipe, batchidx)
else:
self.encode(pipe, prompt, negative_prompt, batchidx)
- # if shared.opts.diffusers_offload_mode == "balanced":
- # pipe = sd_models.apply_balanced_offload(pipe)
self.checkcache(p)
debug(f"Prompt encode: time={(time.time() - t0):.3f}")
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 101ff837b..63ec6b327 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -420,6 +420,8 @@ def detach_hook(self, module):
def apply_balanced_offload(sd_model):
global offload_hook_instance # pylint: disable=global-statement
+ if shared.opts.diffusers_offload_mode != "balanced":
+ return sd_model
if offload_hook_instance is None or offload_hook_instance.min_watermark != shared.opts.diffusers_offload_min_gpu_memory or offload_hook_instance.max_watermark != shared.opts.diffusers_offload_max_gpu_memory:
offload_hook_instance = OffloadHook()
t0 = time.time()
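The LoRA changes above record the device each pipeline component originally lives on and hand the patched weights back to that device after computing the update on the compute device, so offloaded components stay offloaded. A toy sketch of that round-trip (plain tensors stand in for module weights; `apply_update` is a hypothetical helper, not the SD.Next function):

```python
# Sketch of the "maintain device" idea: compute on the GPU, park the result
# back on whatever device the component originally lived on.
import torch

compute_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def apply_update(weight: torch.Tensor, updown: torch.Tensor) -> torch.Tensor:
    orig_device = weight.device                       # remember where the component lives
    new_weight = weight.to(compute_device) + updown.to(compute_device)
    return new_weight.to(orig_device)                 # hand the result back on the original device

w = torch.zeros(4, 4)                                 # e.g. a CPU-offloaded layer weight
delta = torch.ones(4, 4)
w = apply_update(w, delta)
print(w.device)  # stays on the original device
```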
From 2965045993acaf376bb9fd14f29c500cbc298c46 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Thu, 5 Dec 2024 07:58:52 -0500
Subject: [PATCH 082/162] change offload and upcast defaults
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 12 +++++++++---
configs/flux/vae/config.json | 2 +-
configs/sd15/vae/config.json | 1 +
configs/sd3/vae/config.json | 2 +-
configs/sdxl/vae/config.json | 2 +-
modules/shared.py | 14 +++++++-------
6 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 51d61aa46..d49c1c555 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -47,11 +47,17 @@
- Flux: all-in-one safetensors
example:
- Flux: do not recast quants
-- **Offload** improvements:
- - faster and more compatible *balanced* mode
+- **Memory** improvements:
+ - faster and more compatible *balanced offload* mode
- balanced offload: units are now in percentage instead of bytes
- balanced offload: add both high and low watermark
- *note*: balanced offload is recommended method for offload when using any large models such as sd35 or flux
+ default is 25% for low-watermark (skip offload if memory usage is below 25%) and 70% high-watermark (must offload if memory usage is above 70%)
+ - change-in-behavior:
      `lowvram` triggers *sequential offload*; it is also triggered automatically on systems with <=4GB VRAM
      all other systems use *balanced offload* by default (can be changed in settings)
      previous behavior was to use *model offload* on systems with <=8GB or with `medvram`, and no offload by default otherwise
    - VAE upcast is now disabled by default on all systems
      if you have issues with image decode, you'll need to enable it manually
- **UI**:
- improved stats on generate completion
- improved live preview display and performance
diff --git a/configs/flux/vae/config.json b/configs/flux/vae/config.json
index b43183d0f..7ecb342c2 100644
--- a/configs/flux/vae/config.json
+++ b/configs/flux/vae/config.json
@@ -14,7 +14,7 @@
"DownEncoderBlock2D",
"DownEncoderBlock2D"
],
- "force_upcast": true,
+ "force_upcast": false,
"in_channels": 3,
"latent_channels": 16,
"latents_mean": null,
diff --git a/configs/sd15/vae/config.json b/configs/sd15/vae/config.json
index 55d78924f..2cba0e824 100644
--- a/configs/sd15/vae/config.json
+++ b/configs/sd15/vae/config.json
@@ -14,6 +14,7 @@
"DownEncoderBlock2D",
"DownEncoderBlock2D"
],
+ "force_upcast": false,
"in_channels": 3,
"latent_channels": 4,
"layers_per_block": 2,
diff --git a/configs/sd3/vae/config.json b/configs/sd3/vae/config.json
index 58e7764fb..f6f4e8684 100644
--- a/configs/sd3/vae/config.json
+++ b/configs/sd3/vae/config.json
@@ -15,7 +15,7 @@
"DownEncoderBlock2D",
"DownEncoderBlock2D"
],
- "force_upcast": true,
+ "force_upcast": false,
"in_channels": 3,
"latent_channels": 16,
"latents_mean": null,
diff --git a/configs/sdxl/vae/config.json b/configs/sdxl/vae/config.json
index a66a171ba..1c7a60866 100644
--- a/configs/sdxl/vae/config.json
+++ b/configs/sdxl/vae/config.json
@@ -15,7 +15,7 @@
"DownEncoderBlock2D",
"DownEncoderBlock2D"
],
- "force_upcast": true,
+ "force_upcast": false,
"in_channels": 3,
"latent_channels": 4,
"layers_per_block": 2,
diff --git a/modules/shared.py b/modules/shared.py
index aa41a6fd6..068ee8b40 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -432,15 +432,15 @@ def get_default_modes():
cmd_opts.lowvram = True
default_offload_mode = "sequential"
log.info(f"Device detect: memory={gpu_memory:.1f} optimization=lowvram")
- elif gpu_memory <= 8:
- cmd_opts.medvram = True
- default_offload_mode = "model"
- log.info(f"Device detect: memory={gpu_memory:.1f} optimization=medvram")
+ # elif gpu_memory <= 8:
+ # cmd_opts.medvram = True
+ # default_offload_mode = "model"
+ # log.info(f"Device detect: memory={gpu_memory:.1f} optimization=medvram")
else:
- default_offload_mode = "none"
- log.info(f"Device detect: memory={gpu_memory:.1f} optimization=none")
+ default_offload_mode = "balanced"
+ log.info(f"Device detect: memory={gpu_memory:.1f} optimization=balanced")
elif cmd_opts.medvram:
- default_offload_mode = "model"
+ default_offload_mode = "balanced"
elif cmd_opts.lowvram:
default_offload_mode = "sequential"
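Summarizing the new defaults from the hunk above: systems with <=4GB VRAM or `lowvram` fall back to *sequential offload*, while everything else, including `medvram`, now defaults to *balanced offload*. A hedged sketch of that selection logic (the helper name is hypothetical, not the SD.Next function):

```python
# Sketch of the default offload selection after this change.
def get_default_offload_mode(gpu_memory_gb: float, medvram: bool = False, lowvram: bool = False) -> str:
    if lowvram or gpu_memory_gb <= 4:
        return 'sequential'   # low-end systems: sequential offload
    if medvram:
        return 'balanced'     # previously 'model'
    return 'balanced'         # previously 'none' on larger GPUs

print(get_default_offload_mode(24.0))  # 'balanced'
print(get_default_offload_mode(4.0))   # 'sequential'
```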
From 293494841306b971dd75c9c51dd446f1f7abc0b9 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Thu, 5 Dec 2024 09:04:42 -0500
Subject: [PATCH 083/162] lora one more safe cast
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 4 ++--
modules/lora/networks.py | 8 ++------
modules/sd_detect.py | 2 +-
3 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d49c1c555..80c5dd474 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-12-02
+## Update for 2024-12-05
### New models and integrations
@@ -53,7 +53,7 @@
- balanced offload: add both high and low watermark
default is 25% for low-watermark (skip offload if memory usage is below 25%) and 70% high-watermark (must offload if memory usage is above 70%)
- change-in-behavior:
      - `lowvram` triggers *sequential offload*; it is also triggered automatically on systems with <=4GB VRAM
      + low-end systems, triggered either by `lowvram` or by detection of <=4GB VRAM, will use *sequential offload*
        all other systems use *balanced offload* by default (can be changed in settings)
        previous behavior was to use *model offload* on systems with <=8GB or with `medvram`, and no offload by default otherwise
      - VAE upcast is now disabled by default on all systems
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 735c00c4c..20626d9ef 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -369,11 +369,11 @@ def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
updown, ex_bias = module.calc_updown(weight)
t1 = time.time()
if batch_updown is not None and updown is not None:
- batch_updown += updown
+ batch_updown += updown.to(batch_updown.device, non_blocking=True)
else:
batch_updown = updown
if batch_ex_bias is not None and ex_bias is not None:
- batch_ex_bias += ex_bias
+ batch_ex_bias += ex_bias.to(batch_ex_bias.device, non_blocking=True)
else:
batch_ex_bias = ex_bias
timer['calc'] += t1 - t0
@@ -396,10 +396,6 @@ def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
errors.display(e, 'LoRA')
raise RuntimeError('LoRA apply weight') from e
continue
- if module is None:
- continue
- shared.log.warning(f'LoRA network="{net.name}" layer="{network_layer_name}" unsupported operation')
- extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
return batch_updown, batch_ex_bias
diff --git a/modules/sd_detect.py b/modules/sd_detect.py
index 062bb32e1..071a83d7e 100644
--- a/modules/sd_detect.py
+++ b/modules/sd_detect.py
@@ -105,7 +105,7 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False):
guess = 'Stable Diffusion XL Instruct'
# get actual pipeline
pipeline = shared_items.get_pipelines().get(guess, None) if pipeline is None else pipeline
- if not quiet:
+ if debug_load is not None:
shared.log.info(f'Autodetect {op}: detect="{guess}" class={getattr(pipeline, "__name__", None)} file="{f}" size={size}MB')
t0 = time.time()
keys = model_tools.get_safetensor_keys(f)
From dda3d2d653bf3a34743b3d04b59891288ceb4faa Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Thu, 5 Dec 2024 10:03:17 -0500
Subject: [PATCH 084/162] flux redux allow prompt
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 1 +
installer.py | 2 +-
scripts/flux_tools.py | 39 ++++++++++++++++++++++++++++++++-------
3 files changed, 34 insertions(+), 8 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 80c5dd474..601cba683 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,7 @@
*todo*: support for Canny/Depth LoRAs
- [Redux](https://huggingface.co/black-forest-labs/FLUX.1-Redux-dev): ~0.1GB
works together with existing model and basically uses input image to analyze it and use that instead of prompt
      *optional*: can use a prompt to combine text guidance with the input image
*recommended*: low denoise strength levels result in more variety
- [Fill](https://huggingface.co/black-forest-labs/FLUX.1-Fill-dev): ~23.8GB, replaces currently loaded model
*note*: can be used in inpaint/outpaint mode only
diff --git a/installer.py b/installer.py
index c1ec1f177..93cd10413 100644
--- a/installer.py
+++ b/installer.py
@@ -459,7 +459,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
def check_diffusers():
if args.skip_all or args.skip_requirements:
return
- sha = '63b631f38336f56755fb5cf15d9b0fb70bbf6323' # diffusers commit hash
+ sha = '3335e2262d47e7d7e311a44dea7f454b5f01b643' # diffusers commit hash
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
cur = opts.get('diffusers_version', '') if minor > 0 else ''
diff --git a/scripts/flux_tools.py b/scripts/flux_tools.py
index 3fbab6c6f..909257a37 100644
--- a/scripts/flux_tools.py
+++ b/scripts/flux_tools.py
@@ -26,11 +26,21 @@ def ui(self, _is_img2img): # ui elements
with gr.Row():
tool = gr.Dropdown(label='Tool', choices=['None', 'Redux', 'Fill', 'Canny', 'Depth'], value='None')
with gr.Row():
- process = gr.Checkbox(label='Preprocess input images', value=True)
- strength = gr.Checkbox(label='Override denoise strength', value=True)
- return [tool, strength, process]
+ prompt = gr.Slider(label='Redux prompt strength', minimum=0, maximum=2, step=0.01, value=0, visible=False)
+ process = gr.Checkbox(label='Control preprocess input images', value=True, visible=False)
+ strength = gr.Checkbox(label='Control override denoise strength', value=True, visible=False)
- def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', strength: bool = True, process: bool = True): # pylint: disable=arguments-differ
+ def display(tool):
+ return [
+ gr.update(visible=tool in ['Redux']),
+ gr.update(visible=tool in ['Canny', 'Depth']),
+ gr.update(visible=tool in ['Canny', 'Depth']),
+ ]
+
+ tool.change(fn=display, inputs=[tool], outputs=[prompt, process, strength])
+ return [tool, prompt, strength, process]
+
+ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', prompt: float = 1.0, strength: bool = True, process: bool = True): # pylint: disable=arguments-differ
global redux_pipe, processor_canny, processor_depth # pylint: disable=global-statement
if tool is None or tool == 'None':
return
@@ -50,6 +60,7 @@ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', stren
t0 = time.time()
if tool == 'Redux':
# pipe_prior_redux = FluxPriorReduxPipeline.from_pretrained("black-forest-labs/FLUX.1-Redux-dev", revision="refs/pr/8", torch_dtype=torch.bfloat16).to("cuda")
+ shared.log.debug(f'{title}: tool={tool} prompt={prompt}')
if redux_pipe is None:
redux_pipe = diffusers.FluxPriorReduxPipeline.from_pretrained(
"black-forest-labs/FLUX.1-Redux-dev",
@@ -57,7 +68,21 @@ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', stren
torch_dtype=devices.dtype,
cache_dir=shared.opts.hfcache_dir
).to(devices.device)
- redux_output = redux_pipe(image)
+ if prompt > 0:
+ shared.log.info(f'{title}: tool={tool} load text encoder')
+ redux_pipe.tokenizer, redux_pipe.tokenizer_2 = shared.sd_model.tokenizer, shared.sd_model.tokenizer_2
+ redux_pipe.text_encoder, redux_pipe.text_encoder_2 = shared.sd_model.text_encoder, shared.sd_model.text_encoder_2
+ sd_models.apply_balanced_offload(redux_pipe)
+ redux_output = redux_pipe(
+ image=image,
+ prompt=p.prompt if prompt > 0 else None,
+ prompt_embeds_scale=[prompt],
+ pooled_prompt_embeds_scale=[prompt],
+ )
+ if prompt > 0:
+ redux_pipe.tokenizer, redux_pipe.tokenizer_2 = None, None
+ redux_pipe.text_encoder, redux_pipe.text_encoder_2 = None, None
+ devices.torch_gc()
for k, v in redux_output.items():
p.task_args[k] = v
else:
@@ -77,7 +102,7 @@ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', stren
p.task_args['mask_image'] = p.image_mask
if tool == 'Canny':
- # pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-Canny-dev", torch_dtype=torch.bfloat16, revision="refs/pr/1").to("cuda")
+ # pipe = diffusers.FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-Canny-dev", torch_dtype=torch.bfloat16, revision="refs/pr/1").to("cuda")
install('controlnet-aux')
install('timm==0.9.16')
if shared.sd_model.__class__.__name__ != 'FluxControlPipeline' or 'Canny' not in shared.opts.sd_model_checkpoint:
@@ -99,7 +124,7 @@ def run(self, p: processing.StableDiffusionProcessing, tool: str = 'None', stren
processor_canny = None
if tool == 'Depth':
- # pipe = FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-Depth-dev", torch_dtype=torch.bfloat16, revision="refs/pr/1").to("cuda")
+ # pipe = diffusers.FluxControlPipeline.from_pretrained("black-forest-labs/FLUX.1-Depth-dev", torch_dtype=torch.bfloat16, revision="refs/pr/1").to("cuda")
install('git+https://github.com/huggingface/image_gen_aux.git', 'image_gen_aux')
if shared.sd_model.__class__.__name__ != 'FluxControlPipeline' or 'Depth' not in shared.opts.sd_model_checkpoint:
shared.opts.data["sd_model_checkpoint"] = "black-forest-labs/FLUX.1-Depth-dev"
From 16ab1e2ae88add2ccc2a53648e55af8f00fe3d97 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 6 Dec 2024 07:23:31 -0500
Subject: [PATCH 085/162] safer lora unapply
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 2 +-
modules/lora/networks.py | 19 ++++++++++---------
2 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 601cba683..be9a573ea 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-12-05
+## Update for 2024-12-06
### New models and integrations
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 20626d9ef..f1fdb0c45 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -334,6 +334,7 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
if shared.opts.lora_offload_backup and weights_backup is not None and isinstance(weights_backup, torch.Tensor):
weights_backup = weights_backup.to(devices.cpu)
self.network_weights_backup = weights_backup
+
bias_backup = getattr(self, "network_bias_backup", None)
if bias_backup is None:
if getattr(self, 'bias', None) is not None:
@@ -380,12 +381,9 @@ def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
if shared.opts.diffusers_offload_mode != "none":
t0 = time.time()
if batch_updown is not None:
- batch_updown = batch_updown.to(devices.cpu, non_blocking=True)
+ batch_updown = batch_updown.to(devices.cpu)
if batch_ex_bias is not None:
- batch_ex_bias = batch_ex_bias.to(devices.cpu, non_blocking=True)
- if devices.backend == "ipex":
- # using non_blocking=True here causes NaNs on Intel
- torch.xpu.synchronize(devices.device)
+ batch_ex_bias = batch_ex_bias.to(devices.cpu)
t1 = time.time()
timer['move'] += t1 - t0
except RuntimeError as e:
@@ -405,6 +403,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
bias_backup = getattr(self, "network_bias_backup", None)
if weights_backup is None and bias_backup is None:
return None, None
+
if weights_backup is not None:
if isinstance(weights_backup, bool):
weights_backup = self.weight
@@ -417,12 +416,13 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
else:
- self.weight = torch.nn.Parameter(new_weight.to(device=orig_device, non_blocking=True), requires_grad=False)
+ self.weight = torch.nn.Parameter(new_weight.to(device=orig_device), requires_grad=False)
del new_weight
else:
- self.weight = torch.nn.Parameter(weights_backup.to(device=orig_device, non_blocking=True), requires_grad=False)
+ self.weight = torch.nn.Parameter(weights_backup.to(device=orig_device), requires_grad=False)
if hasattr(self, "qweight") and hasattr(self, "freeze"):
self.freeze()
+
if bias_backup is not None:
if isinstance(bias_backup, bool):
bias_backup = self.bias
@@ -430,12 +430,13 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
self.bias = None
if ex_bias is not None:
new_weight = bias_backup.to(devices.device, non_blocking=True) + ex_bias.to(devices.device, non_blocking=True)
- self.bias = torch.nn.Parameter(new_weight.to(device=orig_device, non_blocking=True), requires_grad=False)
+ self.bias = torch.nn.Parameter(new_weight.to(device=orig_device), requires_grad=False)
del new_weight
else:
- self.bias = torch.nn.Parameter(bias_backup.to(device=orig_device, non_blocking=True), requires_grad=False)
+ self.bias = torch.nn.Parameter(bias_backup.to(device=orig_device), requires_grad=False)
else:
self.bias = None
+
t1 = time.time()
timer['apply'] += t1 - t0
return self.weight.device, self.weight.dtype
From 2a1fbd904fb871d1fbd911c9455ccbf26e56eb10 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 6 Dec 2024 07:36:20 -0500
Subject: [PATCH 086/162] handle os err
Signed-off-by: Vladimir Mandic
---
modules/memstats.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/modules/memstats.py b/modules/memstats.py
index 7836f7636..fd5f152a0 100644
--- a/modules/memstats.py
+++ b/modules/memstats.py
@@ -20,10 +20,10 @@ def memory_stats():
mem.update({ 'ram': ram })
except Exception as e:
if not fail_once:
- shared.log.error('Memory stats: {e}')
+ shared.log.error(f'Memory stats: {e}')
errors.display(e, 'Memory stats')
fail_once = True
- mem.update({ 'ram': str(e) })
+ mem.update({ 'ram': { 'error': str(e) } })
try:
s = torch.cuda.mem_get_info()
gpu = { 'used': gb(s[1] - s[0]), 'total': gb(s[1]) }
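The fix above adds the missing f-string prefix so the exception text is actually interpolated, and records the failure as a structured entry rather than a bare string. A self-contained sketch of the resulting error-handling shape (the `psutil`-based RAM lookup is an assumption about how the stats are gathered, not the exact SD.Next code):

```python
# Sketch of the memstats error handling after the fix above.
import logging

log = logging.getLogger('sd')
fail_once = False

def memory_stats() -> dict:
    global fail_once
    mem = {}
    try:
        import psutil  # assumption: a psutil-style lookup provides the RAM numbers
        process = psutil.Process()
        mem['ram'] = {'used': round(process.memory_info().rss / 1024**3, 2)}
    except Exception as e:
        if not fail_once:
            log.error(f'Memory stats: {e}')   # f-string, so the exception text is interpolated
            fail_once = True
        mem['ram'] = {'error': str(e)}        # structured error instead of a bare string
    return mem

print(memory_stats())
```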
From 28eeb00432f682d38134dc1f65e67ca2728c9e79 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 6 Dec 2024 09:15:37 -0500
Subject: [PATCH 087/162] remove non-blocking
Signed-off-by: Vladimir Mandic
---
cli/load-unet.py | 4 ++--
modules/lora/networks.py | 12 ++++++------
modules/processing_vae.py | 2 +-
modules/rife/__init__.py | 4 ++--
modules/sd_hijack_accelerate.py | 8 ++++----
modules/sd_models.py | 2 +-
6 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/cli/load-unet.py b/cli/load-unet.py
index 2398cdb64..c910101b0 100644
--- a/cli/load-unet.py
+++ b/cli/load-unet.py
@@ -33,13 +33,13 @@ def set_module_tensor(
stats.dtypes[value.dtype] = 0
stats.dtypes[value.dtype] += 1
if name in module._buffers: # pylint: disable=protected-access
- module._buffers[name] = value.to(device=device, dtype=dtype, non_blocking=True) # pylint: disable=protected-access
+ module._buffers[name] = value.to(device=device, dtype=dtype) # pylint: disable=protected-access
if 'buffers' not in stats.weights:
stats.weights['buffers'] = 0
stats.weights['buffers'] += 1
elif value is not None:
param_cls = type(module._parameters[name]) # pylint: disable=protected-access
- module._parameters[name] = param_cls(value, requires_grad=old_value.requires_grad).to(device, dtype=dtype, non_blocking=True) # pylint: disable=protected-access
+ module._parameters[name] = param_cls(value, requires_grad=old_value.requires_grad).to(device, dtype=dtype) # pylint: disable=protected-access
if 'parameters' not in stats.weights:
stats.weights['parameters'] = 0
stats.weights['parameters'] += 1
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index f1fdb0c45..5a093370c 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -359,7 +359,7 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight: torch.Tensor, network_layer_name: str):
if shared.opts.diffusers_offload_mode == "none":
- self.to(devices.device, non_blocking=True)
+ self.to(devices.device)
batch_updown = None
batch_ex_bias = None
for net in loaded_networks:
@@ -370,11 +370,11 @@ def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
updown, ex_bias = module.calc_updown(weight)
t1 = time.time()
if batch_updown is not None and updown is not None:
- batch_updown += updown.to(batch_updown.device, non_blocking=True)
+ batch_updown += updown.to(batch_updown.device)
else:
batch_updown = updown
if batch_ex_bias is not None and ex_bias is not None:
- batch_ex_bias += ex_bias.to(batch_ex_bias.device, non_blocking=True)
+ batch_ex_bias += ex_bias.to(batch_ex_bias.device)
else:
batch_ex_bias = ex_bias
timer['calc'] += t1 - t0
@@ -412,7 +412,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
if updown is not None and len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
if updown is not None:
- new_weight = weights_backup.to(devices.device, non_blocking=True) + updown.to(devices.device, non_blocking=True)
+ new_weight = weights_backup.to(devices.device) + updown.to(devices.device)
if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
else:
@@ -429,7 +429,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
else:
self.bias = None
if ex_bias is not None:
- new_weight = bias_backup.to(devices.device, non_blocking=True) + ex_bias.to(devices.device, non_blocking=True)
+ new_weight = bias_backup.to(devices.device) + ex_bias.to(devices.device)
self.bias = torch.nn.Parameter(new_weight.to(device=orig_device), requires_grad=False)
del new_weight
else:
@@ -482,7 +482,7 @@ def network_activate():
pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} skip')
continue
weight = getattr(module, 'weight', None)
- weight = weight.to(devices.device, non_blocking=True) if weight is not None else None
+ weight = weight.to(devices.device) if weight is not None else None
backup_size += network_backup_weights(module, weight, network_layer_name, wanted_names)
batch_updown, batch_ex_bias = network_calc_weights(module, weight, network_layer_name)
weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias, orig_device)
diff --git a/modules/processing_vae.py b/modules/processing_vae.py
index b114e01d3..1c4a45f07 100644
--- a/modules/processing_vae.py
+++ b/modules/processing_vae.py
@@ -117,7 +117,7 @@ def full_vae_decode(latents, model):
model.vae.orig_dtype = model.vae.dtype
model.vae = model.vae.to(dtype=torch.float32)
latents = latents.to(torch.float32)
- latents = latents.to(devices.device, non_blocking=True)
+ latents = latents.to(devices.device)
if getattr(model.vae, "post_quant_conv", None) is not None:
latents = latents.to(next(iter(model.vae.post_quant_conv.parameters())).dtype)
diff --git a/modules/rife/__init__.py b/modules/rife/__init__.py
index f74f3d984..2a636eb2f 100644
--- a/modules/rife/__init__.py
+++ b/modules/rife/__init__.py
@@ -82,13 +82,13 @@ def f_pad(img):
for _i in range(pad): # fill starting frames
buffer.put(frame)
- I1 = f_pad(torch.from_numpy(np.transpose(frame, (2,0,1))).to(devices.device, non_blocking=True).unsqueeze(0).float() / 255.)
+ I1 = f_pad(torch.from_numpy(np.transpose(frame, (2,0,1))).to(devices.device).unsqueeze(0).float() / 255.)
with torch.no_grad():
with tqdm(total=len(images), desc='Interpolate', unit='frame') as pbar:
for image in images:
frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
I0 = I1
- I1 = f_pad(torch.from_numpy(np.transpose(frame, (2,0,1))).to(devices.device, non_blocking=True).unsqueeze(0).float() / 255.)
+ I1 = f_pad(torch.from_numpy(np.transpose(frame, (2,0,1))).to(devices.device).unsqueeze(0).float() / 255.)
I0_small = F.interpolate(I0, (32, 32), mode='bilinear', align_corners=False).to(torch.float32)
I1_small = F.interpolate(I1, (32, 32), mode='bilinear', align_corners=False).to(torch.float32)
ssim = ssim_matlab(I0_small[:, :3], I1_small[:, :3])
diff --git a/modules/sd_hijack_accelerate.py b/modules/sd_hijack_accelerate.py
index 90eac5c4e..f8cf8983f 100644
--- a/modules/sd_hijack_accelerate.py
+++ b/modules/sd_hijack_accelerate.py
@@ -35,10 +35,10 @@ def hijack_set_module_tensor(
with devices.inference_context():
# note: majority of time is spent on .to(old_value.dtype)
if tensor_name in module._buffers: # pylint: disable=protected-access
- module._buffers[tensor_name] = value.to(device, old_value.dtype, non_blocking=True) # pylint: disable=protected-access
+ module._buffers[tensor_name] = value.to(device, old_value.dtype) # pylint: disable=protected-access
elif value is not None or not devices.same_device(torch.device(device), module._parameters[tensor_name].device): # pylint: disable=protected-access
param_cls = type(module._parameters[tensor_name]) # pylint: disable=protected-access
- module._parameters[tensor_name] = param_cls(value, requires_grad=old_value.requires_grad).to(device, old_value.dtype, non_blocking=True) # pylint: disable=protected-access
+ module._parameters[tensor_name] = param_cls(value, requires_grad=old_value.requires_grad).to(device, old_value.dtype) # pylint: disable=protected-access
t1 = time.time()
tensor_to_timer += (t1 - t0)
@@ -63,10 +63,10 @@ def hijack_set_module_tensor_simple(
old_value = getattr(module, tensor_name)
with devices.inference_context():
if tensor_name in module._buffers: # pylint: disable=protected-access
- module._buffers[tensor_name] = value.to(device, non_blocking=True) # pylint: disable=protected-access
+ module._buffers[tensor_name] = value.to(device) # pylint: disable=protected-access
elif value is not None or not devices.same_device(torch.device(device), module._parameters[tensor_name].device): # pylint: disable=protected-access
param_cls = type(module._parameters[tensor_name]) # pylint: disable=protected-access
- module._parameters[tensor_name] = param_cls(value, requires_grad=old_value.requires_grad).to(device, non_blocking=True) # pylint: disable=protected-access
+ module._parameters[tensor_name] = param_cls(value, requires_grad=old_value.requires_grad).to(device) # pylint: disable=protected-access
t1 = time.time()
tensor_to_timer += (t1 - t0)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 63ec6b327..8853916e4 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -529,7 +529,7 @@ def move_model(model, device=None, force=False):
t0 = time.time()
try:
if hasattr(model, 'to'):
- model.to(device, non_blocking=True)
+ model.to(device)
if hasattr(model, "prior_pipe"):
model.prior_pipe.to(device)
except Exception as e0:
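Dropping `non_blocking=True` trades a little copy overhead for correctness: an asynchronous device-to-host copy is only safe to read after an explicit synchronize (the removed IPEX comment noted NaNs from exactly this), whereas a plain `.to()` blocks until the data is valid. A small sketch of the difference:

```python
# Sketch of why the non_blocking flags were dropped: async copies need an
# explicit synchronize before the destination tensor can be trusted.
import torch

if torch.cuda.is_available():
    t_gpu = torch.randn(1024, device='cuda')
    # asynchronous copy: must synchronize before reading the CPU tensor
    t_async = t_gpu.to('cpu', non_blocking=True)
    torch.cuda.synchronize()
    # synchronous copy: no extra synchronization needed, which is what the code now relies on
    t_sync = t_gpu.to('cpu')
    print(torch.equal(t_async, t_sync))
```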
From 461be710502a65684c7e1ea3f84549392c09560c Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 6 Dec 2024 13:51:25 -0500
Subject: [PATCH 088/162] update wiki
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 1 +
wiki | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index be9a573ea..b3aaec73f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -72,6 +72,7 @@
### Updates
+- Additional Wiki content: Styles, Wildcards, etc.
- **OpenVINO**: update to 2024.5.0
- **Sampler** improvements
- Euler FlowMatch: add sigma methods (*karras/exponential/betas*)
diff --git a/wiki b/wiki
index 2a83f725b..c5d484397 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 2a83f725bda6a81399f579ba7102741f71b0be39
+Subproject commit c5d484397f7504fdea098d5e24c843a69c9fd2a2
From 6c9101dfaf0e29f0bd55f702cffd53f9e71a0347 Mon Sep 17 00:00:00 2001
From: AI-Casanova <54461896+AI-Casanova@users.noreply.github.com>
Date: Fri, 6 Dec 2024 22:54:08 -0600
Subject: [PATCH 089/162] lora low memory mode: switching requires manual model
reload
---
modules/lora/networks.py | 67 +++++++++++++++++++++++++++++++++++++---
modules/shared.py | 1 +
2 files changed, 64 insertions(+), 4 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 5a093370c..9618f01a9 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -316,7 +316,7 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
weights_backup = getattr(self, "network_weights_backup", None)
if weights_backup is None and wanted_names != (): # pylint: disable=C1803
self.network_weights_backup = None
- if shared.opts.lora_fuse_diffusers:
+ if shared.opts.lora_fuse_diffusers or shared.opts.lora_low_memory:
weights_backup = True
elif getattr(weight, "quant_type", None) in ['nf4', 'fp4']:
if bnb is None:
@@ -338,7 +338,7 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
bias_backup = getattr(self, "network_bias_backup", None)
if bias_backup is None:
if getattr(self, 'bias', None) is not None:
- if shared.opts.lora_fuse_diffusers:
+ if shared.opts.lora_fuse_diffusers or shared.opts.lora_low_memory:
bias_backup = True
else:
bias_backup = self.bias.clone()
@@ -397,7 +397,7 @@ def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
return batch_updown, batch_ex_bias
-def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown: torch.Tensor, ex_bias: torch.Tensor, orig_device: torch.device):
+def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown: torch.Tensor, ex_bias: torch.Tensor, orig_device: torch.device, deactivate: bool = False):
t0 = time.time()
weights_backup = getattr(self, "network_weights_backup", None)
bias_backup = getattr(self, "network_bias_backup", None)
@@ -412,6 +412,8 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
if updown is not None and len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
if updown is not None:
+ if deactivate:
+ updown *= -1
new_weight = weights_backup.to(devices.device) + updown.to(devices.device)
if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
@@ -429,6 +431,8 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
else:
self.bias = None
if ex_bias is not None:
+ if deactivate:
+ ex_bias *= -1
new_weight = bias_backup.to(devices.device) + ex_bias.to(devices.device)
self.bias = torch.nn.Parameter(new_weight.to(device=orig_device), requires_grad=False)
del new_weight
@@ -443,7 +447,62 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
def network_deactivate():
- pass
+ if not shared.opts.lora_low_memory:
+ return
+ timer['deactivate'] = 0
+ t0 = time.time()
+ sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
+ if shared.opts.diffusers_offload_mode == "sequential":
+ sd_models.disable_offload(sd_model)
+ sd_models.move_model(sd_model, device=devices.cpu)
+ modules = {}
+ for component_name in ['text_encoder', 'text_encoder_2', 'unet', 'transformer']:
+ component = getattr(sd_model, component_name, None)
+ if component is not None and hasattr(component, 'named_modules'):
+ modules[component_name] = list(component.named_modules())
+ total = sum(len(x) for x in modules.values())
+ if len(loaded_networks) > 0:
+ pbar = rp.Progress(rp.TextColumn('[cyan]Deactivate network: type=LoRA'), rp.BarColumn(), rp.TaskProgressColumn(),
+ rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'),
+ console=shared.console)
+ task = pbar.add_task(description='', total=total)
+ else:
+ task = None
+ pbar = nullcontext()
+ with devices.inference_context(), pbar:
+ applied = 0
+ weights_devices = []
+ weights_dtypes = []
+ for component in modules.keys():
+ orig_device = getattr(sd_model, component, None).device
+ for _, module in modules[component]:
+ network_layer_name = getattr(module, 'network_layer_name', None)
+ if shared.state.interrupted or network_layer_name is None:
+ if task is not None:
+ pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} skip')
+ continue
+ weight = getattr(module, 'weight', None)
+ weight = weight.to(devices.device) if weight is not None else None
+ batch_updown, batch_ex_bias = network_calc_weights(module, weight, network_layer_name)
+ weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias, orig_device, deactivate=True)
+ weights_devices.append(weights_device)
+ weights_dtypes.append(weights_dtype)
+ if batch_updown is not None or batch_ex_bias is not None:
+ applied += 1
+ del weight, batch_updown, batch_ex_bias
+ module.network_current_names = ()
+ if task is not None:
+ pbar.update(task, advance=1,
+ description=f'networks={len(loaded_networks)} modules={len(modules)} deactivate={applied}')
+ weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403 # pylint: disable=R1718
+ if debug and len(loaded_networks) > 0:
+ shared.log.debug(
+ f'Deactivate network: type=LoRA networks={len(loaded_networks)} modules={total} deactivate={applied} device={weights_devices} dtype={weights_dtypes} fuse={shared.opts.lora_fuse_diffusers} time={get_timers()}')
+ modules.clear()
+ if shared.opts.diffusers_offload_mode == "sequential":
+ sd_models.set_diffuser_offload(sd_model, op="model")
+ t1 = time.time()
+ timer['deactivate'] += t1 - t0
def network_activate():
timer['backup'] = 0
diff --git a/modules/shared.py b/modules/shared.py
index 068ee8b40..f8a989270 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -913,6 +913,7 @@ def get_default_modes():
"lora_apply_tags": OptionInfo(0, "LoRA auto-apply tags", gr.Slider, {"minimum": -1, "maximum": 32, "step": 1}),
"lora_in_memory_limit": OptionInfo(0, "LoRA memory cache", gr.Slider, {"minimum": 0, "maximum": 24, "step": 1}),
"lora_quant": OptionInfo("NF4","LoRA precision in quantized models", gr.Radio, {"choices": ["NF4", "FP4"]}),
+ "lora_low_memory": OptionInfo(False, "LoRA low memory mode"),
}))
options_templates.update(options_section((None, "Internal options"), {
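In the new low-memory mode the weight backup is just a boolean flag, so `network_deactivate` cannot restore a saved copy; instead it recomputes the same delta and applies it with the sign flipped, as the `updown *= -1` lines above show. A toy sketch of that activate/deactivate symmetry (`apply_lora` is a hypothetical helper, not the SD.Next function):

```python
# Sketch of deactivate-by-negation: unapplying a LoRA means adding the same
# delta with the sign flipped rather than restoring a stored weight copy.
import torch

def apply_lora(weight: torch.Tensor, updown: torch.Tensor, deactivate: bool = False) -> torch.Tensor:
    if deactivate:
        updown = -updown                 # reverse the previously applied delta
    return weight + updown

base = torch.randn(4, 4)
delta = 0.1 * torch.randn(4, 4)
patched = apply_lora(base, delta)                        # activate
restored = apply_lora(patched, delta, deactivate=True)   # deactivate without a backup
print(torch.allclose(base, restored))  # True up to floating-point rounding
```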
From f346cccb518d2e0b0c52895e93155f1f49ed8ec4 Mon Sep 17 00:00:00 2001
From: QuantumSoul
Date: Sat, 7 Dec 2024 14:13:16 +0100
Subject: [PATCH 090/162] Create mkdocs.yml
---
mkdocs.yml | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 49 insertions(+)
create mode 100644 mkdocs.yml
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 000000000..7fdeb24db
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,49 @@
+site_name: SD.Next Documentation
+site_url: https://vladmandic.github.io/automatic
+repo_url: https://github.com/vladmandic/automatic
+repo_name: vladmandic/automatic
+docs_dir: wiki
+
+theme:
+ name: material
+
+ features:
+ - navigation.footer
+ - navigation.instant
+ - navigation.instant.prefetch
+ - navigation.instant.progress
+ - navigation.tracking
+
+ palette:
+ - media: "(prefers-color-scheme: light)"
+ scheme: default
+ toggle:
+ icon: material/weather-night
+ name: Switch to dark mode
+ primary: teal
+ accent: pink
+
+ - media: "(prefers-color-scheme: dark)"
+ scheme: slate
+ toggle:
+ icon: material/weather-sunny
+ name: Switch to light mode
+ primary: green
+ accent: yellow
+
+ logo: assets/favicon.svg
+ favicon: assets/favicon.svg
+
+extra:
+ social:
+ - icon: fontawesome/brands/discord
+ link: https://discord.gg/VjvR2tabEX
+ name: Discord
+ - icon: fontawesome/brands/youtube
+ link: https://www.youtube.com/@SDNext
+ name: Youtube
+
+markdown_extensions:
+ - admonition
+ - pymdownx.details
+ - pymdownx.superfences
\ No newline at end of file
From 1185950c4ae648175f899cc8e87c539cf042af43 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 9 Dec 2024 13:40:19 -0500
Subject: [PATCH 091/162] yet another lora refactor
Signed-off-by: Vladimir Mandic
---
extensions-builtin/Lora/network_lora.py | 3 -
mkdocs.yml | 49 -----
modules/extra_networks.py | 16 +-
modules/face/faceid.py | 10 +-
modules/lora/extra_networks_lora.py | 8 +-
modules/lora/lora_timers.py | 38 ++++
modules/lora/network_lora.py | 3 -
modules/lora/networks.py | 240 ++++++++++++------------
modules/processing.py | 14 +-
modules/processing_args.py | 1 +
modules/processing_callbacks.py | 2 +-
modules/processing_class.py | 1 +
modules/processing_diffusers.py | 15 +-
modules/prompt_parser_diffusers.py | 2 +-
modules/shared.py | 5 +-
wiki | 2 +-
16 files changed, 193 insertions(+), 216 deletions(-)
delete mode 100644 mkdocs.yml
create mode 100644 modules/lora/lora_timers.py
diff --git a/extensions-builtin/Lora/network_lora.py b/extensions-builtin/Lora/network_lora.py
index 5e6eaef6c..8ebda2e22 100644
--- a/extensions-builtin/Lora/network_lora.py
+++ b/extensions-builtin/Lora/network_lora.py
@@ -22,7 +22,6 @@ def __init__(self, net: network.Network, weights: network.NetworkWeights):
self.dim = weights.w["lora_down.weight"].shape[0]
def create_module(self, weights, key, none_ok=False):
- from modules.shared import opts
weight = weights.get(key)
if weight is None and none_ok:
return None
@@ -49,8 +48,6 @@ def create_module(self, weights, key, none_ok=False):
if weight.shape != module.weight.shape:
weight = weight.reshape(module.weight.shape)
module.weight.copy_(weight)
- if opts.lora_load_gpu:
- module = module.to(device=devices.device, dtype=devices.dtype)
module.weight.requires_grad_(False)
return module
diff --git a/mkdocs.yml b/mkdocs.yml
deleted file mode 100644
index 7fdeb24db..000000000
--- a/mkdocs.yml
+++ /dev/null
@@ -1,49 +0,0 @@
-site_name: SD.Next Documentation
-site_url: https://vladmandic.github.io/automatic
-repo_url: https://github.com/vladmandic/automatic
-repo_name: vladmandic/automatic
-docs_dir: wiki
-
-theme:
- name: material
-
- features:
- - navigation.footer
- - navigation.instant
- - navigation.instant.prefetch
- - navigation.instant.progress
- - navigation.tracking
-
- palette:
- - media: "(prefers-color-scheme: light)"
- scheme: default
- toggle:
- icon: material/weather-night
- name: Switch to dark mode
- primary: teal
- accent: pink
-
- - media: "(prefers-color-scheme: dark)"
- scheme: slate
- toggle:
- icon: material/weather-sunny
- name: Switch to light mode
- primary: green
- accent: yellow
-
- logo: assets/favicon.svg
- favicon: assets/favicon.svg
-
-extra:
- social:
- - icon: fontawesome/brands/discord
- link: https://discord.gg/VjvR2tabEX
- name: Discord
- - icon: fontawesome/brands/youtube
- link: https://www.youtube.com/@SDNext
- name: Youtube
-
-markdown_extensions:
- - admonition
- - pymdownx.details
- - pymdownx.superfences
\ No newline at end of file
diff --git a/modules/extra_networks.py b/modules/extra_networks.py
index fca48e21c..e96d2e5b7 100644
--- a/modules/extra_networks.py
+++ b/modules/extra_networks.py
@@ -74,9 +74,12 @@ def is_stepwise(en_obj):
return any([len(str(x).split("@")) > 1 for x in all_args]) # noqa C419 # pylint: disable=use-a-generator
-def activate(p, extra_network_data, step=0):
+def activate(p, extra_network_data=None, step=0):
"""call activate for extra networks in extra_network_data in specified order, then call activate for all remaining registered networks with an empty argument list"""
- if extra_network_data is None:
+ if p.disable_extra_networks:
+ return
+ extra_network_data = extra_network_data or p.network_data
+ if extra_network_data is None or len(extra_network_data) == 0:
return
stepwise = False
for extra_network_args in extra_network_data.values():
@@ -106,15 +109,18 @@ def activate(p, extra_network_data, step=0):
except Exception as e:
errors.display(e, f"Activating network: type={extra_network_name}")
- p.extra_network_data = extra_network_data
+ p.network_data = extra_network_data
if stepwise:
p.stepwise_lora = True
shared.opts.data['lora_functional'] = functional
-def deactivate(p, extra_network_data):
+def deactivate(p, extra_network_data=None):
"""call deactivate for extra networks in extra_network_data in specified order, then call deactivate for all remaining registered networks"""
- if extra_network_data is None:
+ if p.disable_extra_networks:
+ return
+ extra_network_data = extra_network_data or p.network_data
+ if extra_network_data is None or len(extra_network_data) == 0:
return
for extra_network_name in extra_network_data:
extra_network = extra_network_registry.get(extra_network_name, None)
diff --git a/modules/face/faceid.py b/modules/face/faceid.py
index b74e15dc5..4a4f07531 100644
--- a/modules/face/faceid.py
+++ b/modules/face/faceid.py
@@ -204,7 +204,6 @@ def face_id(
ip_model_dict["face_image"] = face_images
ip_model_dict["faceid_embeds"] = face_embeds # overwrite placeholder
faceid_model.set_scale(scale)
- extra_network_data = None
if p.all_prompts is None or len(p.all_prompts) == 0:
processing.process_init(p)
@@ -215,11 +214,9 @@ def face_id(
p.negative_prompts = p.all_negative_prompts[n * p.batch_size:(n+1) * p.batch_size]
p.seeds = p.all_seeds[n * p.batch_size:(n+1) * p.batch_size]
p.subseeds = p.all_subseeds[n * p.batch_size:(n+1) * p.batch_size]
- p.prompts, extra_network_data = extra_networks.parse_prompts(p.prompts)
+ p.prompts, p.network_data = extra_networks.parse_prompts(p.prompts)
- if not p.disable_extra_networks:
- with devices.autocast():
- extra_networks.activate(p, extra_network_data)
+ extra_networks.activate(p, p.network_data)
ip_model_dict.update({
"prompt": p.prompts[0],
"negative_prompt": p.negative_prompts[0],
@@ -239,8 +236,7 @@ def face_id(
devices.torch_gc()
ipadapter.unapply(p.sd_model)
- if not p.disable_extra_networks:
- extra_networks.deactivate(p, extra_network_data)
+ extra_networks.deactivate(p, p.network_data)
p.extra_generation_params["IP Adapter"] = f"{basename}:{scale}"
finally:
diff --git a/modules/lora/extra_networks_lora.py b/modules/lora/extra_networks_lora.py
index 57966550a..4ce7a94a9 100644
--- a/modules/lora/extra_networks_lora.py
+++ b/modules/lora/extra_networks_lora.py
@@ -1,5 +1,4 @@
import re
-import time
import numpy as np
import modules.lora.networks as networks
from modules import extra_networks, shared
@@ -128,10 +127,9 @@ def activate(self, p, params_list, step=0):
if len(networks.loaded_networks) > 0 and step == 0:
infotext(p)
prompt(p)
- shared.log.info(f'Load network: type=LoRA apply={[n.name for n in networks.loaded_networks]} te={te_multipliers} unet={unet_multipliers} time={networks.get_timers()}')
+ shared.log.info(f'Load network: type=LoRA apply={[n.name for n in networks.loaded_networks]} te={te_multipliers} unet={unet_multipliers} time={networks.timer.summary}')
def deactivate(self, p):
- t0 = time.time()
if shared.native and len(networks.diffuser_loaded) > 0:
if hasattr(shared.sd_model, "unload_lora_weights") and hasattr(shared.sd_model, "text_encoder"):
if not (shared.compiled_model_state is not None and shared.compiled_model_state.is_compiled is True):
@@ -142,10 +140,8 @@ def deactivate(self, p):
except Exception:
pass
networks.network_deactivate()
- t1 = time.time()
- networks.timer['restore'] += t1 - t0
if self.active and networks.debug:
- shared.log.debug(f"Network end: type=LoRA load={networks.timer['load']:.2f} apply={networks.timer['apply']:.2f} restore={networks.timer['restore']:.2f}")
+ shared.log.debug(f"Network end: type=LoRA time={networks.timer.summary}")
if self.errors:
for k, v in self.errors.items():
shared.log.error(f'LoRA: name="{k}" errors={v}')
diff --git a/modules/lora/lora_timers.py b/modules/lora/lora_timers.py
new file mode 100644
index 000000000..30c35a728
--- /dev/null
+++ b/modules/lora/lora_timers.py
@@ -0,0 +1,38 @@
+class Timer():
+ list: float = 0
+ load: float = 0
+ backup: float = 0
+ calc: float = 0
+ apply: float = 0
+ move: float = 0
+ restore: float = 0
+ activate: float = 0
+ deactivate: float = 0
+
+ @property
+ def total(self):
+ return round(self.activate + self.deactivate, 2)
+
+ @property
+ def summary(self):
+ t = {}
+ for k, v in self.__dict__.items():
+ if v > 0.1:
+ t[k] = round(v, 2)
+ return t
+
+ def clear(self, complete: bool = False):
+ self.backup = 0
+ self.calc = 0
+ self.apply = 0
+ self.move = 0
+ self.restore = 0
+ if complete:
+ self.activate = 0
+ self.deactivate = 0
+
+ def add(self, name, t):
+ self.__dict__[name] += t
+
+ def __str__(self):
+ return f'{self.__class__.__name__}({self.summary})'
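The Timer class above replaces the module-level timer dict: phases are plain float attributes that callers update with +=, summary reports only phases above 0.1s, and total is activate plus deactivate. A minimal behavioural sketch with made-up durations (nothing below is part of the patch):

from modules.lora.lora_timers import Timer  # module path as introduced by this patch

timer = Timer()
timer.load = 0.8          # networks.py sets timer.load = time.time() - t0
timer.backup += 0.25      # per-phase accumulation uses plain attribute +=
timer.activate = 1.3
print(timer.summary)      # {'load': 0.8, 'backup': 0.25, 'activate': 1.3} -- phases at or below 0.1s are hidden
print(timer.total)        # 1.3 (activate + deactivate, rounded to 2 decimals)
print(timer)              # Timer({...}) via __str__
timer.clear()             # resets backup/calc/apply/move/restore; clear(complete=True) also resets activate/deactivate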
diff --git a/modules/lora/network_lora.py b/modules/lora/network_lora.py
index 6c1d7ea3f..8bf475ebc 100644
--- a/modules/lora/network_lora.py
+++ b/modules/lora/network_lora.py
@@ -22,7 +22,6 @@ def __init__(self, net: network.Network, weights: network.NetworkWeights):
self.dim = weights.w["lora_down.weight"].shape[0]
def create_module(self, weights, key, none_ok=False):
- from modules.shared import opts
weight = weights.get(key)
if weight is None and none_ok:
return None
@@ -49,8 +48,6 @@ def create_module(self, weights, key, none_ok=False):
if weight.shape != module.weight.shape:
weight = weight.reshape(module.weight.shape)
module.weight.copy_(weight)
- if opts.lora_load_gpu:
- module = module.to(device=devices.device, dtype=devices.dtype)
module.weight.requires_grad_(False)
return module
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 9618f01a9..805b24b52 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -1,24 +1,15 @@
from typing import Union, List
+from contextlib import nullcontext
import os
import re
import time
import concurrent
-from contextlib import nullcontext
import torch
import diffusers.models.lora
import rich.progress as rp
-import modules.lora.network as network
-import modules.lora.network_lora as network_lora
-import modules.lora.network_hada as network_hada
-import modules.lora.network_ia3 as network_ia3
-import modules.lora.network_oft as network_oft
-import modules.lora.network_lokr as network_lokr
-import modules.lora.network_full as network_full
-import modules.lora.network_norm as network_norm
-import modules.lora.network_glora as network_glora
-import modules.lora.network_overrides as network_overrides
-import modules.lora.lora_convert as lora_convert
+from modules.lora import lora_timers, network, lora_convert, network_overrides
+from modules.lora import network_lora, network_hada, network_ia3, network_oft, network_lokr, network_full, network_norm, network_glora
from modules.lora.extra_networks_lora import ExtraNetworkLora
from modules import shared, devices, sd_models, sd_models_compile, errors, files_cache, model_quant
@@ -28,7 +19,6 @@
available_networks = {}
available_network_aliases = {}
loaded_networks: List[network.Network] = []
-timer = { 'list': 0, 'load': 0, 'backup': 0, 'calc': 0, 'apply': 0, 'move': 0, 'restore': 0, 'deactivate': 0 }
bnb = None
lora_cache = {}
diffuser_loaded = []
@@ -36,6 +26,7 @@
available_network_hash_lookup = {}
forbidden_network_aliases = {}
re_network_name = re.compile(r"(.*)\s*\([0-9a-fA-F]+\)")
+timer = lora_timers.Timer()
module_types = [
network_lora.ModuleTypeLora(),
network_hada.ModuleTypeHada(),
@@ -47,19 +38,6 @@
network_glora.ModuleTypeGLora(),
]
-
-def total_time():
- return sum(timer.values())
-
-
-def get_timers():
- t = { 'total': round(sum(timer.values()), 2) }
- for k, v in timer.items():
- if v > 0.1:
- t[k] = round(v, 2)
- return t
-
-
# section: load networks from disk
def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_default_multiplier) -> Union[network.Network, None]:
@@ -154,7 +132,7 @@ def load_safetensors(name, network_on_disk) -> Union[network.Network, None]:
if debug:
shared.log.debug(f'LoRA name="{name}" unmatched={keys_failed_to_match}')
else:
- shared.log.debug(f'LoRA name="{name}" type={set(network_types)} keys={len(matched_networks)}')
+ shared.log.debug(f'LoRA name="{name}" type={set(network_types)} keys={len(matched_networks)} direct={shared.opts.lora_fuse_diffusers}')
if len(matched_networks) == 0:
return None
lora_cache[name] = net
@@ -222,12 +200,11 @@ def add_network(filename):
for fn in candidates:
executor.submit(add_network, fn)
t1 = time.time()
- timer['list'] = t1 - t0
+ timer.list = t1 - t0
shared.log.info(f'Available LoRAs: path="{shared.cmd_opts.lora_dir}" items={len(available_networks)} folders={len(forbidden_network_aliases)} time={t1 - t0:.2f}')
def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
- timer['list'] = 0
networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names]
if any(x is None for x in networks_on_disk):
list_available_networks()
@@ -301,13 +278,12 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=Non
if len(loaded_networks) > 0:
devices.torch_gc()
- t1 = time.time()
- timer['load'] = t1 - t0
+ timer.load = time.time() - t0
# section: process loaded networks
-def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight: torch.Tensor, network_layer_name: str, wanted_names: tuple):
+def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], network_layer_name: str, wanted_names: tuple):
global bnb # pylint: disable=W0603
backup_size = 0
if len(loaded_networks) > 0 and network_layer_name is not None and any([net.modules.get(network_layer_name, None) for net in loaded_networks]): # noqa: C419 # pylint: disable=R1729
@@ -315,9 +291,10 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
weights_backup = getattr(self, "network_weights_backup", None)
if weights_backup is None and wanted_names != (): # pylint: disable=C1803
+ weight = getattr(self, 'weight', None)
self.network_weights_backup = None
- if shared.opts.lora_fuse_diffusers or shared.opts.lora_low_memory:
- weights_backup = True
+ if shared.opts.lora_fuse_diffusers:
+ self.network_weights_backup = True
elif getattr(weight, "quant_type", None) in ['nf4', 'fp4']:
if bnb is None:
bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
@@ -329,86 +306,112 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
self.blocksize = weight.blocksize
else:
weights_backup = weight.clone()
+ weights_backup = weights_backup.to(devices.cpu)
else:
weights_backup = weight.clone()
- if shared.opts.lora_offload_backup and weights_backup is not None and isinstance(weights_backup, torch.Tensor):
weights_backup = weights_backup.to(devices.cpu)
- self.network_weights_backup = weights_backup
bias_backup = getattr(self, "network_bias_backup", None)
if bias_backup is None:
if getattr(self, 'bias', None) is not None:
- if shared.opts.lora_fuse_diffusers or shared.opts.lora_low_memory:
- bias_backup = True
+ if shared.opts.lora_fuse_diffusers:
+ self.network_bias_backup = True
else:
bias_backup = self.bias.clone()
- else:
- bias_backup = None
- if shared.opts.lora_offload_backup and bias_backup is not None and isinstance(bias_backup, torch.Tensor):
- bias_backup = bias_backup.to(devices.cpu)
- self.network_bias_backup = bias_backup
+ bias_backup = bias_backup.to(devices.cpu)
if getattr(self, 'network_weights_backup', None) is not None:
backup_size += self.network_weights_backup.numel() * self.network_weights_backup.element_size() if isinstance(self.network_weights_backup, torch.Tensor) else 0
if getattr(self, 'network_bias_backup', None) is not None:
backup_size += self.network_bias_backup.numel() * self.network_bias_backup.element_size() if isinstance(self.network_bias_backup, torch.Tensor) else 0
- t1 = time.time()
- timer['backup'] += t1 - t0
+ timer.backup += time.time() - t0
return backup_size
-def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], weight: torch.Tensor, network_layer_name: str):
+def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], network_layer_name: str):
if shared.opts.diffusers_offload_mode == "none":
self.to(devices.device)
batch_updown = None
batch_ex_bias = None
for net in loaded_networks:
module = net.modules.get(network_layer_name, None)
- if module is not None and hasattr(self, 'weight'):
- try:
+ if module is None:
+ continue
+ try:
+ t0 = time.time()
+ weight = self.weight.to(devices.device)
+ updown, ex_bias = module.calc_updown(weight)
+ if batch_updown is not None and updown is not None:
+ batch_updown += updown.to(batch_updown.device)
+ else:
+ batch_updown = updown
+ if batch_ex_bias is not None and ex_bias is not None:
+ batch_ex_bias += ex_bias.to(batch_ex_bias.device)
+ else:
+ batch_ex_bias = ex_bias
+ timer.calc += time.time() - t0
+ if shared.opts.diffusers_offload_mode == "sequential":
t0 = time.time()
- updown, ex_bias = module.calc_updown(weight)
+ if batch_updown is not None:
+ batch_updown = batch_updown.to(devices.cpu)
+ if batch_ex_bias is not None:
+ batch_ex_bias = batch_ex_bias.to(devices.cpu)
t1 = time.time()
- if batch_updown is not None and updown is not None:
- batch_updown += updown.to(batch_updown.device)
- else:
- batch_updown = updown
- if batch_ex_bias is not None and ex_bias is not None:
- batch_ex_bias += ex_bias.to(batch_ex_bias.device)
- else:
- batch_ex_bias = ex_bias
- timer['calc'] += t1 - t0
- if shared.opts.diffusers_offload_mode != "none":
- t0 = time.time()
- if batch_updown is not None:
- batch_updown = batch_updown.to(devices.cpu)
- if batch_ex_bias is not None:
- batch_ex_bias = batch_ex_bias.to(devices.cpu)
- t1 = time.time()
- timer['move'] += t1 - t0
- except RuntimeError as e:
- extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
- if debug:
- module_name = net.modules.get(network_layer_name, None)
- shared.log.error(f'LoRA apply weight name="{net.name}" module="{module_name}" layer="{network_layer_name}" {e}')
- errors.display(e, 'LoRA')
- raise RuntimeError('LoRA apply weight') from e
- continue
+ timer.move += t1 - t0
+ except RuntimeError as e:
+ extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
+ if debug:
+ module_name = net.modules.get(network_layer_name, None)
+ shared.log.error(f'LoRA apply weight name="{net.name}" module="{module_name}" layer="{network_layer_name}" {e}')
+ errors.display(e, 'LoRA')
+ raise RuntimeError('LoRA apply weight') from e
+ continue
return batch_updown, batch_ex_bias
-def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown: torch.Tensor, ex_bias: torch.Tensor, orig_device: torch.device, deactivate: bool = False):
+def network_apply_direct(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown: torch.Tensor, ex_bias: torch.Tensor, deactivate: bool = False):
+ weights_backup = getattr(self, "network_weights_backup", False)
+ bias_backup = getattr(self, "network_bias_backup", False)
+ if not weights_backup and not bias_backup:
+ return None, None
t0 = time.time()
+
+ if weights_backup:
+ if updown is not None and len(self.weight.shape) == 4 and self.weight.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
+ updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
+ if updown is not None:
+ if deactivate:
+ updown *= -1
+ new_weight = self.weight.to(devices.device) + updown.to(devices.device)
+ if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
+ self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
+ else:
+ self.weight = torch.nn.Parameter(new_weight, requires_grad=False)
+ del new_weight
+ if hasattr(self, "qweight") and hasattr(self, "freeze"):
+ self.freeze()
+
+ if bias_backup:
+ if ex_bias is not None:
+ if deactivate:
+ ex_bias *= -1
+ new_weight = bias_backup.to(devices.device) + ex_bias.to(devices.device)
+ self.bias = torch.nn.Parameter(new_weight, requires_grad=False)
+ del new_weight
+
+ timer.apply += time.time() - t0
+ return self.weight.device, self.weight.dtype
+
+
+def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], updown: torch.Tensor, ex_bias: torch.Tensor, orig_device: torch.device, deactivate: bool = False):
weights_backup = getattr(self, "network_weights_backup", None)
bias_backup = getattr(self, "network_bias_backup", None)
if weights_backup is None and bias_backup is None:
return None, None
+ t0 = time.time()
if weights_backup is not None:
- if isinstance(weights_backup, bool):
- weights_backup = self.weight
- else:
- self.weight = None
+ self.weight = None
if updown is not None and len(weights_backup.shape) == 4 and weights_backup.shape[1] == 9: # inpainting model. zero pad updown to make channel[1] 4 to 9
updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5)) # pylint: disable=not-callable
if updown is not None:
@@ -426,10 +429,7 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
self.freeze()
if bias_backup is not None:
- if isinstance(bias_backup, bool):
- bias_backup = self.bias
- else:
- self.bias = None
+ self.bias = None
if ex_bias is not None:
if deactivate:
ex_bias *= -1
@@ -438,19 +438,16 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
del new_weight
else:
self.bias = torch.nn.Parameter(bias_backup.to(device=orig_device), requires_grad=False)
- else:
- self.bias = None
- t1 = time.time()
- timer['apply'] += t1 - t0
+ timer.apply += time.time() - t0
return self.weight.device, self.weight.dtype
def network_deactivate():
- if not shared.opts.lora_low_memory:
+ if not shared.opts.lora_fuse_diffusers:
return
- timer['deactivate'] = 0
t0 = time.time()
+ timer.clear()
sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.disable_offload(sd_model)
@@ -462,15 +459,13 @@ def network_deactivate():
modules[component_name] = list(component.named_modules())
total = sum(len(x) for x in modules.values())
if len(loaded_networks) > 0:
- pbar = rp.Progress(rp.TextColumn('[cyan]Deactivate network: type=LoRA'), rp.BarColumn(), rp.TaskProgressColumn(),
- rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'),
- console=shared.console)
+ pbar = rp.Progress(rp.TextColumn('[cyan]Network: type=LoRA action=deactivate'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
task = pbar.add_task(description='', total=total)
else:
task = None
pbar = nullcontext()
with devices.inference_context(), pbar:
- applied = 0
+ applied_layers = []
weights_devices = []
weights_dtypes = []
for component in modules.keys():
@@ -479,36 +474,33 @@ def network_deactivate():
network_layer_name = getattr(module, 'network_layer_name', None)
if shared.state.interrupted or network_layer_name is None:
if task is not None:
- pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} skip')
+ pbar.update(task, advance=1)
continue
- weight = getattr(module, 'weight', None)
- weight = weight.to(devices.device) if weight is not None else None
- batch_updown, batch_ex_bias = network_calc_weights(module, weight, network_layer_name)
- weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias, orig_device, deactivate=True)
+ batch_updown, batch_ex_bias = network_calc_weights(module, network_layer_name)
+ if shared.opts.lora_fuse_diffusers:
+ weights_device, weights_dtype = network_apply_direct(module, batch_updown, batch_ex_bias, deactivate=True)
+ else:
+ weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias, orig_device, deactivate=True)
weights_devices.append(weights_device)
weights_dtypes.append(weights_dtype)
if batch_updown is not None or batch_ex_bias is not None:
- applied += 1
- del weight, batch_updown, batch_ex_bias
+ applied_layers.append(network_layer_name)
+ del batch_updown, batch_ex_bias
module.network_current_names = ()
if task is not None:
- pbar.update(task, advance=1,
- description=f'networks={len(loaded_networks)} modules={len(modules)} deactivate={applied}')
+ pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} modules={len(modules)} deactivate={len(applied_layers)}')
weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403 # pylint: disable=R1718
+ timer.deactivate = time.time() - t0
if debug and len(loaded_networks) > 0:
- shared.log.debug(
- f'Deactivate network: type=LoRA networks={len(loaded_networks)} modules={total} deactivate={applied} device={weights_devices} dtype={weights_dtypes} fuse={shared.opts.lora_fuse_diffusers} time={get_timers()}')
+ shared.log.debug(f'Deactivate network: type=LoRA networks={len(loaded_networks)} modules={total} deactivate={len(applied_layers)} device={weights_devices} dtype={weights_dtypes} fuse={shared.opts.lora_fuse_diffusers} time={timer.summary}')
modules.clear()
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.set_diffuser_offload(sd_model, op="model")
- t1 = time.time()
- timer['deactivate'] += t1 - t0
+
def network_activate():
- timer['backup'] = 0
- timer['calc'] = 0
- timer['apply'] = 0
- timer['move'] = 0
+ t0 = time.time()
+ timer.clear(complete=True)
sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.disable_offload(sd_model)
@@ -520,14 +512,14 @@ def network_activate():
modules[component_name] = list(component.named_modules())
total = sum(len(x) for x in modules.values())
if len(loaded_networks) > 0:
- pbar = rp.Progress(rp.TextColumn('[cyan]Apply network: type=LoRA'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
+ pbar = rp.Progress(rp.TextColumn('[cyan]Network: type=LoRA action=activate'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
task = pbar.add_task(description='' , total=total)
else:
task = None
pbar = nullcontext()
with devices.inference_context(), pbar:
wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks) if len(loaded_networks) > 0 else ()
- applied = 0
+ applied_layers = []
backup_size = 0
weights_devices = []
weights_dtypes = []
@@ -536,26 +528,28 @@ def network_activate():
for _, module in modules[component]:
network_layer_name = getattr(module, 'network_layer_name', None)
current_names = getattr(module, "network_current_names", ())
- if shared.state.interrupted or network_layer_name is None or current_names == wanted_names:
+ if getattr(module, 'weight', None) is None or shared.state.interrupted or network_layer_name is None or current_names == wanted_names:
if task is not None:
- pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} skip')
+ pbar.update(task, advance=1)
continue
- weight = getattr(module, 'weight', None)
- weight = weight.to(devices.device) if weight is not None else None
- backup_size += network_backup_weights(module, weight, network_layer_name, wanted_names)
- batch_updown, batch_ex_bias = network_calc_weights(module, weight, network_layer_name)
- weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias, orig_device)
+ backup_size += network_backup_weights(module, network_layer_name, wanted_names)
+ batch_updown, batch_ex_bias = network_calc_weights(module, network_layer_name)
+ if shared.opts.lora_fuse_diffusers:
+ weights_device, weights_dtype = network_apply_direct(module, batch_updown, batch_ex_bias)
+ else:
+ weights_device, weights_dtype = network_apply_weights(module, batch_updown, batch_ex_bias, orig_device)
weights_devices.append(weights_device)
weights_dtypes.append(weights_dtype)
if batch_updown is not None or batch_ex_bias is not None:
- applied += 1
- del weight, batch_updown, batch_ex_bias
+ applied_layers.append(network_layer_name)
+ del batch_updown, batch_ex_bias
module.network_current_names = wanted_names
if task is not None:
- pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} modules={len(modules)} apply={applied} backup={backup_size}')
+ pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} modules={total} apply={len(applied_layers)} backup={backup_size}')
weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403 # pylint: disable=R1718
+ timer.activate = time.time() - t0
if debug and len(loaded_networks) > 0:
- shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} modules={total} apply={applied} device={weights_devices} dtype={weights_dtypes} backup={backup_size} fuse={shared.opts.lora_fuse_diffusers} time={get_timers()}')
+ shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} modules={total} apply={len(applied_layers)} device={weights_devices} dtype={weights_dtypes} backup={backup_size} fuse={shared.opts.lora_fuse_diffusers} time={timer.summary}')
modules.clear()
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.set_diffuser_offload(sd_model, op="model")
diff --git a/modules/processing.py b/modules/processing.py
index 57512850a..7ae397538 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -286,7 +286,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
t0 = time.time()
if not hasattr(p, 'skip_init'):
p.init(p.all_prompts, p.all_seeds, p.all_subseeds)
- extra_network_data = None
debug(f'Processing inner: args={vars(p)}')
for n in range(p.n_iter):
pag.apply(p)
@@ -311,9 +310,9 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
p.scripts.before_process_batch(p, batch_number=n, prompts=p.prompts, seeds=p.seeds, subseeds=p.subseeds)
if len(p.prompts) == 0:
break
- p.prompts, extra_network_data = extra_networks.parse_prompts(p.prompts)
- if not p.disable_extra_networks:
- extra_networks.activate(p, extra_network_data)
+ p.prompts, p.network_data = extra_networks.parse_prompts(p.prompts)
+ if not shared.native:
+ extra_networks.activate(p, p.network_data)
if p.scripts is not None and isinstance(p.scripts, scripts.ScriptRunner):
p.scripts.process_batch(p, batch_number=n, prompts=p.prompts, seeds=p.seeds, subseeds=p.subseeds)
@@ -417,6 +416,10 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
timer.process.record('post')
del samples
+
+ if not shared.native:
+ extra_networks.deactivate(p, p.network_data)
+
devices.torch_gc()
if hasattr(shared.sd_model, 'restore_pipeline') and shared.sd_model.restore_pipeline is not None:
@@ -445,9 +448,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
from modules import ipadapter
ipadapter.unapply(shared.sd_model)
- if not p.disable_extra_networks:
- extra_networks.deactivate(p, extra_network_data)
-
if shared.opts.include_mask:
if shared.opts.mask_apply_overlay and p.overlay_images is not None and len(p.overlay_images):
p.image_mask = create_binary_mask(p.overlay_images[0])
diff --git a/modules/processing_args.py b/modules/processing_args.py
index d73762d29..93b0bf9b2 100644
--- a/modules/processing_args.py
+++ b/modules/processing_args.py
@@ -101,6 +101,7 @@ def task_specific_kwargs(p, model):
def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2: typing.Optional[list]=None, negative_prompts_2: typing.Optional[list]=None, desc:str='', **kwargs):
t0 = time.time()
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
apply_circular(p.tiling, model)
if hasattr(model, "set_progress_bar_config"):
model.set_progress_bar_config(bar_format='Progress {rate_fmt}{postfix} {bar} {percentage:3.0f}% {n_fmt}/{total_fmt} {elapsed} {remaining} ' + '\x1b[38;5;71m' + desc, ncols=80, colour='#327fba')
diff --git a/modules/processing_callbacks.py b/modules/processing_callbacks.py
index f3eb0bc37..0b4c7dfe1 100644
--- a/modules/processing_callbacks.py
+++ b/modules/processing_callbacks.py
@@ -67,7 +67,7 @@ def diffusers_callback(pipe, step: int = 0, timestep: int = 0, kwargs: dict = {}
raise AssertionError('Interrupted...')
time.sleep(0.1)
if hasattr(p, "stepwise_lora") and shared.native:
- extra_networks.activate(p, p.extra_network_data, step=step)
+ extra_networks.activate(p, step=step)
if latents is None:
return kwargs
elif shared.opts.nan_skip:
diff --git a/modules/processing_class.py b/modules/processing_class.py
index 21e86c1b0..2cbc07cc2 100644
--- a/modules/processing_class.py
+++ b/modules/processing_class.py
@@ -139,6 +139,7 @@ def __init__(self,
self.negative_pooleds = []
self.disable_extra_networks = False
self.iteration = 0
+ self.network_data = {}
# initializers
self.prompt = prompt
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 0341cac4d..d22a9de97 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -4,7 +4,7 @@
import numpy as np
import torch
import torchvision.transforms.functional as TF
-from modules import shared, devices, processing, sd_models, errors, sd_hijack_hypertile, processing_vae, sd_models_compile, hidiffusion, timer, modelstats
+from modules import shared, devices, processing, sd_models, errors, sd_hijack_hypertile, processing_vae, sd_models_compile, hidiffusion, timer, modelstats, extra_networks
from modules.processing_helpers import resize_hires, calculate_base_steps, calculate_hires_steps, calculate_refiner_steps, save_intermediate, update_sampler, is_txt2img, is_refiner_enabled
from modules.processing_args import set_pipeline_args
from modules.onnx_impl import preprocess_pipeline as preprocess_onnx_pipeline, check_parameters_changed as olive_check_parameters_changed
@@ -89,6 +89,7 @@ def process_base(p: processing.StableDiffusionProcessing):
sd_models.move_model(shared.sd_model.unet, devices.device)
if hasattr(shared.sd_model, 'transformer'):
sd_models.move_model(shared.sd_model.transformer, devices.device)
+ extra_networks.activate(p)
hidiffusion.apply(p, shared.sd_model_type)
# if 'image' in base_args:
# base_args['image'] = set_latents(p)
@@ -223,11 +224,14 @@ def process_hires(p: processing.StableDiffusionProcessing, output):
shared.state.job = 'HiRes'
shared.state.sampling_steps = hires_args.get('prior_num_inference_steps', None) or p.steps or hires_args.get('num_inference_steps', None)
try:
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
sd_models.move_model(shared.sd_model, devices.device)
if hasattr(shared.sd_model, 'unet'):
sd_models.move_model(shared.sd_model.unet, devices.device)
if hasattr(shared.sd_model, 'transformer'):
sd_models.move_model(shared.sd_model.transformer, devices.device)
+ if 'base' in p.skip:
+ extra_networks.activate(p)
sd_models_compile.check_deepcache(enable=True)
output = shared.sd_model(**hires_args) # pylint: disable=not-callable
if isinstance(output, dict):
@@ -345,6 +349,7 @@ def process_refine(p: processing.StableDiffusionProcessing, output):
def process_decode(p: processing.StableDiffusionProcessing, output):
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
if output is not None:
if not hasattr(output, 'images') and hasattr(output, 'frames'):
shared.log.debug(f'Generated: frames={len(output.frames[0])}')
@@ -405,8 +410,6 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
shared.sd_model = orig_pipeline
return results
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
-
# sanitize init_images
if hasattr(p, 'init_images') and getattr(p, 'init_images', None) is None:
del p.init_images
@@ -453,13 +456,13 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
shared.sd_model = orig_pipeline
return results
- results = process_decode(p, output)
+ extra_networks.deactivate(p)
+ timer.process.add('lora', networks.timer.total)
+ results = process_decode(p, output)
timer.process.record('decode')
- timer.process.add('lora', networks.total_time())
shared.sd_model = orig_pipeline
-
shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
if p.state == '':
diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py
index 06c0b6012..8c140e0d6 100644
--- a/modules/prompt_parser_diffusers.py
+++ b/modules/prompt_parser_diffusers.py
@@ -92,7 +92,7 @@ def flatten(xss):
return [x for xs in xss for x in xs]
# unpack EN data in case of TE LoRA
- en_data = p.extra_network_data
+ en_data = p.network_data
en_data = [idx.items for item in en_data.values() for idx in item]
effective_batch = 1 if self.allsame else self.batchsize
key = str([self.prompts, self.negative_prompts, effective_batch, self.clip_skip, self.steps, en_data])
diff --git a/modules/shared.py b/modules/shared.py
index f8a989270..6e8f2f3fd 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -905,15 +905,12 @@ def get_default_modes():
"extra_networks_default_multiplier": OptionInfo(1.0, "Default strength", gr.Slider, {"minimum": 0.0, "maximum": 2.0, "step": 0.01}),
"lora_preferred_name": OptionInfo("filename", "LoRA preferred name", gr.Radio, {"choices": ["filename", "alias"], "visible": False}),
"lora_add_hashes_to_infotext": OptionInfo(False, "LoRA add hash info"),
- "lora_fuse_diffusers": OptionInfo(False if not cmd_opts.use_openvino else True, "LoRA fuse directly to model"),
- "lora_load_gpu": OptionInfo(True if not (cmd_opts.lowvram or cmd_opts.medvram) else False, "LoRA load directly to GPU"),
- "lora_offload_backup": OptionInfo(True, "LoRA offload backup weights"),
+ "lora_fuse_diffusers": OptionInfo(True, "LoRA fuse directly to model"),
"lora_force_diffusers": OptionInfo(False if not cmd_opts.use_openvino else True, "LoRA force loading of all models using Diffusers"),
"lora_maybe_diffusers": OptionInfo(False, "LoRA force loading of specific models using Diffusers"),
"lora_apply_tags": OptionInfo(0, "LoRA auto-apply tags", gr.Slider, {"minimum": -1, "maximum": 32, "step": 1}),
"lora_in_memory_limit": OptionInfo(0, "LoRA memory cache", gr.Slider, {"minimum": 0, "maximum": 24, "step": 1}),
"lora_quant": OptionInfo("NF4","LoRA precision in quantized models", gr.Radio, {"choices": ["NF4", "FP4"]}),
- "lora_low_memory": OptionInfo(False, "LoRA low memory mode"),
}))
options_templates.update(options_section((None, "Internal options"), {
diff --git a/wiki b/wiki
index c5d484397..20c9fe52f 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit c5d484397f7504fdea098d5e24c843a69c9fd2a2
+Subproject commit 20c9fe52f253c23e736227787ddebd4cbfcbfe68
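Much of this refactor hinges on lora_fuse_diffusers becoming the default: network_apply_direct rewrites the live weight with the computed delta instead of keeping a weights_backup tensor, and deactivation reuses the same path with the delta negated (updown *= -1). A minimal sketch of that idea on a bare torch Linear, with hypothetical names; the real functions also handle bnb 4-bit weights, inpainting channel padding and bias deltas:

import torch

def fuse_delta(module: torch.nn.Linear, updown: torch.Tensor, deactivate: bool = False):
    # Fold the LoRA delta into the weight in place -- no backup tensor is kept.
    if deactivate:
        updown = -updown  # undoing is just applying the negated delta
    new_weight = module.weight.detach() + updown.to(module.weight.device)
    module.weight = torch.nn.Parameter(new_weight, requires_grad=False)

linear = torch.nn.Linear(4, 4)
delta = 0.1 * torch.ones(4, 4)
fuse_delta(linear, delta)                   # activate: W += delta
fuse_delta(linear, delta, deactivate=True)  # deactivate: W -= delta, back to the original weights (up to float rounding)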
From 6ef10195186ac1384b7d9d977df6a068a5949be3 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 9 Dec 2024 13:46:25 -0500
Subject: [PATCH 092/162] update hotkeys
Signed-off-by: Vladimir Mandic
---
javascript/script.js | 5 +++--
wiki | 2 +-
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/javascript/script.js b/javascript/script.js
index 250e90ba2..836d9b102 100644
--- a/javascript/script.js
+++ b/javascript/script.js
@@ -125,11 +125,12 @@ document.addEventListener('keydown', (e) => {
let elem;
if (e.key === 'Escape') elem = getUICurrentTabContent().querySelector('button[id$=_interrupt]');
if (e.key === 'Enter' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_generate]');
- if (e.key === 'Backspace' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_reprocess]');
+ if (e.key === 'r' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_reprocess]');
if (e.key === ' ' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_extra_networks_btn]');
+ if (e.key === 'n' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_extra_networks_btn]');
if (e.key === 's' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id^=save_]');
if (e.key === 'Insert' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id^=save_]');
- if (e.key === 'Delete' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id^=delete_]');
+ if (e.key === 'd' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id^=delete_]');
// if (e.key === 'm' && e.ctrlKey) elem = gradioApp().getElementById('setting_sd_model_checkpoint');
if (elem) {
e.preventDefault();
diff --git a/wiki b/wiki
index 20c9fe52f..8960da514 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 20c9fe52f253c23e736227787ddebd4cbfcbfe68
+Subproject commit 8960da514e9aff4a5d47402925c9498536443379
From 7c88bfb60a6b353f0a86b5bf1fc9f40d33d6974a Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 9 Dec 2024 14:16:26 -0500
Subject: [PATCH 093/162] fix preview choice
Signed-off-by: Vladimir Mandic
---
modules/sd_samplers_common.py | 6 ++----
modules/shared_state.py | 1 -
2 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index f6f6c18d5..a96795a25 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -35,7 +35,6 @@ def setup_img2img_steps(p, steps=None):
def single_sample_to_image(sample, approximation=None):
with queue_lock:
t0 = time.time()
- sd_cascade = False
if approximation is None:
approximation = approximation_indexes.get(shared.opts.show_progress_type, None)
if approximation is None:
@@ -50,10 +49,9 @@ def single_sample_to_image(sample, approximation=None):
if len(sample.shape) > 4: # likely unknown video latent (e.g. svd)
return Image.new(mode="RGB", size=(512, 512))
- if len(sample) == 16: # sd_cascade
- sd_cascade = True
if len(sample.shape) == 4 and sample.shape[0]: # likely animatediff latent
sample = sample.permute(1, 0, 2, 3)[0]
+ # TODO remove
if shared.native: # [-x,x] to [-5,5]
sample_max = torch.max(sample)
if sample_max > 5:
@@ -65,7 +63,7 @@ def single_sample_to_image(sample, approximation=None):
if approximation == 2: # TAESD
x_sample = sd_vae_taesd.decode(sample)
x_sample = (1.0 + x_sample) / 2.0 # preview requires smaller range
- elif sd_cascade and approximation != 3:
+ elif shared.sd_model_type == 'sc' and approximation != 3:
x_sample = sd_vae_stablecascade.decode(sample)
elif approximation == 0: # Simple
x_sample = sd_vae_approx.cheap_approximation(sample) * 0.5 + 0.5
diff --git a/modules/shared_state.py b/modules/shared_state.py
index 51d33f9ed..3d3cb1ae6 100644
--- a/modules/shared_state.py
+++ b/modules/shared_state.py
@@ -141,7 +141,6 @@ def set_current_image(self):
if self.job == 'VAE': # avoid generating preview while vae is running
return
from modules.shared import opts, cmd_opts
- """sets self.current_image from self.current_latent if enough sampling steps have been made after the last call to this"""
if cmd_opts.lowvram or self.api:
return
if abs(self.sampling_step - self.current_image_sampling_step) >= opts.show_progress_every_n_steps and opts.live_previews_enable and opts.show_progress_every_n_steps > 0:
From 383d7052ac135db857f2ab21e82f50437148ab07 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 9 Dec 2024 15:23:22 -0500
Subject: [PATCH 094/162] lora split te apply
Signed-off-by: Vladimir Mandic
---
modules/extra_networks.py | 45 ++++++++++++++++-------------
modules/lora/extra_networks_lora.py | 6 ++--
modules/lora/networks.py | 19 +++++++-----
modules/processing_args.py | 3 +-
modules/processing_diffusers.py | 2 +-
5 files changed, 43 insertions(+), 32 deletions(-)
diff --git a/modules/extra_networks.py b/modules/extra_networks.py
index e96d2e5b7..fe141cca1 100644
--- a/modules/extra_networks.py
+++ b/modules/extra_networks.py
@@ -1,6 +1,7 @@
import re
+import inspect
from collections import defaultdict
-from modules import errors, shared, devices
+from modules import errors, shared
extra_network_registry = {}
@@ -74,7 +75,7 @@ def is_stepwise(en_obj):
return any([len(str(x).split("@")) > 1 for x in all_args]) # noqa C419 # pylint: disable=use-a-generator
-def activate(p, extra_network_data=None, step=0):
+def activate(p, extra_network_data=None, step=0, include=[], exclude=[]):
"""call activate for extra networks in extra_network_data in specified order, then call activate for all remaining registered networks with an empty argument list"""
if p.disable_extra_networks:
return
@@ -89,25 +90,29 @@ def activate(p, extra_network_data=None, step=0):
shared.log.warning("Composable LoRA not compatible with 'lora_force_diffusers'")
stepwise = False
shared.opts.data['lora_functional'] = stepwise or functional
- with devices.autocast():
- for extra_network_name, extra_network_args in extra_network_data.items():
- extra_network = extra_network_registry.get(extra_network_name, None)
- if extra_network is None:
- errors.log.warning(f"Skipping unknown extra network: {extra_network_name}")
- continue
- try:
+
+ for extra_network_name, extra_network_args in extra_network_data.items():
+ extra_network = extra_network_registry.get(extra_network_name, None)
+ if extra_network is None:
+ errors.log.warning(f"Skipping unknown extra network: {extra_network_name}")
+ continue
+ try:
+ signature = list(inspect.signature(extra_network.activate).parameters)
+ if 'include' in signature and 'exclude' in signature:
+ extra_network.activate(p, extra_network_args, step=step, include=include, exclude=exclude)
+ else:
extra_network.activate(p, extra_network_args, step=step)
- except Exception as e:
- errors.display(e, f"Activating network: type={extra_network_name} args:{extra_network_args}")
-
- for extra_network_name, extra_network in extra_network_registry.items():
- args = extra_network_data.get(extra_network_name, None)
- if args is not None:
- continue
- try:
- extra_network.activate(p, [])
- except Exception as e:
- errors.display(e, f"Activating network: type={extra_network_name}")
+ except Exception as e:
+ errors.display(e, f"Activating network: type={extra_network_name} args:{extra_network_args}")
+
+ for extra_network_name, extra_network in extra_network_registry.items():
+ args = extra_network_data.get(extra_network_name, None)
+ if args is not None:
+ continue
+ try:
+ extra_network.activate(p, [])
+ except Exception as e:
+ errors.display(e, f"Activating network: type={extra_network_name}")
p.network_data = extra_network_data
if stepwise:
diff --git a/modules/lora/extra_networks_lora.py b/modules/lora/extra_networks_lora.py
index 4ce7a94a9..135df1ccb 100644
--- a/modules/lora/extra_networks_lora.py
+++ b/modules/lora/extra_networks_lora.py
@@ -112,7 +112,7 @@ def __init__(self):
self.model = None
self.errors = {}
- def activate(self, p, params_list, step=0):
+ def activate(self, p, params_list, step=0, include=[], exclude=[]):
self.errors.clear()
if self.active:
if self.model != shared.opts.sd_model_checkpoint: # reset if model changed
@@ -123,8 +123,8 @@ def activate(self, p, params_list, step=0):
self.model = shared.opts.sd_model_checkpoint
names, te_multipliers, unet_multipliers, dyn_dims = parse(p, params_list, step)
networks.network_load(names, te_multipliers, unet_multipliers, dyn_dims) # load
- networks.network_activate()
- if len(networks.loaded_networks) > 0 and step == 0:
+ networks.network_activate(include, exclude)
+ if len(networks.loaded_networks) > 0 and len(networks.applied_layers) > 0 and step == 0:
infotext(p)
prompt(p)
shared.log.info(f'Load network: type=LoRA apply={[n.name for n in networks.loaded_networks]} te={te_multipliers} unet={unet_multipliers} time={networks.timer.summary}')
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 805b24b52..edd82f3e4 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -19,6 +19,7 @@
available_networks = {}
available_network_aliases = {}
loaded_networks: List[network.Network] = []
+applied_layers: list[str] = []
bnb = None
lora_cache = {}
diffuser_loaded = []
@@ -465,7 +466,7 @@ def network_deactivate():
task = None
pbar = nullcontext()
with devices.inference_context(), pbar:
- applied_layers = []
+ applied_layers.clear()
weights_devices = []
weights_dtypes = []
for component in modules.keys():
@@ -498,7 +499,7 @@ def network_deactivate():
sd_models.set_diffuser_offload(sd_model, op="model")
-def network_activate():
+def network_activate(include=[], exclude=[]):
t0 = time.time()
timer.clear(complete=True)
sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
@@ -506,10 +507,12 @@ def network_activate():
sd_models.disable_offload(sd_model)
sd_models.move_model(sd_model, device=devices.cpu)
modules = {}
- for component_name in ['text_encoder','text_encoder_2', 'unet', 'transformer']:
- component = getattr(sd_model, component_name, None)
+ components = include if len(include) > 0 else ['text_encoder', 'text_encoder_2', 'text_encoder_3', 'unet', 'transformer']
+ components = [x for x in components if x not in exclude]
+ for name in components:
+ component = getattr(sd_model, name, None)
if component is not None and hasattr(component, 'named_modules'):
- modules[component_name] = list(component.named_modules())
+ modules[name] = list(component.named_modules())
total = sum(len(x) for x in modules.values())
if len(loaded_networks) > 0:
pbar = rp.Progress(rp.TextColumn('[cyan]Network: type=LoRA action=activate'), rp.BarColumn(), rp.TaskProgressColumn(), rp.TimeRemainingColumn(), rp.TimeElapsedColumn(), rp.TextColumn('[cyan]{task.description}'), console=shared.console)
@@ -519,7 +522,7 @@ def network_activate():
pbar = nullcontext()
with devices.inference_context(), pbar:
wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks) if len(loaded_networks) > 0 else ()
- applied_layers = []
+ applied_layers.clear()
backup_size = 0
weights_devices = []
weights_dtypes = []
@@ -546,10 +549,12 @@ def network_activate():
module.network_current_names = wanted_names
if task is not None:
pbar.update(task, advance=1, description=f'networks={len(loaded_networks)} modules={total} apply={len(applied_layers)} backup={backup_size}')
+ if task is not None and len(applied_layers) == 0:
+ pbar.remove_task(task) # hide progress bar for no action
weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403 # pylint: disable=R1718
timer.activate = time.time() - t0
if debug and len(loaded_networks) > 0:
- shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} modules={total} apply={len(applied_layers)} device={weights_devices} dtype={weights_dtypes} backup={backup_size} fuse={shared.opts.lora_fuse_diffusers} time={timer.summary}')
+ shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} components={components} modules={total} apply={len(applied_layers)} device={weights_devices} dtype={weights_dtypes} backup={backup_size} fuse={shared.opts.lora_fuse_diffusers} time={timer.summary}')
modules.clear()
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.set_diffuser_offload(sd_model, op="model")
diff --git a/modules/processing_args.py b/modules/processing_args.py
index 93b0bf9b2..e7f53ba8e 100644
--- a/modules/processing_args.py
+++ b/modules/processing_args.py
@@ -6,7 +6,7 @@
import inspect
import torch
import numpy as np
-from modules import shared, errors, sd_models, processing, processing_vae, processing_helpers, sd_hijack_hypertile, prompt_parser_diffusers, timer
+from modules import shared, errors, sd_models, processing, processing_vae, processing_helpers, sd_hijack_hypertile, prompt_parser_diffusers, timer, extra_networks
from modules.processing_callbacks import diffusers_callback_legacy, diffusers_callback, set_callbacks_p
from modules.processing_helpers import resize_hires, fix_prompts, calculate_base_steps, calculate_hires_steps, calculate_refiner_steps, get_generator, set_latents, apply_circular # pylint: disable=unused-import
from modules.api import helpers
@@ -134,6 +134,7 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2
else:
prompt_parser_diffusers.embedder = None
+ extra_networks.activate(p, include=['text_encoder', 'text_encoder_2', 'text_encoder_3'])
if 'prompt' in possible:
if 'OmniGen' in model.__class__.__name__:
prompts = [p.replace('|image|', '<|image_1|>') for p in prompts]
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index d22a9de97..627eb281f 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -89,7 +89,7 @@ def process_base(p: processing.StableDiffusionProcessing):
sd_models.move_model(shared.sd_model.unet, devices.device)
if hasattr(shared.sd_model, 'transformer'):
sd_models.move_model(shared.sd_model.transformer, devices.device)
- extra_networks.activate(p)
+ extra_networks.activate(p, exclude=['text_encoder', 'text_encoder_2'])
hidiffusion.apply(p, shared.sd_model_type)
# if 'image' in base_args:
# base_args['image'] = set_latents(p)
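The compatibility shim in extra_networks.activate() above inspects each handler's activate() signature before forwarding include/exclude, so handlers that predate the split text-encoder/unet activation keep working unchanged. A minimal sketch of that dispatch with two hypothetical handler classes (names are illustrative only):

import inspect

class LegacyHandler:
    def activate(self, p, params_list, step=0):
        return f'legacy step={step}'

class SplitHandler:
    def activate(self, p, params_list, step=0, include=[], exclude=[]):
        return f'split include={include} exclude={exclude}'

def call_activate(handler, p, args, step=0, include=[], exclude=[]):
    # Forward include/exclude only when the handler's signature accepts them.
    signature = list(inspect.signature(handler.activate).parameters)
    if 'include' in signature and 'exclude' in signature:
        return handler.activate(p, args, step=step, include=include, exclude=exclude)
    return handler.activate(p, args, step=step)

print(call_activate(LegacyHandler(), None, [], include=['text_encoder']))  # include is silently dropped
print(call_activate(SplitHandler(), None, [], include=['text_encoder']))   # include is forwarded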
From 0f458853fdd9841dd8aa346172df271ee6358e5a Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 10 Dec 2024 08:41:38 -0500
Subject: [PATCH 095/162] fix sd upscale
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 3 ++-
scripts/sd_upscale.py | 5 +++--
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b3aaec73f..6775ef37e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-12-06
+## Update for 2024-12-10
### New models and integrations
@@ -96,6 +96,7 @@
- simplify img2img/inpaint/sketch canvas handling
- fix prompt caching
- fix xyz grid skip final pass
+- fix sd upscale script
## Update for 2024-11-21
diff --git a/scripts/sd_upscale.py b/scripts/sd_upscale.py
index 9c5a72204..7ac31b603 100644
--- a/scripts/sd_upscale.py
+++ b/scripts/sd_upscale.py
@@ -48,7 +48,7 @@ def run(self, p, _, overlap, upscaler_index, scale_factor): # pylint: disable=ar
else:
img = init_img
devices.torch_gc()
- grid = images.split_grid(img, tile_w=p.width, tile_h=p.height, overlap=overlap)
+ grid = images.split_grid(img, tile_w=init_img.width, tile_h=init_img.height, overlap=overlap)
batch_size = p.batch_size
upscale_count = p.n_iter
p.n_iter = 1
@@ -61,7 +61,7 @@ def run(self, p, _, overlap, upscaler_index, scale_factor): # pylint: disable=ar
batch_count = math.ceil(len(work) / batch_size)
state.job_count = batch_count * upscale_count
- log.info(f"SD upscale: images={len(work)} tile={len(grid.tiles[0][2])}x{len(grid.tiles)} batches={state.job_count}")
+ log.info(f"SD upscale: images={len(work)} tiles={len(grid.tiles)} batches={state.job_count}")
result_images = []
for n in range(upscale_count):
@@ -91,4 +91,5 @@ def run(self, p, _, overlap, upscaler_index, scale_factor): # pylint: disable=ar
images.save_image(combined_image, p.outpath_samples, "", start_seed, p.prompt, opts.samples_format, info=initial_info, p=p)
processed = Processed(p, result_images, seed, initial_info)
+ log.info(f"SD upscale: images={result_images}")
return processed
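
For reference, the number of tiles produced by the fixed split_grid call follows from the upscaled image size, the tile size and the overlap; the helper below is a hedged sketch of that arithmetic only, not the images.split_grid implementation:

```python
import math

def tiles_along_axis(image_size: int, tile_size: int, overlap: int) -> int:
    """How many overlapping tiles of tile_size are needed to cover image_size pixels."""
    if image_size <= tile_size:
        return 1
    stride = tile_size - overlap
    return math.ceil((image_size - overlap) / stride)

# e.g. a 512x512 init image upscaled 4x and tiled with the init image dimensions
cols = tiles_along_axis(2048, 512, 64)  # 5
rows = tiles_along_axis(2048, 512, 64)  # 5
print(cols * rows)                       # 25 tiles per upscale pass
```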
From 042178fedbe8d83d6f7bb03ac13bff43008d17cf Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 10 Dec 2024 10:20:00 -0500
Subject: [PATCH 096/162] reorg settings
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 1 +
javascript/script.js | 2 +-
javascript/sdnext.css | 6 +-
modules/processing.py | 3 +
modules/processing_class.py | 3 +
modules/shared.py | 255 +++++++++++++++++++-----------------
scripts/cogvideo.py | 31 ++---
7 files changed, 161 insertions(+), 140 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6775ef37e..5412861a8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -67,6 +67,7 @@
- control: hide preview column by default
- control: option to hide input column
- control: add stats
+ - settings: reorganized and simplified
- browser -> server logging framework
- add additional themes: `black-reimagined`, thanks @Artheriax
diff --git a/javascript/script.js b/javascript/script.js
index 836d9b102..f943f4626 100644
--- a/javascript/script.js
+++ b/javascript/script.js
@@ -125,7 +125,7 @@ document.addEventListener('keydown', (e) => {
let elem;
if (e.key === 'Escape') elem = getUICurrentTabContent().querySelector('button[id$=_interrupt]');
if (e.key === 'Enter' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_generate]');
- if (e.key === 'r' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_reprocess]');
+ if (e.key === 'i' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_reprocess]');
if (e.key === ' ' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_extra_networks_btn]');
if (e.key === 'n' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id$=_extra_networks_btn]');
if (e.key === 's' && e.ctrlKey) elem = getUICurrentTabContent().querySelector('button[id^=save_]');
diff --git a/javascript/sdnext.css b/javascript/sdnext.css
index c5145c973..60d835cd4 100644
--- a/javascript/sdnext.css
+++ b/javascript/sdnext.css
@@ -149,18 +149,20 @@ div#extras_scale_to_tab div.form { flex-direction: row; }
#settings>div.tab-content { flex: 10 0 75%; display: grid; }
#settings>div.tab-content>div { border: none; padding: 0; }
#settings>div.tab-content>div>div>div>div>div { flex-direction: unset; }
-#settings>div.tab-nav { display: grid; grid-template-columns: repeat(auto-fill, .5em minmax(10em, 1fr)); flex: 1 0 auto; width: 12em; align-self: flex-start; gap: var(--spacing-xxl); }
+#settings>div.tab-nav { display: grid; grid-template-columns: repeat(auto-fill, .5em minmax(10em, 1fr)); flex: 1 0 auto; width: 12em; align-self: flex-start; gap: 8px; }
#settings>div.tab-nav button { display: block; border: none; text-align: left; white-space: initial; padding: 0; }
#settings>div.tab-nav>#settings_show_all_pages { padding: var(--size-2) var(--size-4); }
#settings .block.gradio-checkbox { margin: 0; width: auto; }
#settings .dirtyable { gap: .5em; }
#settings .dirtyable.hidden { display: none; }
-#settings .modification-indicator { height: 1.2em; border-radius: 1em !important; padding: 0; width: 0; margin-right: 0.5em; }
+#settings .modification-indicator { height: 1.2em; border-radius: 1em !important; padding: 0; width: 0; margin-right: 0.5em; border-left: inset; }
#settings .modification-indicator:disabled { visibility: hidden; }
#settings .modification-indicator.saved { background: var(--color-accent-soft); width: var(--spacing-sm); }
#settings .modification-indicator.changed { background: var(--color-accent); width: var(--spacing-sm); }
#settings .modification-indicator.changed.unsaved { background-image: linear-gradient(var(--color-accent) 25%, var(--color-accent-soft) 75%); width: var(--spacing-sm); }
#settings_result { margin: 0 1.2em; }
+#tab_settings .gradio-slider, #tab_settings .gradio-dropdown { width: 300px !important; max-width: 300px; }
+#tab_settings textarea { max-width: 500px; }
.licenses { display: block !important; }
/* live preview */
diff --git a/modules/processing.py b/modules/processing.py
index 7ae397538..b4839e402 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -118,6 +118,9 @@ def js(self):
def infotext(self, p: StableDiffusionProcessing, index):
return create_infotext(p, self.all_prompts, self.all_seeds, self.all_subseeds, comments=[], position_in_batch=index % self.batch_size, iteration=index // self.batch_size)
+ def __str__(self):
+ return f'{self.__class__.__name__}: {self.__dict__}'
+
def process_images(p: StableDiffusionProcessing) -> Processed:
timer.process.reset()
diff --git a/modules/processing_class.py b/modules/processing_class.py
index 2cbc07cc2..7a7d9cd36 100644
--- a/modules/processing_class.py
+++ b/modules/processing_class.py
@@ -339,6 +339,9 @@ def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subs
def close(self):
self.sampler = None # pylint: disable=attribute-defined-outside-init
+ def __str__(self):
+ return f'{self.__class__.__name__}: {self.__dict__}'
+
class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
def __init__(self, **kwargs):
diff --git a/modules/shared.py b/modules/shared.py
index 6e8f2f3fd..3d7571029 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -467,69 +467,103 @@ def get_default_modes():
startup_offload_mode, startup_cross_attention, startup_sdp_options = get_default_modes()
-options_templates.update(options_section(('sd', "Execution & Models"), {
+options_templates.update(options_section(('sd', "Models & Loading"), {
"sd_backend": OptionInfo(default_backend, "Execution backend", gr.Radio, {"choices": ["diffusers", "original"] }),
+ "diffusers_pipeline": OptionInfo('Autodetect', 'Model pipeline', gr.Dropdown, lambda: {"choices": list(shared_items.get_pipelines()), "visible": native}),
"sd_model_checkpoint": OptionInfo(default_checkpoint, "Base model", DropdownEditable, lambda: {"choices": list_checkpoint_titles()}, refresh=refresh_checkpoints),
"sd_model_refiner": OptionInfo('None', "Refiner model", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_titles()}, refresh=refresh_checkpoints),
- "sd_vae": OptionInfo("Automatic", "VAE model", gr.Dropdown, lambda: {"choices": shared_items.sd_vae_items()}, refresh=shared_items.refresh_vae_list),
"sd_unet": OptionInfo("None", "UNET model", gr.Dropdown, lambda: {"choices": shared_items.sd_unet_items()}, refresh=shared_items.refresh_unet_list),
- "sd_text_encoder": OptionInfo('None', "Text encoder model", gr.Dropdown, lambda: {"choices": shared_items.sd_te_items()}, refresh=shared_items.refresh_te_list),
- "sd_model_dict": OptionInfo('None', "Use separate base dict", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_titles()}, refresh=refresh_checkpoints),
+ "latent_history": OptionInfo(16, "Latent history size", gr.Slider, {"minimum": 1, "maximum": 100, "step": 1}),
+
+ "offload_sep": OptionInfo("Model Offloading
", "", gr.HTML),
+ "diffusers_move_base": OptionInfo(False, "Move base model to CPU when using refiner", gr.Checkbox, {"visible": False }),
+ "diffusers_move_unet": OptionInfo(False, "Move base model to CPU when using VAE", gr.Checkbox, {"visible": False }),
+ "diffusers_move_refiner": OptionInfo(False, "Move refiner model to CPU when not in use", gr.Checkbox, {"visible": False }),
+ "diffusers_extract_ema": OptionInfo(False, "Use model EMA weights when possible", gr.Checkbox, {"visible": False }),
+ "diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'model', 'sequential']}),
+ "diffusers_offload_min_gpu_memory": OptionInfo(0.25, "Balanced offload GPU low watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
+ "diffusers_offload_max_gpu_memory": OptionInfo(0.70, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
+ "diffusers_offload_max_cpu_memory": OptionInfo(0.90, "Balanced offload CPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
+
+ "advanced_sep": OptionInfo("Advanced Options
", "", gr.HTML),
"sd_checkpoint_autoload": OptionInfo(True, "Model autoload on start"),
"sd_checkpoint_autodownload": OptionInfo(True, "Model auto-download on demand"),
- "sd_textencoder_cache": OptionInfo(True, "Cache text encoder results", gr.Checkbox, {"visible": False}),
- "sd_textencoder_cache_size": OptionInfo(4, "Text encoder cache size", gr.Slider, {"minimum": 0, "maximum": 16, "step": 1}),
"stream_load": OptionInfo(False, "Load models using stream loading method", gr.Checkbox, {"visible": not native }),
+ "diffusers_eval": OptionInfo(True, "Force model eval", gr.Checkbox, {"visible": False }),
+ "diffusers_to_gpu": OptionInfo(False, "Load model directly to GPU"),
+ "disable_accelerate": OptionInfo(False, "Disable accelerate", gr.Checkbox, {"visible": False }),
+ "sd_model_dict": OptionInfo('None', "Use separate base dict", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_titles()}, refresh=refresh_checkpoints),
+ "sd_checkpoint_cache": OptionInfo(0, "Cached models", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1, "visible": not native }),
+}))
+
+options_templates.update(options_section(('vae_encoder', "Variable Auto Encoder"), {
+ "sd_vae": OptionInfo("Automatic", "VAE model", gr.Dropdown, lambda: {"choices": shared_items.sd_vae_items()}, refresh=shared_items.refresh_vae_list),
+ "diffusers_vae_upcast": OptionInfo("default", "VAE upcasting", gr.Radio, {"choices": ['default', 'true', 'false']}),
+ "no_half_vae": OptionInfo(False if not cmd_opts.use_openvino else True, "Full precision (--no-half-vae)"),
+ "diffusers_vae_slicing": OptionInfo(True, "VAE slicing", gr.Checkbox, {"visible": native}),
+ "diffusers_vae_tiling": OptionInfo(cmd_opts.lowvram or cmd_opts.medvram, "VAE tiling", gr.Checkbox, {"visible": native}),
+ "sd_vae_sliced_encode": OptionInfo(False, "VAE sliced encode", gr.Checkbox, {"visible": not native}),
+ "nan_skip": OptionInfo(False, "Skip Generation if NaN found in latents", gr.Checkbox),
+ "rollback_vae": OptionInfo(False, "Attempt VAE roll back for NaN values"),
+}))
+
+options_templates.update(options_section(('text_encoder', "Text Encoder"), {
+ "sd_text_encoder": OptionInfo('None', "Text encoder model", gr.Dropdown, lambda: {"choices": shared_items.sd_te_items()}, refresh=shared_items.refresh_te_list),
+ "prompt_attention": OptionInfo("native", "Prompt attention parser", gr.Radio, {"choices": ["native", "compel", "xhinker", "a1111", "fixed"] }),
"prompt_mean_norm": OptionInfo(False, "Prompt attention normalization", gr.Checkbox),
+ "sd_textencoder_cache": OptionInfo(True, "Cache text encoder results", gr.Checkbox, {"visible": False}),
+ "sd_textencoder_cache_size": OptionInfo(4, "Text encoder cache size", gr.Slider, {"minimum": 0, "maximum": 16, "step": 1}),
"comma_padding_backtrack": OptionInfo(20, "Prompt padding", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1, "visible": not native }),
- "prompt_attention": OptionInfo("native", "Prompt attention parser", gr.Radio, {"choices": ["native", "compel", "xhinker", "a1111", "fixed"] }),
- "latent_history": OptionInfo(16, "Latent history size", gr.Slider, {"minimum": 1, "maximum": 100, "step": 1}),
- "sd_checkpoint_cache": OptionInfo(0, "Cached models", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1, "visible": not native }),
+ "diffusers_zeros_prompt_pad": OptionInfo(False, "Use zeros for prompt padding", gr.Checkbox),
+ "diffusers_pooled": OptionInfo("default", "Diffusers SDXL pooled embeds", gr.Radio, {"choices": ['default', 'weighted']}),
}))
options_templates.update(options_section(('cuda', "Compute Settings"), {
- "math_sep": OptionInfo("Execution precision
", "", gr.HTML),
+ "math_sep": OptionInfo("Execution Precision
", "", gr.HTML),
"precision": OptionInfo("Autocast", "Precision type", gr.Radio, {"choices": ["Autocast", "Full"]}),
"cuda_dtype": OptionInfo("Auto", "Device precision type", gr.Radio, {"choices": ["Auto", "FP32", "FP16", "BF16"]}),
+ "no_half": OptionInfo(False if not cmd_opts.use_openvino else True, "Full precision (--no-half)", None, None, None),
+ "upcast_sampling": OptionInfo(False if sys.platform != "darwin" else True, "Upcast sampling", gr.Checkbox, {"visible": not native}),
+ "upcast_attn": OptionInfo(False, "Upcast attention layer", gr.Checkbox, {"visible": not native}),
+ "cuda_cast_unet": OptionInfo(False, "Fixed UNet precision", gr.Checkbox, {"visible": not native}),
- "model_sep": OptionInfo("Model options
", "", gr.HTML),
- "no_half": OptionInfo(False if not cmd_opts.use_openvino else True, "Full precision for model (--no-half)", None, None, None),
- "no_half_vae": OptionInfo(False if not cmd_opts.use_openvino else True, "Full precision for VAE (--no-half-vae)"),
- "upcast_sampling": OptionInfo(False if sys.platform != "darwin" else True, "Upcast sampling"),
- "upcast_attn": OptionInfo(False, "Upcast attention layer"),
- "cuda_cast_unet": OptionInfo(False, "Fixed UNet precision"),
- "nan_skip": OptionInfo(False, "Skip Generation if NaN found in latents", gr.Checkbox),
- "rollback_vae": OptionInfo(False, "Attempt VAE roll back for NaN values"),
+ "generator_sep": OptionInfo("Noise Options
", "", gr.HTML),
+ "diffusers_generator_device": OptionInfo("GPU", "Generator device", gr.Radio, {"choices": ["GPU", "CPU", "Unset"]}),
"cross_attention_sep": OptionInfo("Cross Attention
", "", gr.HTML),
- "cross_attention_optimization": OptionInfo(startup_cross_attention, "Attention optimization method", gr.Radio, lambda: {"choices": shared_items.list_crossattention(native) }),
- "sdp_options": OptionInfo(startup_sdp_options, "SDP options", gr.CheckboxGroup, {"choices": ['Flash attention', 'Memory attention', 'Math attention', 'Dynamic attention', 'Sage attention'] }),
+ "cross_attention_optimization": OptionInfo(startup_cross_attention, "Attention optimization method", gr.Radio, lambda: {"choices": shared_items.list_crossattention(native)}),
+ "sdp_options": OptionInfo(startup_sdp_options, "SDP options", gr.CheckboxGroup, {"choices": ['Flash attention', 'Memory attention', 'Math attention', 'Dynamic attention', 'Sage attention'], "visible": native}),
"xformers_options": OptionInfo(['Flash attention'], "xFormers options", gr.CheckboxGroup, {"choices": ['Flash attention'] }),
"dynamic_attention_slice_rate": OptionInfo(4, "Dynamic Attention slicing rate in GB", gr.Slider, {"minimum": 0.1, "maximum": gpu_memory, "step": 0.1, "visible": native}),
"sub_quad_sep": OptionInfo("Sub-quadratic options
", "", gr.HTML, {"visible": not native}),
"sub_quad_q_chunk_size": OptionInfo(512, "Attention query chunk size", gr.Slider, {"minimum": 16, "maximum": 8192, "step": 8, "visible": not native}),
"sub_quad_kv_chunk_size": OptionInfo(512, "Attention kv chunk size", gr.Slider, {"minimum": 0, "maximum": 8192, "step": 8, "visible": not native}),
"sub_quad_chunk_threshold": OptionInfo(80, "Attention chunking threshold", gr.Slider, {"minimum": 0, "maximum": 100, "step": 1, "visible": not native}),
+}))
- "other_sep": OptionInfo("Execution options
", "", gr.HTML),
- "opt_channelslast": OptionInfo(False, "Use channels last "),
- "cudnn_deterministic": OptionInfo(False, "Use deterministic mode"),
- "cudnn_benchmark": OptionInfo(False, "Full-depth cuDNN benchmark feature"),
+options_templates.update(options_section(('backends', "Backend Settings"), {
+ "other_sep": OptionInfo("Torch Options
", "", gr.HTML),
+ "opt_channelslast": OptionInfo(False, "Channels last "),
+ "cudnn_deterministic": OptionInfo(False, "Deterministic mode"),
+ "cudnn_benchmark": OptionInfo(False, "Full-depth cuDNN benchmark"),
"diffusers_fuse_projections": OptionInfo(False, "Fused projections"),
- "torch_expandable_segments": OptionInfo(False, "Torch expandable segments"),
- "cuda_mem_fraction": OptionInfo(0.0, "Torch memory limit", gr.Slider, {"minimum": 0, "maximum": 2.0, "step": 0.05}),
- "torch_gc_threshold": OptionInfo(80, "Torch memory threshold for GC", gr.Slider, {"minimum": 0, "maximum": 100, "step": 1}),
- "torch_malloc": OptionInfo("native", "Torch memory allocator", gr.Radio, {"choices": ['native', 'cudaMallocAsync'] }),
+ "torch_expandable_segments": OptionInfo(False, "Expandable segments"),
+ "cuda_mem_fraction": OptionInfo(0.0, "Memory limit", gr.Slider, {"minimum": 0, "maximum": 2.0, "step": 0.05}),
+ "torch_gc_threshold": OptionInfo(80, "GC threshold", gr.Slider, {"minimum": 0, "maximum": 100, "step": 1}),
+ "inference_mode": OptionInfo("no-grad", "Inference mode", gr.Radio, {"choices": ["no-grad", "inference-mode", "none"]}),
+ "torch_malloc": OptionInfo("native", "Memory allocator", gr.Radio, {"choices": ['native', 'cudaMallocAsync'] }),
+
+ "onnx_sep": OptionInfo("ONNX
", "", gr.HTML),
+ "onnx_execution_provider": OptionInfo(execution_providers.get_default_execution_provider().value, 'ONNX Execution Provider', gr.Dropdown, lambda: {"choices": execution_providers.available_execution_providers }),
+ "onnx_cpu_fallback": OptionInfo(True, 'ONNX allow fallback to CPU'),
+ "onnx_cache_converted": OptionInfo(True, 'ONNX cache converted models'),
+ "onnx_unload_base": OptionInfo(False, 'ONNX unload base model when processing refiner'),
- "cuda_compile_sep": OptionInfo("Model Compile
", "", gr.HTML),
- "cuda_compile": OptionInfo([] if not cmd_opts.use_openvino else ["Model", "VAE"], "Compile Model", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "Upscaler"]}),
- "cuda_compile_backend": OptionInfo("none" if not cmd_opts.use_openvino else "openvino_fx", "Model compile backend", gr.Radio, {"choices": ['none', 'inductor', 'cudagraphs', 'aot_ts_nvfuser', 'hidet', 'migraphx', 'ipex', 'onediff', 'stable-fast', 'deep-cache', 'olive-ai', 'openvino_fx']}),
- "cuda_compile_mode": OptionInfo("default", "Model compile mode", gr.Radio, {"choices": ['default', 'reduce-overhead', 'max-autotune', 'max-autotune-no-cudagraphs']}),
- "cuda_compile_fullgraph": OptionInfo(True if not cmd_opts.use_openvino else False, "Model compile fullgraph"),
- "cuda_compile_precompile": OptionInfo(False, "Model compile precompile"),
- "cuda_compile_verbose": OptionInfo(False, "Model compile verbose mode"),
- "cuda_compile_errors": OptionInfo(True, "Model compile suppress errors"),
- "deep_cache_interval": OptionInfo(3, "DeepCache cache interval", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}),
+ "olive_sep": OptionInfo("Olive
", "", gr.HTML),
+ "olive_float16": OptionInfo(True, 'Olive use FP16 on optimization'),
+ "olive_vae_encoder_float32": OptionInfo(False, 'Olive force FP32 for VAE Encoder'),
+ "olive_static_dims": OptionInfo(True, 'Olive use static dimensions'),
+ "olive_cache_optimized": OptionInfo(True, 'Olive cache optimized models'),
"ipex_sep": OptionInfo("IPEX
", "", gr.HTML, {"visible": devices.backend == "ipex"}),
"ipex_optimize": OptionInfo([], "IPEX Optimize for Intel GPUs", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "Upscaler"], "visible": devices.backend == "ipex"}),
@@ -543,91 +577,55 @@ def get_default_modes():
"directml_sep": OptionInfo("DirectML
", "", gr.HTML, {"visible": devices.backend == "directml"}),
"directml_memory_provider": OptionInfo(default_memory_provider, 'DirectML memory stats provider', gr.Radio, {"choices": memory_providers, "visible": devices.backend == "directml"}),
"directml_catch_nan": OptionInfo(False, "DirectML retry ops for NaN", gr.Checkbox, {"visible": devices.backend == "directml"}),
-
- "olive_sep": OptionInfo("Olive
", "", gr.HTML),
- "olive_float16": OptionInfo(True, 'Olive use FP16 on optimization'),
- "olive_vae_encoder_float32": OptionInfo(False, 'Olive force FP32 for VAE Encoder'),
- "olive_static_dims": OptionInfo(True, 'Olive use static dimensions'),
- "olive_cache_optimized": OptionInfo(True, 'Olive cache optimized models'),
-}))
-
-options_templates.update(options_section(('diffusers', "Diffusers Settings"), {
- "diffusers_pipeline": OptionInfo('Autodetect', 'Diffusers pipeline', gr.Dropdown, lambda: {"choices": list(shared_items.get_pipelines()) }),
- "diffuser_cache_config": OptionInfo(True, "Use cached model config when available"),
- "diffusers_move_base": OptionInfo(False, "Move base model to CPU when using refiner"),
- "diffusers_move_unet": OptionInfo(False, "Move base model to CPU when using VAE"),
- "diffusers_move_refiner": OptionInfo(False, "Move refiner model to CPU when not in use"),
- "diffusers_extract_ema": OptionInfo(False, "Use model EMA weights when possible"),
- "diffusers_generator_device": OptionInfo("GPU", "Generator device", gr.Radio, {"choices": ["GPU", "CPU", "Unset"]}),
- "diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'model', 'sequential']}),
- "diffusers_offload_min_gpu_memory": OptionInfo(0.25, "Balanced offload GPU low watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
- "diffusers_offload_max_gpu_memory": OptionInfo(0.70, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
- "diffusers_offload_max_cpu_memory": OptionInfo(0.75, "Balanced offload CPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
- "diffusers_vae_upcast": OptionInfo("default", "VAE upcasting", gr.Radio, {"choices": ['default', 'true', 'false']}),
- "diffusers_vae_slicing": OptionInfo(True, "VAE slicing"),
- "diffusers_vae_tiling": OptionInfo(cmd_opts.lowvram or cmd_opts.medvram, "VAE tiling"),
- "diffusers_model_load_variant": OptionInfo("default", "Preferred Model variant", gr.Radio, {"choices": ['default', 'fp32', 'fp16']}),
- "diffusers_vae_load_variant": OptionInfo("default", "Preferred VAE variant", gr.Radio, {"choices": ['default', 'fp32', 'fp16']}),
- "custom_diffusers_pipeline": OptionInfo('', 'Load custom Diffusers pipeline'),
- "diffusers_eval": OptionInfo(True, "Force model eval"),
- "diffusers_to_gpu": OptionInfo(False, "Load model directly to GPU"),
- "disable_accelerate": OptionInfo(False, "Disable accelerate"),
- "diffusers_pooled": OptionInfo("default", "Diffusers SDXL pooled embeds", gr.Radio, {"choices": ['default', 'weighted']}),
- "diffusers_zeros_prompt_pad": OptionInfo(False, "Use zeros for prompt padding", gr.Checkbox),
- "huggingface_token": OptionInfo('', 'HuggingFace token'),
- "enable_linfusion": OptionInfo(False, "Apply LinFusion distillation on load"),
-
- "onnx_sep": OptionInfo("ONNX Runtime
", "", gr.HTML),
- "onnx_execution_provider": OptionInfo(execution_providers.get_default_execution_provider().value, 'Execution Provider', gr.Dropdown, lambda: {"choices": execution_providers.available_execution_providers }),
- "onnx_cpu_fallback": OptionInfo(True, 'ONNX allow fallback to CPU'),
- "onnx_cache_converted": OptionInfo(True, 'ONNX cache converted models'),
- "onnx_unload_base": OptionInfo(False, 'ONNX unload base model when processing refiner'),
}))
options_templates.update(options_section(('quantization', "Quantization Settings"), {
- "bnb_quantization": OptionInfo([], "BnB quantization enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder"], "visible": native}),
- "bnb_quantization_type": OptionInfo("nf4", "BnB quantization type", gr.Radio, {"choices": ['nf4', 'fp8', 'fp4'], "visible": native}),
- "bnb_quantization_storage": OptionInfo("uint8", "BnB quantization storage", gr.Radio, {"choices": ["float16", "float32", "int8", "uint8", "float64", "bfloat16"], "visible": native}),
- "optimum_quanto_weights": OptionInfo([], "Optimum.quanto quantization enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "ControlNet"], "visible": native}),
- "optimum_quanto_weights_type": OptionInfo("qint8", "Optimum.quanto quantization type", gr.Radio, {"choices": ['qint8', 'qfloat8_e4m3fn', 'qfloat8_e5m2', 'qint4', 'qint2'], "visible": native}),
- "optimum_quanto_activations_type": OptionInfo("none", "Optimum.quanto quantization activations ", gr.Radio, {"choices": ['none', 'qint8', 'qfloat8_e4m3fn', 'qfloat8_e5m2'], "visible": native}),
- "torchao_quantization": OptionInfo([], "TorchAO quantization enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder"], "visible": native}),
- "torchao_quantization_mode": OptionInfo("pre", "TorchAO quantization mode", gr.Radio, {"choices": ['pre', 'post'], "visible": native}),
- "torchao_quantization_type": OptionInfo("int8", "TorchAO quantization type", gr.Radio, {"choices": ["int8+act", "int8", "int4", "fp8+act", "fp8", "fpx"], "visible": native}),
- "nncf_compress_weights": OptionInfo([], "NNCF compression enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "ControlNet"], "visible": native}),
- "nncf_compress_weights_mode": OptionInfo("INT8", "NNCF compress mode", gr.Radio, {"choices": ['INT8', 'INT8_SYM', 'INT4_ASYM', 'INT4_SYM', 'NF4'] if cmd_opts.use_openvino else ['INT8']}),
- "nncf_compress_weights_raito": OptionInfo(1.0, "NNCF compress ratio", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01, "visible": cmd_opts.use_openvino}),
- "nncf_quantize": OptionInfo([], "NNCF OpenVINO quantization enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder"], "visible": cmd_opts.use_openvino}),
- "nncf_quant_mode": OptionInfo("INT8", "NNCF OpenVINO quantization mode", gr.Radio, {"choices": ['INT8', 'FP8_E4M3', 'FP8_E5M2'], "visible": cmd_opts.use_openvino}),
-
- "quant_shuffle_weights": OptionInfo(False, "Shuffle the weights between GPU and CPU when quantizing", gr.Checkbox, {"visible": native}),
+ "bnb_sep": OptionInfo("BitsAndBytes
", "", gr.HTML),
+ "bnb_quantization": OptionInfo([], "Enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder"], "visible": native}),
+ "bnb_quantization_type": OptionInfo("nf4", "Type", gr.Radio, {"choices": ['nf4', 'fp8', 'fp4'], "visible": native}),
+ "bnb_quantization_storage": OptionInfo("uint8", "Backend storage", gr.Radio, {"choices": ["float16", "float32", "int8", "uint8", "float64", "bfloat16"], "visible": native}),
+ "optimum_quanto_sep": OptionInfo("Optimum Quanto
", "", gr.HTML),
+ "optimum_quanto_weights": OptionInfo([], "Enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "ControlNet"], "visible": native}),
+ "optimum_quanto_weights_type": OptionInfo("qint8", "Type", gr.Radio, {"choices": ['qint8', 'qfloat8_e4m3fn', 'qfloat8_e5m2', 'qint4', 'qint2'], "visible": native}),
+ "optimum_quanto_activations_type": OptionInfo("none", "Activations ", gr.Radio, {"choices": ['none', 'qint8', 'qfloat8_e4m3fn', 'qfloat8_e5m2'], "visible": native}),
+ "torchao_sep": OptionInfo("TorchAO
", "", gr.HTML),
+ "torchao_quantization": OptionInfo([], "Enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder"], "visible": native}),
+ "torchao_quantization_mode": OptionInfo("pre", "Mode", gr.Radio, {"choices": ['pre', 'post'], "visible": native}),
+ "torchao_quantization_type": OptionInfo("int8", "Type", gr.Radio, {"choices": ["int8+act", "int8", "int4", "fp8+act", "fp8", "fpx"], "visible": native}),
+ "nncf_sep": OptionInfo("NNCF
", "", gr.HTML),
+ "nncf_compress_weights": OptionInfo([], "Enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "ControlNet"], "visible": native}),
+ "nncf_compress_weights_mode": OptionInfo("INT8", "Mode", gr.Radio, {"choices": ['INT8', 'INT8_SYM', 'INT4_ASYM', 'INT4_SYM', 'NF4'] if cmd_opts.use_openvino else ['INT8']}),
+ "nncf_compress_weights_raito": OptionInfo(1.0, "Compress ratio", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01, "visible": cmd_opts.use_openvino}),
+ "nncf_quantize": OptionInfo([], "OpenVINO enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder"], "visible": cmd_opts.use_openvino}),
+ "nncf_quant_mode": OptionInfo("INT8", "OpenVINO mode", gr.Radio, {"choices": ['INT8', 'FP8_E4M3', 'FP8_E5M2'], "visible": cmd_opts.use_openvino}),
+ "quant_shuffle_weights": OptionInfo(False, "Shuffle weights", gr.Checkbox, {"visible": native}),
}))
-options_templates.update(options_section(('advanced', "Inference Settings"), {
- "token_merging_sep": OptionInfo("Token merging
", "", gr.HTML),
+options_templates.update(options_section(('advanced', "Pipeline Modifiers"), {
+ "token_merging_sep": OptionInfo("Token Merging
", "", gr.HTML),
"token_merging_method": OptionInfo("None", "Token merging method", gr.Radio, {"choices": ['None', 'ToMe', 'ToDo']}),
"tome_ratio": OptionInfo(0.0, "ToMe token merging ratio", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.05}),
"todo_ratio": OptionInfo(0.0, "ToDo token merging ratio", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.05}),
"freeu_sep": OptionInfo("FreeU
", "", gr.HTML),
"freeu_enabled": OptionInfo(False, "FreeU"),
- "freeu_b1": OptionInfo(1.2, "1st stage backbone factor", gr.Slider, {"minimum": 1.0, "maximum": 2.0, "step": 0.01}),
- "freeu_b2": OptionInfo(1.4, "2nd stage backbone factor", gr.Slider, {"minimum": 1.0, "maximum": 2.0, "step": 0.01}),
- "freeu_s1": OptionInfo(0.9, "1st stage skip factor", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
- "freeu_s2": OptionInfo(0.2, "2nd stage skip factor", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
+ "freeu_b1": OptionInfo(1.2, "1st stage backbone", gr.Slider, {"minimum": 1.0, "maximum": 2.0, "step": 0.01}),
+ "freeu_b2": OptionInfo(1.4, "2nd stage backbone", gr.Slider, {"minimum": 1.0, "maximum": 2.0, "step": 0.01}),
+ "freeu_s1": OptionInfo(0.9, "1st stage skip", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
+ "freeu_s2": OptionInfo(0.2, "2nd stage skip", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
"pag_sep": OptionInfo("Perturbed-Attention Guidance
", "", gr.HTML),
"pag_apply_layers": OptionInfo("m0", "PAG layer names"),
"hypertile_sep": OptionInfo("HyperTile
", "", gr.HTML),
- "hypertile_hires_only": OptionInfo(False, "HyperTile hires pass only"),
- "hypertile_unet_enabled": OptionInfo(False, "HyperTile UNet"),
- "hypertile_unet_tile": OptionInfo(0, "HyperTile UNet tile size", gr.Slider, {"minimum": 0, "maximum": 1024, "step": 8}),
- "hypertile_unet_swap_size": OptionInfo(1, "HyperTile UNet swap size", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}),
- "hypertile_unet_depth": OptionInfo(0, "HyperTile UNet depth", gr.Slider, {"minimum": 0, "maximum": 4, "step": 1}),
- "hypertile_vae_enabled": OptionInfo(False, "HyperTile VAE", gr.Checkbox),
- "hypertile_vae_tile": OptionInfo(128, "HyperTile VAE tile size", gr.Slider, {"minimum": 0, "maximum": 1024, "step": 8}),
- "hypertile_vae_swap_size": OptionInfo(1, "HyperTile VAE swap size", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}),
+ "hypertile_hires_only": OptionInfo(False, "HiRes pass only"),
+ "hypertile_unet_enabled": OptionInfo(False, "UNet Enabled"),
+ "hypertile_unet_tile": OptionInfo(0, "UNet tile size", gr.Slider, {"minimum": 0, "maximum": 1024, "step": 8}),
+ "hypertile_unet_swap_size": OptionInfo(1, "UNet swap size", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}),
+ "hypertile_unet_depth": OptionInfo(0, "UNet depth", gr.Slider, {"minimum": 0, "maximum": 4, "step": 1}),
+ "hypertile_vae_enabled": OptionInfo(False, "VAE Enabled", gr.Checkbox),
+ "hypertile_vae_tile": OptionInfo(128, "VAE tile size", gr.Slider, {"minimum": 0, "maximum": 1024, "step": 8}),
+ "hypertile_vae_swap_size": OptionInfo(1, "VAE swap size", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}),
"hidiffusion_sep": OptionInfo("HiDiffusion
", "", gr.HTML),
"hidiffusion_raunet": OptionInfo(True, "Apply RAU-Net"),
@@ -636,16 +634,28 @@ def get_default_modes():
"hidiffusion_t1": OptionInfo(-1, "Override T1 ratio", gr.Slider, {"minimum": -1, "maximum": 1.0, "step": 0.05}),
"hidiffusion_t2": OptionInfo(-1, "Override T2 ratio", gr.Slider, {"minimum": -1, "maximum": 1.0, "step": 0.05}),
+ "linfusion_sep": OptionInfo("Batch
", "", gr.HTML),
+ "enable_linfusion": OptionInfo(False, "Apply LinFusion distillation on load"),
+
"inference_batch_sep": OptionInfo("Batch
", "", gr.HTML),
"sequential_seed": OptionInfo(True, "Batch mode uses sequential seeds"),
"batch_frame_mode": OptionInfo(False, "Parallel process images in batch"),
- "inference_other_sep": OptionInfo("Other
", "", gr.HTML),
- "inference_mode": OptionInfo("no-grad", "Torch inference mode", gr.Radio, {"choices": ["no-grad", "inference-mode", "none"]}),
- "sd_vae_sliced_encode": OptionInfo(False, "VAE sliced encode", gr.Checkbox, {"visible": not native}),
+}))
+
+options_templates.update(options_section(('compile', "Model Compile"), {
+ "cuda_compile_sep": OptionInfo("Model Compile
", "", gr.HTML),
+ "cuda_compile": OptionInfo([] if not cmd_opts.use_openvino else ["Model", "VAE"], "Compile Model", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "Upscaler"]}),
+ "cuda_compile_backend": OptionInfo("none" if not cmd_opts.use_openvino else "openvino_fx", "Model compile backend", gr.Radio, {"choices": ['none', 'inductor', 'cudagraphs', 'aot_ts_nvfuser', 'hidet', 'migraphx', 'ipex', 'onediff', 'stable-fast', 'deep-cache', 'olive-ai', 'openvino_fx']}),
+ "cuda_compile_mode": OptionInfo("default", "Model compile mode", gr.Radio, {"choices": ['default', 'reduce-overhead', 'max-autotune', 'max-autotune-no-cudagraphs']}),
+ "cuda_compile_fullgraph": OptionInfo(True if not cmd_opts.use_openvino else False, "Model compile fullgraph"),
+ "cuda_compile_precompile": OptionInfo(False, "Model compile precompile"),
+ "cuda_compile_verbose": OptionInfo(False, "Model compile verbose mode"),
+ "cuda_compile_errors": OptionInfo(True, "Model compile suppress errors"),
+ "deep_cache_interval": OptionInfo(3, "DeepCache cache interval", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}),
}))
options_templates.update(options_section(('system-paths', "System Paths"), {
- "models_paths_sep_options": OptionInfo("Models paths
", "", gr.HTML),
+ "models_paths_sep_options": OptionInfo("Models Paths
", "", gr.HTML),
"models_dir": OptionInfo('models', "Base path where all models are stored", folder=True),
"ckpt_dir": OptionInfo(os.path.join(paths.models_path, 'Stable-diffusion'), "Folder with stable diffusion models", folder=True),
"diffusers_dir": OptionInfo(os.path.join(paths.models_path, 'Diffusers'), "Folder with Huggingface models", folder=True),
@@ -726,13 +736,13 @@ def get_default_modes():
}))
options_templates.update(options_section(('saving-paths', "Image Paths"), {
- "saving_sep_images": OptionInfo("Save options
", "", gr.HTML),
+ "saving_sep_images": OptionInfo("Save Options
", "", gr.HTML),
"save_images_add_number": OptionInfo(True, "Numbered filenames", component_args=hide_dirs),
"use_original_name_batch": OptionInfo(True, "Batch uses original name"),
"save_to_dirs": OptionInfo(False, "Save images to a subdirectory"),
"directories_filename_pattern": OptionInfo("[date]", "Directory name pattern", component_args=hide_dirs),
"samples_filename_pattern": OptionInfo("[seq]-[model_name]-[prompt_words]", "Images filename pattern", component_args=hide_dirs),
- "directories_max_prompt_words": OptionInfo(8, "Max words per pattern", gr.Slider, {"minimum": 1, "maximum": 99, "step": 1, **hide_dirs}),
+ "directories_max_prompt_words": OptionInfo(8, "Max words", gr.Slider, {"minimum": 1, "maximum": 99, "step": 1, **hide_dirs}),
"outdir_sep_dirs": OptionInfo("Folders
", "", gr.HTML),
"outdir_samples": OptionInfo("", "Images folder", component_args=hide_dirs, folder=True),
@@ -751,14 +761,14 @@ def get_default_modes():
"outdir_control_grids": OptionInfo("outputs/grids", 'Folder for control grids', component_args=hide_dirs, folder=True),
}))
-options_templates.update(options_section(('ui', "User Interface Options"), {
+options_templates.update(options_section(('ui', "User Interface"), {
"theme_type": OptionInfo("Standard", "Theme type", gr.Radio, {"choices": ["Modern", "Standard", "None"]}),
"theme_style": OptionInfo("Auto", "Theme mode", gr.Radio, {"choices": ["Auto", "Dark", "Light"]}),
"gradio_theme": OptionInfo("black-teal", "UI theme", gr.Dropdown, lambda: {"choices": theme.list_themes()}, refresh=theme.refresh_themes),
"autolaunch": OptionInfo(False, "Autolaunch browser upon startup"),
"font_size": OptionInfo(14, "Font size", gr.Slider, {"minimum": 8, "maximum": 32, "step": 1, "visible": True}),
"aspect_ratios": OptionInfo("1:1, 4:3, 3:2, 16:9, 16:10, 21:9, 2:3, 3:4, 9:16, 10:16, 9:21", "Allowed aspect ratios"),
- "motd": OptionInfo(True, "Show MOTD"),
+ "motd": OptionInfo(False, "Show MOTD"),
"compact_view": OptionInfo(False, "Compact view"),
"return_grid": OptionInfo(True, "Show grid in results"),
"return_mask": OptionInfo(False, "Inpainting include greyscale mask in results"),
@@ -770,14 +780,14 @@ def get_default_modes():
}))
options_templates.update(options_section(('live-preview', "Live Previews"), {
- "notification_audio_enable": OptionInfo(False, "Play a notification upon completion"),
- "notification_audio_path": OptionInfo("html/notification.mp3","Path to notification sound", component_args=hide_dirs, folder=True),
"show_progress_every_n_steps": OptionInfo(1, "Live preview display period", gr.Slider, {"minimum": 0, "maximum": 32, "step": 1}),
"show_progress_type": OptionInfo("Approximate", "Live preview method", gr.Radio, {"choices": ["Simple", "Approximate", "TAESD", "Full VAE"]}),
"live_preview_refresh_period": OptionInfo(500, "Progress update period", gr.Slider, {"minimum": 0, "maximum": 5000, "step": 25}),
"live_preview_taesd_layers": OptionInfo(3, "TAESD decode layers", gr.Slider, {"minimum": 1, "maximum": 3, "step": 1}),
"logmonitor_show": OptionInfo(True, "Show log view"),
"logmonitor_refresh_period": OptionInfo(5000, "Log view update period", gr.Slider, {"minimum": 0, "maximum": 30000, "step": 25}),
+ "notification_audio_enable": OptionInfo(False, "Play a notification upon completion"),
+ "notification_audio_path": OptionInfo("html/notification.mp3","Path to notification sound", component_args=hide_dirs, folder=True),
}))
options_templates.update(options_section(('sampler-params', "Sampler Settings"), {
@@ -816,7 +826,7 @@ def get_default_modes():
's_noise': OptionInfo(1.0, "Sigma noise", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01, "visible": not native}),
's_min': OptionInfo(0.0, "Sigma min", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01, "visible": not native}),
's_max': OptionInfo(0.0, "Sigma max", gr.Slider, {"minimum": 0.0, "maximum": 100.0, "step": 1.0, "visible": not native}),
- "schedulers_sep_compvis": OptionInfo("CompVis specific config
", "", gr.HTML, {"visible": not native}),
+ "schedulers_sep_compvis": OptionInfo("CompVis Config
", "", gr.HTML, {"visible": not native}),
'uni_pc_variant': OptionInfo("bh2", "UniPC variant", gr.Radio, {"choices": ["bh1", "bh2", "vary_coeff"], "visible": not native}),
'uni_pc_skip_type': OptionInfo("time_uniform", "UniPC skip type", gr.Radio, {"choices": ["time_uniform", "time_quadratic", "logSNR"], "visible": not native}),
"ddim_discretize": OptionInfo('uniform', "DDIM discretize img2img", gr.Radio, {"choices": ['uniform', 'quad'], "visible": not native}),
@@ -849,7 +859,7 @@ def get_default_modes():
"detailer_unload": OptionInfo(False, "Move detailer model to CPU when complete"),
"detailer_augment": OptionInfo(True, "Detailer use model augment"),
- "postprocessing_sep_face_restore": OptionInfo("Face restore
", "", gr.HTML),
+ "postprocessing_sep_face_restore": OptionInfo("Face Restore
", "", gr.HTML),
"face_restoration_model": OptionInfo("None", "Face restoration", gr.Radio, lambda: {"choices": ['None'] + [x.name() for x in face_restorers]}),
"code_former_weight": OptionInfo(0.2, "CodeFormer weight parameter", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}),
@@ -879,6 +889,15 @@ def get_default_modes():
"deepbooru_filter_tags": OptionInfo("", "Filter out tags from deepbooru output"),
}))
+options_templates.update(options_section(('huggingface', "Huggingface"), {
+ "huggingface_sep": OptionInfo("Huggingface
", "", gr.HTML),
+ "diffuser_cache_config": OptionInfo(True, "Use cached model config when available"),
+ "huggingface_token": OptionInfo('', 'HuggingFace token'),
+ "diffusers_model_load_variant": OptionInfo("default", "Preferred Model variant", gr.Radio, {"choices": ['default', 'fp32', 'fp16']}),
+ "diffusers_vae_load_variant": OptionInfo("default", "Preferred VAE variant", gr.Radio, {"choices": ['default', 'fp32', 'fp16']}),
+ "custom_diffusers_pipeline": OptionInfo('', 'Load custom Diffusers pipeline'),
+}))
+
options_templates.update(options_section(('extra_networks', "Networks"), {
"extra_networks_sep1": OptionInfo("Networks UI
", "", gr.HTML),
"extra_networks_show": OptionInfo(True, "UI show on startup"),
diff --git a/scripts/cogvideo.py b/scripts/cogvideo.py
index c988c05c4..284109857 100644
--- a/scripts/cogvideo.py
+++ b/scripts/cogvideo.py
@@ -51,7 +51,7 @@ def video_type_change(video_type):
with gr.Row():
video_type = gr.Dropdown(label='Video file', choices=['None', 'GIF', 'PNG', 'MP4'], value='None')
duration = gr.Slider(label='Duration', minimum=0.25, maximum=30, step=0.25, value=8, visible=False)
- with gr.Accordion('Optional init video', open=False):
+ with gr.Accordion('Optional init image or video', open=False):
with gr.Row():
image = gr.Image(value=None, label='Image', type='pil', source='upload', width=256, height=256)
video = gr.Video(value=None, label='Video', source='upload', width=256, height=256)
@@ -169,25 +169,18 @@ def generate(self, p: processing.StableDiffusionProcessing, model: str):
callback_on_step_end=diffusers_callback,
callback_on_step_end_tensor_inputs=['latents'],
)
- if getattr(p, 'image', False):
- if 'I2V' not in model:
- shared.log.error(f'CogVideoX: model={model} image input not supported')
- return []
- args['image'] = self.image(p, p.image)
- args['num_frames'] = p.frames # only txt2vid has num_frames
- shared.sd_model = sd_models.switch_pipe(diffusers.CogVideoXImageToVideoPipeline, shared.sd_model)
- elif getattr(p, 'video', False):
- if 'I2V' in model:
- shared.log.error(f'CogVideoX: model={model} image input not supported')
- return []
- args['video'] = self.video(p, p.video)
- shared.sd_model = sd_models.switch_pipe(diffusers.CogVideoXVideoToVideoPipeline, shared.sd_model)
+ if 'I2V' in model:
+ if hasattr(p, 'video') and p.video is not None:
+ args['video'] = self.video(p, p.video)
+ shared.sd_model = sd_models.switch_pipe(diffusers.CogVideoXVideoToVideoPipeline, shared.sd_model)
+ elif (hasattr(p, 'image') and p.image is not None) or (hasattr(p, 'init_images') and len(p.init_images) > 0):
+ p.init_images = [p.image] if hasattr(p, 'image') and p.image is not None else p.init_images
+ args['image'] = self.image(p, p.init_images[0])
+ shared.sd_model = sd_models.switch_pipe(diffusers.CogVideoXImageToVideoPipeline, shared.sd_model)
else:
- if 'I2V' in model:
- shared.log.error(f'CogVideoX: model={model} image input not supported')
- return []
- args['num_frames'] = p.frames # only txt2vid has num_frames
shared.sd_model = sd_models.switch_pipe(diffusers.CogVideoXPipeline, shared.sd_model)
+ args['num_frames'] = p.frames # only txt2vid has num_frames
+ shared.log.info(f'CogVideoX: class={shared.sd_model.__class__.__name__} frames={p.frames} input={args.get("video", None) or args.get("image", None)}')
if debug:
shared.log.debug(f'CogVideoX args: {args}')
frames = shared.sd_model(**args).frames[0]
@@ -199,7 +192,7 @@ def generate(self, p: processing.StableDiffusionProcessing, model: str):
errors.display(e, 'CogVideoX')
t1 = time.time()
its = (len(frames) * p.steps) / (t1 - t0)
- shared.log.info(f'CogVideoX: frames={len(frames)} its={its:.2f} time={t1 - t0:.2f}')
+ shared.log.info(f'CogVideoX: frame={frames[0] if len(frames) > 0 else None} frames={len(frames)} its={its:.2f} time={t1 - t0:.2f}')
return frames
# auto-executed by the script-callback
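
The rewritten branch above selects the CogVideoX pipeline from the model name and the provided inputs; below is a simplified, standalone sketch of that dispatch (the pipeline class names are the diffusers ones, the helper itself is illustrative):

```python
def select_pipeline(model_name, image=None, video=None, init_images=None):
    """Return the diffusers pipeline class name to switch to, or None to keep the current one."""
    init_images = init_images or []
    if 'I2V' in model_name:
        if video is not None:
            return 'CogVideoXVideoToVideoPipeline'
        if image is not None or len(init_images) > 0:
            return 'CogVideoXImageToVideoPipeline'
        return None  # I2V model without an image/video input: pipeline is left unchanged
    return 'CogVideoXPipeline'  # plain txt2vid; the only variant that takes num_frames

print(select_pipeline('CogVideoX-5b'))                       # CogVideoXPipeline
print(select_pipeline('CogVideoX-5b-I2V', image='init.png')) # CogVideoXImageToVideoPipeline
```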
From 944408e93b1cda1266fda6c2f1aeca9b4c30ee75 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 10 Dec 2024 10:39:13 -0500
Subject: [PATCH 097/162] warn on quanto with offload
Signed-off-by: Vladimir Mandic
---
modules/model_quant.py | 7 +++++--
modules/shared.py | 4 ++--
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/modules/model_quant.py b/modules/model_quant.py
index 9482fe898..03043b33a 100644
--- a/modules/model_quant.py
+++ b/modules/model_quant.py
@@ -58,11 +58,11 @@ def load_torchao(msg='', silent=False):
import torchao
ao = torchao
fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
- log.debug(f'Quantization: type=quanto version={ao.__version__} fn={fn}') # pylint: disable=protected-access
+ log.debug(f'Quantization: type=torchao version={ao.__version__} fn={fn}') # pylint: disable=protected-access
return ao
except Exception as e:
if len(msg) > 0:
- log.error(f"{msg} failed to import optimum.quanto: {e}")
+ log.error(f"{msg} failed to import torchao: {e}")
ao = None
if not silent:
raise
@@ -92,6 +92,7 @@ def load_bnb(msg='', silent=False):
def load_quanto(msg='', silent=False):
+ from modules import shared
global quanto # pylint: disable=global-statement
if quanto is not None:
return quanto
@@ -101,6 +102,8 @@ def load_quanto(msg='', silent=False):
quanto = optimum_quanto
fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
log.debug(f'Quantization: type=quanto version={quanto.__version__} fn={fn}') # pylint: disable=protected-access
+ if shared.opts.diffusers_offload_mode != 'none':
+ shared.log.error(f'Quantization: type=quanto offload={shared.opts.diffusers_offload_mode} not supported')
return quanto
except Exception as e:
if len(msg) > 0:
diff --git a/modules/shared.py b/modules/shared.py
index 3d7571029..17db4595f 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -549,7 +549,7 @@ def get_default_modes():
"diffusers_fuse_projections": OptionInfo(False, "Fused projections"),
"torch_expandable_segments": OptionInfo(False, "Expandable segments"),
"cuda_mem_fraction": OptionInfo(0.0, "Memory limit", gr.Slider, {"minimum": 0, "maximum": 2.0, "step": 0.05}),
- "torch_gc_threshold": OptionInfo(80, "GC threshold", gr.Slider, {"minimum": 0, "maximum": 100, "step": 1}),
+ "torch_gc_threshold": OptionInfo(70, "GC threshold", gr.Slider, {"minimum": 0, "maximum": 100, "step": 1}),
"inference_mode": OptionInfo("no-grad", "Inference mode", gr.Radio, {"choices": ["no-grad", "inference-mode", "none"]}),
"torch_malloc": OptionInfo("native", "Memory allocator", gr.Radio, {"choices": ['native', 'cudaMallocAsync'] }),
@@ -566,7 +566,7 @@ def get_default_modes():
"olive_cache_optimized": OptionInfo(True, 'Olive cache optimized models'),
"ipex_sep": OptionInfo("IPEX
", "", gr.HTML, {"visible": devices.backend == "ipex"}),
- "ipex_optimize": OptionInfo([], "IPEX Optimize for Intel GPUs", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "Upscaler"], "visible": devices.backend == "ipex"}),
+ "ipex_optimize": OptionInfo([], "IPEX Optimize", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "Upscaler"], "visible": devices.backend == "ipex"}),
"openvino_sep": OptionInfo("OpenVINO
", "", gr.HTML, {"visible": cmd_opts.use_openvino}),
"openvino_devices": OptionInfo([], "OpenVINO devices to use", gr.CheckboxGroup, {"choices": get_openvino_device_list() if cmd_opts.use_openvino else [], "visible": cmd_opts.use_openvino}), # pylint: disable=E0606
From beea969fd3429d8e78669ac18dee4b2f79b9571b Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 10 Dec 2024 12:34:27 -0500
Subject: [PATCH 098/162] update lora
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 7 ++++---
modules/lora/extra_networks_lora.py | 6 +++++-
modules/lora/networks.py | 19 ++++++++++++-------
modules/shared.py | 18 ++++++++++--------
wiki | 2 +-
5 files changed, 32 insertions(+), 20 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5412861a8..8ade15e58 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -37,9 +37,8 @@
- LoRA weights are no longer calculated on-the-fly during model execution, but are pre-calculated at the start
this results in perceived overhead on generate startup, but results in overall faster execution as LoRA does not need to be processed on each step
thanks @AI-Casanova
- - *note*: LoRA weights backups are required so LoRA can be unapplied, but can take quite a lot of system memory
- if you know you will not need to unapply LoRA, you can disable backups in *settings -> networks -> lora fuse*
- in which case, you need to reload model to unapply LoRA
+ - LoRA weights can either be applied/unapplied on each generate, or weight backups can be stored for later re-use
+ this setting has large performance and resource implications, see [Offload](https://github.com/vladmandic/automatic/wiki/Offload) wiki for details
- **Model loader** improvements:
- detect model components on model load fail
- allow passing absolute path to model loader
@@ -98,6 +97,8 @@
- fix prompt caching
- fix xyz grid skip final pass
- fix sd upscale script
+- fix cogvideox-i2v
+- lora auto-apply tags remove duplicates
## Update for 2024-11-21
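
The changelog entry above contrasts the two LoRA application strategies: fuse the pre-calculated deltas directly into the weights, or keep CPU backups so the LoRA can be un-applied later. A hedged sketch of that trade-off, using illustrative helpers rather than the modules/lora code:

```python
import torch

def apply_lora(weight: torch.Tensor, delta: torch.Tensor, keep_backup: bool):
    """Fuse a pre-calculated LoRA delta into a layer weight, optionally keeping a CPU backup."""
    backup = weight.detach().clone().cpu() if keep_backup else None  # backups cost system RAM
    with torch.no_grad():
        weight.add_(delta)  # done once per generate instead of on every step
    return backup

def unapply_lora(weight: torch.Tensor, backup):
    if backup is None:
        raise RuntimeError('no backup stored: reload the model to remove the LoRA')
    with torch.no_grad():
        weight.copy_(backup.to(weight.device))
```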
diff --git a/modules/lora/extra_networks_lora.py b/modules/lora/extra_networks_lora.py
index 135df1ccb..42c4a92f6 100644
--- a/modules/lora/extra_networks_lora.py
+++ b/modules/lora/extra_networks_lora.py
@@ -44,6 +44,8 @@ def prompt(p):
loaded.tags = loaded.tags[:shared.opts.lora_apply_tags]
all_tags.extend(loaded.tags)
if len(all_tags) > 0:
+ all_tags = list(set(all_tags))
+ all_tags = [t for t in all_tags if t not in p.prompt]
shared.log.debug(f"Load network: type=LoRA tags={all_tags} max={shared.opts.lora_apply_tags} apply")
all_tags = ', '.join(all_tags)
p.extra_generation_params["LoRA tags"] = all_tags
@@ -121,13 +123,15 @@ def activate(self, p, params_list, step=0, include=[], exclude=[]):
# shared.log.debug(f'Activate network: type=LoRA model="{shared.opts.sd_model_checkpoint}"')
self.active = True
self.model = shared.opts.sd_model_checkpoint
+ if 'text_encoder' in include:
+ networks.timer.clear(complete=True)
names, te_multipliers, unet_multipliers, dyn_dims = parse(p, params_list, step)
networks.network_load(names, te_multipliers, unet_multipliers, dyn_dims) # load
networks.network_activate(include, exclude)
if len(networks.loaded_networks) > 0 and len(networks.applied_layers) > 0 and step == 0:
infotext(p)
prompt(p)
- shared.log.info(f'Load network: type=LoRA apply={[n.name for n in networks.loaded_networks]} te={te_multipliers} unet={unet_multipliers} time={networks.timer.summary}')
+ shared.log.info(f'Load network: type=LoRA apply={[n.name for n in networks.loaded_networks]} mode={"fuse" if shared.opts.lora_fuse_diffusers else "backup"} te={te_multipliers} unet={unet_multipliers} time={networks.timer.summary}')
def deactivate(self, p):
if shared.native and len(networks.diffuser_loaded) > 0:
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index edd82f3e4..ada6f833d 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -42,6 +42,7 @@
# section: load networks from disk
def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_default_multiplier) -> Union[network.Network, None]:
+ t0 = time.time()
name = name.replace(".", "_")
shared.log.debug(f'Load network: type=LoRA name="{name}" file="{network_on_disk.filename}" detected={network_on_disk.sd_version} method=diffusers scale={lora_scale} fuse={shared.opts.lora_fuse_diffusers}')
if not shared.native:
@@ -67,6 +68,7 @@ def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_
diffuser_scales.append(lora_scale)
net = network.Network(name, network_on_disk)
net.mtime = os.path.getmtime(network_on_disk.filename)
+ timer.activate += time.time() - t0
return net
@@ -256,10 +258,12 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=Non
if len(diffuser_loaded) > 0:
shared.log.debug(f'Load network: type=LoRA loaded={diffuser_loaded} available={shared.sd_model.get_list_adapters()} active={shared.sd_model.get_active_adapters()} scales={diffuser_scales}')
try:
+ t0 = time.time()
shared.sd_model.set_adapters(adapter_names=diffuser_loaded, adapter_weights=diffuser_scales)
if shared.opts.lora_fuse_diffusers:
shared.sd_model.fuse_lora(adapter_names=diffuser_loaded, lora_scale=1.0, fuse_unet=True, fuse_text_encoder=True) # fuse uses fixed scale since later apply does the scaling
shared.sd_model.unload_lora_weights()
+ timer.activate += time.time() - t0
except Exception as e:
shared.log.error(f'Load network: type=LoRA {e}')
if debug:
@@ -301,16 +305,15 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
bnb = model_quant.load_bnb('Load network: type=LoRA', silent=True)
if bnb is not None:
with devices.inference_context():
- weights_backup = bnb.functional.dequantize_4bit(weight, quant_state=weight.quant_state, quant_type=weight.quant_type, blocksize=weight.blocksize,)
+ self.network_weights_backup = bnb.functional.dequantize_4bit(weight, quant_state=weight.quant_state, quant_type=weight.quant_type, blocksize=weight.blocksize,)
self.quant_state = weight.quant_state
self.quant_type = weight.quant_type
self.blocksize = weight.blocksize
else:
weights_backup = weight.clone()
- weights_backup = weights_backup.to(devices.cpu)
+ self.network_weights_backup = weights_backup.to(devices.cpu)
else:
- weights_backup = weight.clone()
- weights_backup = weights_backup.to(devices.cpu)
+ self.network_weights_backup = weight.clone().to(devices.cpu)
bias_backup = getattr(self, "network_bias_backup", None)
if bias_backup is None:
@@ -331,7 +334,10 @@ def network_backup_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.n
def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.GroupNorm, torch.nn.LayerNorm, diffusers.models.lora.LoRACompatibleLinear, diffusers.models.lora.LoRACompatibleConv], network_layer_name: str):
if shared.opts.diffusers_offload_mode == "none":
- self.to(devices.device)
+ try:
+ self.to(devices.device)
+ except Exception:
+ pass
batch_updown = None
batch_ex_bias = None
for net in loaded_networks:
@@ -501,7 +507,6 @@ def network_deactivate():
def network_activate(include=[], exclude=[]):
t0 = time.time()
- timer.clear(complete=True)
sd_model = getattr(shared.sd_model, "pipe", shared.sd_model) # wrapped model compatibility
if shared.opts.diffusers_offload_mode == "sequential":
sd_models.disable_offload(sd_model)
@@ -552,7 +557,7 @@ def network_activate(include=[], exclude=[]):
if task is not None and len(applied_layers) == 0:
pbar.remove_task(task) # hide progress bar for no action
weights_devices, weights_dtypes = list(set([x for x in weights_devices if x is not None])), list(set([x for x in weights_dtypes if x is not None])) # noqa: C403 # pylint: disable=R1718
- timer.activate = time.time() - t0
+ timer.activate += time.time() - t0
if debug and len(loaded_networks) > 0:
shared.log.debug(f'Load network: type=LoRA networks={len(loaded_networks)} components={components} modules={total} apply={len(applied_layers)} device={weights_devices} dtype={weights_dtypes} backup={backup_size} fuse={shared.opts.lora_fuse_diffusers} time={timer.summary}')
modules.clear()
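
Because activation now runs twice per generate (once for the text encoders, once for the remaining components), the timer switches from assignment to accumulation; a tiny hedged illustration of why `+=` matters here:

```python
import time

class Timer:
    activate = 0.0

timer = Timer()
for phase in ('text_encoder', 'unet'):
    t0 = time.time()
    time.sleep(0.01)                      # stand-in for network_activate() work
    timer.activate += time.time() - t0    # '=' would keep only the last phase
print(f'activate={timer.activate:.3f}s')  # total across both phases
```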
diff --git a/modules/shared.py b/modules/shared.py
index 17db4595f..256850d21 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -914,22 +914,24 @@ def get_default_modes():
"extra_networks_model_sep": OptionInfo("Models
", "", gr.HTML),
"extra_network_reference": OptionInfo(False, "Use reference values when available", gr.Checkbox),
- "extra_networks_embed_sep": OptionInfo("Embeddings
", "", gr.HTML),
- "diffusers_convert_embed": OptionInfo(False, "Auto-convert SD 1.5 embeddings to SDXL ", gr.Checkbox, {"visible": native}),
- "extra_networks_styles_sep": OptionInfo("Styles
", "", gr.HTML),
- "extra_networks_styles": OptionInfo(True, "Show built-in styles"),
- "extra_networks_wildcard_sep": OptionInfo("Wildcards
", "", gr.HTML),
- "wildcards_enabled": OptionInfo(True, "Enable file wildcards support"),
+
"extra_networks_lora_sep": OptionInfo("LoRA
", "", gr.HTML),
"extra_networks_default_multiplier": OptionInfo(1.0, "Default strength", gr.Slider, {"minimum": 0.0, "maximum": 2.0, "step": 0.01}),
"lora_preferred_name": OptionInfo("filename", "LoRA preferred name", gr.Radio, {"choices": ["filename", "alias"], "visible": False}),
- "lora_add_hashes_to_infotext": OptionInfo(False, "LoRA add hash info"),
+ "lora_add_hashes_to_infotext": OptionInfo(False, "LoRA add hash info to metadata"),
"lora_fuse_diffusers": OptionInfo(True, "LoRA fuse directly to model"),
"lora_force_diffusers": OptionInfo(False if not cmd_opts.use_openvino else True, "LoRA force loading of all models using Diffusers"),
"lora_maybe_diffusers": OptionInfo(False, "LoRA force loading of specific models using Diffusers"),
"lora_apply_tags": OptionInfo(0, "LoRA auto-apply tags", gr.Slider, {"minimum": -1, "maximum": 32, "step": 1}),
"lora_in_memory_limit": OptionInfo(0, "LoRA memory cache", gr.Slider, {"minimum": 0, "maximum": 24, "step": 1}),
- "lora_quant": OptionInfo("NF4","LoRA precision in quantized models", gr.Radio, {"choices": ["NF4", "FP4"]}),
+ "lora_quant": OptionInfo("NF4","LoRA precision when quantized", gr.Radio, {"choices": ["NF4", "FP4"]}),
+
+ "extra_networks_styles_sep": OptionInfo("Styles
", "", gr.HTML),
+ "extra_networks_styles": OptionInfo(True, "Show built-in styles"),
+ "extra_networks_embed_sep": OptionInfo("Embeddings
", "", gr.HTML),
+ "diffusers_convert_embed": OptionInfo(False, "Auto-convert SD15 embeddings to SDXL ", gr.Checkbox, {"visible": native}),
+ "extra_networks_wildcard_sep": OptionInfo("Wildcards
", "", gr.HTML),
+ "wildcards_enabled": OptionInfo(True, "Enable file wildcards support"),
}))
options_templates.update(options_section((None, "Internal options"), {
diff --git a/wiki b/wiki
index 8960da514..95f174900 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 8960da514e9aff4a5d47402925c9498536443379
+Subproject commit 95f1749005d56be490dab95cf92f4ca576d10396
From 8ec1c4f9c4981b33d4ae91dc8a703d21872adef6 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 10 Dec 2024 12:38:19 -0500
Subject: [PATCH 099/162] update bug report
Signed-off-by: Vladimir Mandic
---
.github/ISSUE_TEMPLATE/bug_report.yml | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index cf176d6cf..a40320c63 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -106,10 +106,14 @@ body:
- StableDiffusion 1.5
- StableDiffusion 2.1
- StableDiffusion XL
- - StableDiffusion 3
- - PixArt
+ - StableDiffusion 3.x
- StableCascade
+ - FLUX.1
+ - PixArt
- Kandinsky
+ - Playground
+ - AuraFlow
+ - Any Video Model
- Other
default: 0
validations:
From f4847f1b8a1d4f8f607bd8b39763f4ebf6036c5f Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 10 Dec 2024 15:49:20 -0500
Subject: [PATCH 100/162] optimize balanced offload
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 6 ++-
modules/memstats.py | 8 +++-
modules/processing_diffusers.py | 4 +-
modules/processing_vae.py | 6 +--
modules/sd_models.py | 66 ++++++++++++++++++---------------
modules/shared.py | 1 +
scripts/cogvideo.py | 2 +-
7 files changed, 54 insertions(+), 39 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8ade15e58..4f6b10909 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -50,8 +50,10 @@
- **Memory** improvements:
- faster and more compatible *balanced offload* mode
- balanced offload: units are now in percentage instead of bytes
- - balanced offload: add both high and low watermark
- default is 25% for low-watermark (skip offload if memory usage is below 25%) and 70% high-watermark (must offload if memory usage is above 70%)
+ - balanced offload: add both high and low watermark and pinned threshold, defaults as below
+ 25% for low-watermark: skip offload if memory usage is below 25%
+ 70% high-watermark: must offload if memory usage is above 70%
+ 15% pin-watermark: any model component smaller than 15% of total memory is pinned and not offloaded
- change-in-behavior:
low-end systems, triggered by either `lowvram` or by detection of <=4GB will use *sequential offload*
all other systems use *balanced offload* by default (can be changed in settings)
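
The watermark behavior described in the changelog entry above can be summarized in a small sketch; `should_offload`, its arguments and the hard-coded defaults are illustrative only, not part of the patch:

```python
# Hedged sketch of the balanced-offload watermark rules (defaults per the changelog):
# below the low watermark nothing is offloaded, and components smaller than the pin
# threshold stay on the GPU; the high watermark is the hard ceiling enforced by the
# offload hook itself.
def should_offload(used_gpu_fraction: float, component_fraction: float,
                   low: float = 0.25, pin: float = 0.15) -> bool:
    if used_gpu_fraction < low:     # below low watermark: skip offload entirely
        return False
    if component_fraction < pin:    # pinned: small components are never offloaded
        return False
    return True                     # otherwise the component is eligible for offload

print(should_offload(0.60, 0.20))   # True: 60% VRAM used, component is 20% of VRAM
```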
diff --git a/modules/memstats.py b/modules/memstats.py
index fd5f152a0..d43e5bbfa 100644
--- a/modules/memstats.py
+++ b/modules/memstats.py
@@ -4,6 +4,8 @@
from modules import shared, errors
fail_once = False
+mem = {}
+
def gb(val: float):
return round(val / 1024 / 1024 / 1024, 2)
@@ -11,7 +13,7 @@ def gb(val: float):
def memory_stats():
global fail_once # pylint: disable=global-statement
- mem = {}
+ mem.clear()
try:
process = psutil.Process(os.getpid())
res = process.memory_info()
@@ -41,6 +43,10 @@ def memory_stats():
return mem
+def memory_cache():
+ return mem
+
+
def ram_stats():
try:
process = psutil.Process(os.getpid())
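
The memstats change above turns `mem` into a module-level dict that is cleared in place rather than rebound, so the object handed out by `memory_cache()` keeps reflecting the most recent `memory_stats()` call without re-querying psutil or the GPU. A minimal standalone sketch of that pattern, with faked values:

```python
# Standalone illustration of the in-place cache pattern used by memstats above;
# the measurement itself is faked here, the real code fills the dict from psutil/torch.
_mem = {}

def refresh_stats():
    _mem.clear()                   # same dict object, new contents
    _mem['ram'] = {'used': 1.0}    # placeholder measurement
    return _mem

def memory_cache():
    return _mem                    # cheap accessor, no re-measurement

cached = memory_cache()
refresh_stats()
print(cached is memory_cache(), cached['ram'])   # True {'used': 1.0}
```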
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 627eb281f..9f12e44ab 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -349,7 +349,7 @@ def process_refine(p: processing.StableDiffusionProcessing, output):
def process_decode(p: processing.StableDiffusionProcessing, output):
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model, exclude=['vae'])
if output is not None:
if not hasattr(output, 'images') and hasattr(output, 'frames'):
shared.log.debug(f'Generated: frames={len(output.frames[0])}')
@@ -463,7 +463,7 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
timer.process.record('decode')
shared.sd_model = orig_pipeline
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ # shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
if p.state == '':
global last_p # pylint: disable=global-statement
diff --git a/modules/processing_vae.py b/modules/processing_vae.py
index 1c4a45f07..77a89c512 100644
--- a/modules/processing_vae.py
+++ b/modules/processing_vae.py
@@ -104,8 +104,6 @@ def full_vae_decode(latents, model):
if shared.opts.diffusers_move_unet and not getattr(model, 'has_accelerate', False):
base_device = sd_models.move_base(model, devices.cpu)
- if shared.opts.diffusers_offload_mode == "balanced":
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
elif shared.opts.diffusers_offload_mode != "sequential":
sd_models.move_model(model.vae, devices.device)
@@ -159,8 +157,8 @@ def full_vae_decode(latents, model):
model.vae.apply(sd_models.convert_to_faketensors)
devices.torch_gc(force=True)
- if shared.opts.diffusers_offload_mode == "balanced":
- shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
+ # if shared.opts.diffusers_offload_mode == "balanced":
+ # shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model)
elif shared.opts.diffusers_move_unet and not getattr(model, 'has_accelerate', False) and base_device is not None:
sd_models.move_base(model, base_device)
t1 = time.time()
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 8853916e4..bd69ba45b 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -18,7 +18,7 @@
from ldm.util import instantiate_from_config
from modules import paths, shared, shared_state, modelloader, devices, script_callbacks, sd_vae, sd_unet, errors, sd_models_config, sd_models_compile, sd_hijack_accelerate, sd_detect
from modules.timer import Timer, process as process_timer
-from modules.memstats import memory_stats
+from modules.memstats import memory_stats, memory_cache
from modules.modeldata import model_data
from modules.sd_checkpoint import CheckpointInfo, select_checkpoint, list_models, checkpoints_list, checkpoint_titles, get_closet_checkpoint_match, model_hash, update_model_hashes, setup_model, write_metadata, read_metadata_from_safetensors # pylint: disable=unused-import
@@ -416,9 +416,10 @@ def detach_hook(self, module):
offload_hook_instance = None
+offload_component_map = {}
-def apply_balanced_offload(sd_model):
+def apply_balanced_offload(sd_model, exclude=[]):
global offload_hook_instance # pylint: disable=global-statement
if shared.opts.diffusers_offload_mode != "balanced":
return sd_model
@@ -428,8 +429,6 @@ def apply_balanced_offload(sd_model):
excluded = ['OmniGenPipeline']
if sd_model.__class__.__name__ in excluded:
return sd_model
- fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
- debug_move(f'Apply offload: type=balanced fn={fn}')
checkpoint_name = sd_model.sd_checkpoint_info.name if getattr(sd_model, "sd_checkpoint_info", None) is not None else None
if checkpoint_name is None:
checkpoint_name = sd_model.__class__.__name__
@@ -442,32 +441,38 @@ def apply_balanced_offload_to_module(pipe):
keys = pipe._internal_dict.keys() # pylint: disable=protected-access
else:
keys = get_signature(pipe).keys()
+ keys = [k for k in keys if k not in exclude and not k.startswith('_')]
for module_name in keys: # pylint: disable=protected-access
module = getattr(pipe, module_name, None)
- if isinstance(module, torch.nn.Module):
- network_layer_name = getattr(module, "network_layer_name", None)
- device_map = getattr(module, "balanced_offload_device_map", None)
- max_memory = getattr(module, "balanced_offload_max_memory", None)
- module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
- try:
- do_offload = used_gpu > 100 * shared.opts.diffusers_offload_min_gpu_memory
- debug_move(f'Balanced offload: gpu={used_gpu} ram={used_ram} current={module.device} dtype={module.dtype} op={"move" if do_offload else "skip"} component={module.__class__.__name__}')
- if do_offload:
- module = module.to(devices.cpu)
- used_gpu, used_ram = devices.torch_gc(fast=True, force=True)
- except Exception as e:
- if 'bitsandbytes' not in str(e):
- shared.log.error(f'Balanced offload: module={module_name} {e}')
- if os.environ.get('SD_MOVE_DEBUG', None):
- errors.display(e, f'Balanced offload: module={module_name}')
- module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
- module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
- module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
- if network_layer_name:
- module.network_layer_name = network_layer_name
- if device_map and max_memory:
- module.balanced_offload_device_map = device_map
- module.balanced_offload_max_memory = max_memory
+ if not isinstance(module, torch.nn.Module):
+ continue
+ network_layer_name = getattr(module, "network_layer_name", None)
+ device_map = getattr(module, "balanced_offload_device_map", None)
+ max_memory = getattr(module, "balanced_offload_max_memory", None)
+ module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
+ module_size = offload_component_map.get(module_name, None)
+ if module_size is None:
+ module_size = sum(p.numel()*p.element_size() for p in module.parameters(recurse=True)) / 1024 / 1024 / 1024
+ offload_component_map[module_name] = module_size
+ do_offload = (used_gpu > 100 * shared.opts.diffusers_offload_min_gpu_memory) and (module_size > shared.gpu_memory * shared.opts.diffusers_offload_pin_gpu_memory)
+ try:
+ debug_move(f'Balanced offload: gpu={used_gpu} ram={used_ram} current={module.device} dtype={module.dtype} op={"move" if do_offload else "skip"} component={module.__class__.__name__} size={module_size:.3f}')
+ if do_offload and module.device != devices.cpu:
+ module = module.to(devices.cpu)
+ used_gpu, used_ram = devices.torch_gc(fast=True, force=True)
+ except Exception as e:
+ if 'bitsandbytes' not in str(e):
+ shared.log.error(f'Balanced offload: module={module_name} {e}')
+ if os.environ.get('SD_MOVE_DEBUG', None):
+ errors.display(e, f'Balanced offload: module={module_name}')
+ module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
+ module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
+ module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
+ if network_layer_name:
+ module.network_layer_name = network_layer_name
+ if device_map and max_memory:
+ module.balanced_offload_device_map = device_map
+ module.balanced_offload_max_memory = max_memory
apply_balanced_offload_to_module(sd_model)
if hasattr(sd_model, "pipe"):
@@ -478,7 +483,10 @@ def apply_balanced_offload_to_module(pipe):
apply_balanced_offload_to_module(sd_model.decoder_pipe)
set_accelerate(sd_model)
devices.torch_gc(fast=True)
- process_timer.add('offload', time.time() - t0)
+ t = time.time() - t0
+ process_timer.add('offload', t)
+ fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
+ debug_move(f'Apply offload: time={t:.2f} type=balanced fn={fn}')
return sd_model
diff --git a/modules/shared.py b/modules/shared.py
index 256850d21..97c32bd84 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -483,6 +483,7 @@ def get_default_modes():
"diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'model', 'sequential']}),
"diffusers_offload_min_gpu_memory": OptionInfo(0.25, "Balanced offload GPU low watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
"diffusers_offload_max_gpu_memory": OptionInfo(0.70, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
+ "diffusers_offload_pin_gpu_memory": OptionInfo(0.15, "Balanced offload GPU pin watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
"diffusers_offload_max_cpu_memory": OptionInfo(0.90, "Balanced offload CPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
"advanced_sep": OptionInfo("Advanced Options
", "", gr.HTML),
diff --git a/scripts/cogvideo.py b/scripts/cogvideo.py
index 284109857..a5efcd3e6 100644
--- a/scripts/cogvideo.py
+++ b/scripts/cogvideo.py
@@ -180,7 +180,7 @@ def generate(self, p: processing.StableDiffusionProcessing, model: str):
else:
shared.sd_model = sd_models.switch_pipe(diffusers.CogVideoXPipeline, shared.sd_model)
args['num_frames'] = p.frames # only txt2vid has num_frames
- shared.log.info(f'CogVideoX: class={shared.sd_model.__class__.__name__} frames={p.frames} input={args.get('video', None) or args.get('image', None)}')
+ shared.log.info(f"CogVideoX: class={shared.sd_model.__class__.__name__} frames={p.frames} input={args.get('video', None) or args.get('image', None)}")
if debug:
shared.log.debug(f'CogVideoX args: {args}')
frames = shared.sd_model(**args).frames[0]
From 9a588d9c91033b162ef1520417993ea7edc5a762 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 11 Dec 2024 12:06:03 -0500
Subject: [PATCH 101/162] update balanced offload
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 7 +++---
modules/devices.py | 54 ++++++++++++++++++++++++--------------------
modules/sd_models.py | 52 ++++++++++++++++++++++++++++--------------
modules/shared.py | 3 +--
wiki | 2 +-
5 files changed, 70 insertions(+), 48 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4f6b10909..4b8f4994c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -50,10 +50,9 @@
- **Memory** improvements:
- faster and more compatible *balanced offload* mode
- balanced offload: units are now in percentage instead of bytes
- - balanced offload: add both high and low watermark and pinned threshold, defaults as below
- 25% for low-watermark: skip offload if memory usage is below 25%
- 70% high-watermark: must offload if memory usage is above 70%
- 15% pin-watermark: any model component smaller than 15% of total memory is pinned and not offloaded
+ - balanced offload: add both high and low watermark, defaults as below
+ `0.25` for low-watermark: skip offload if memory usage is below 25%
+ `0.70` high-watermark: must offload if memory usage is above 70%
- change-in-behavior:
low-end systems, triggered by either `lowvram` or by detection of <=4GB will use *sequential offload*
all other systems use *balanced offload* by default (can be changed in settings)
diff --git a/modules/devices.py b/modules/devices.py
index 71eef5726..51b770481 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -187,27 +187,34 @@ def get_device_for(task): # pylint: disable=unused-argument
def torch_gc(force=False, fast=False):
+ def get_stats():
+ mem_dict = memstats.memory_stats()
+ gpu_dict = mem_dict.get('gpu', {})
+ ram_dict = mem_dict.get('ram', {})
+ oom = gpu_dict.get('oom', 0)
+ ram = ram_dict.get('used', 0)
+ if backend == "directml":
+ gpu = torch.cuda.memory_allocated() / (1 << 30)
+ else:
+ gpu = gpu_dict.get('used', 0)
+ used_gpu = round(100 * gpu / gpu_dict.get('total', 1)) if gpu_dict.get('total', 1) > 1 else 0
+ used_ram = round(100 * ram / ram_dict.get('total', 1)) if ram_dict.get('total', 1) > 1 else 0
+ return gpu, used_gpu, ram, used_ram, oom
+
+ global previous_oom # pylint: disable=global-statement
import gc
from modules import timer, memstats
from modules.shared import cmd_opts
+
t0 = time.time()
- mem = memstats.memory_stats()
- gpu = mem.get('gpu', {})
- ram = mem.get('ram', {})
- oom = gpu.get('oom', 0)
- if backend == "directml":
- used_gpu = round(100 * torch.cuda.memory_allocated() / (1 << 30) / gpu.get('total', 1)) if gpu.get('total', 1) > 1 else 0
- else:
- used_gpu = round(100 * gpu.get('used', 0) / gpu.get('total', 1)) if gpu.get('total', 1) > 1 else 0
- used_ram = round(100 * ram.get('used', 0) / ram.get('total', 1)) if ram.get('total', 1) > 1 else 0
- global previous_oom # pylint: disable=global-statement
+ gpu, used_gpu, ram, used_ram, oom = get_stats()
threshold = 0 if (cmd_opts.lowvram and not cmd_opts.use_zluda) else opts.torch_gc_threshold
collected = 0
if force or threshold == 0 or used_gpu >= threshold or used_ram >= threshold:
force = True
if oom > previous_oom:
previous_oom = oom
- log.warning(f'Torch GPU out-of-memory error: {mem}')
+ log.warning(f'Torch GPU out-of-memory error: {memstats.memory_stats()}')
force = True
if force:
# actual gc
@@ -215,25 +222,24 @@ def torch_gc(force=False, fast=False):
if cuda_ok:
try:
with torch.cuda.device(get_cuda_device_string()):
+ torch.cuda.synchronize()
torch.cuda.empty_cache() # cuda gc
torch.cuda.ipc_collect()
except Exception:
pass
+ else:
+ return gpu, ram
t1 = time.time()
- if 'gc' not in timer.process.records:
- timer.process.records['gc'] = 0
- timer.process.records['gc'] += t1 - t0
- if not force or collected == 0:
- return used_gpu, used_ram
- mem = memstats.memory_stats()
- saved = round(gpu.get('used', 0) - mem.get('gpu', {}).get('used', 0), 2)
- before = { 'gpu': gpu.get('used', 0), 'ram': ram.get('used', 0) }
- after = { 'gpu': mem.get('gpu', {}).get('used', 0), 'ram': mem.get('ram', {}).get('used', 0), 'retries': mem.get('retries', 0), 'oom': mem.get('oom', 0) }
- utilization = { 'gpu': used_gpu, 'ram': used_ram, 'threshold': threshold }
- results = { 'collected': collected, 'saved': saved }
+ timer.process.add('gc', t1 - t0)
+
+ new_gpu, new_used_gpu, new_ram, new_used_ram, oom = get_stats()
+ before = { 'gpu': gpu, 'ram': ram }
+ after = { 'gpu': new_gpu, 'ram': new_ram, 'oom': oom }
+ utilization = { 'gpu': new_used_gpu, 'ram': new_used_ram, 'threshold': threshold }
+ results = { 'saved': round(gpu - new_gpu, 2), 'collected': collected }
fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
- log.debug(f'GC: utilization={utilization} gc={results} before={before} after={after} device={torch.device(get_optimal_device_name())} fn={fn} time={round(t1 - t0, 2)}') # pylint: disable=protected-access
- return used_gpu, used_ram
+ log.debug(f'GC: utilization={utilization} gc={results} before={before} after={after} device={torch.device(get_optimal_device_name())} fn={fn} time={round(t1 - t0, 2)}')
+ return new_gpu, new_ram
def set_cuda_sync_mode(mode):
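
The refactored `torch_gc` folds the utilization math into a local `get_stats()` helper; the percentage calculation it relies on (with a guard against an unknown total) looks roughly like this, using made-up numbers:

```python
# Illustration of the utilization calculation factored into get_stats() above;
# the guard avoids dividing by a zero/unknown total reported by memory_stats().
def used_percent(used_gb: float, total_gb: float) -> int:
    return round(100 * used_gb / total_gb) if total_gb > 1 else 0

print(used_percent(6.2, 24.0))   # 26
print(used_percent(6.2, 0.0))    # 0 (total unknown)
```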
diff --git a/modules/sd_models.py b/modules/sd_models.py
index bd69ba45b..85a5dc5d7 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -18,7 +18,7 @@
from ldm.util import instantiate_from_config
from modules import paths, shared, shared_state, modelloader, devices, script_callbacks, sd_vae, sd_unet, errors, sd_models_config, sd_models_compile, sd_hijack_accelerate, sd_detect
from modules.timer import Timer, process as process_timer
-from modules.memstats import memory_stats, memory_cache
+from modules.memstats import memory_stats
from modules.modeldata import model_data
from modules.sd_checkpoint import CheckpointInfo, select_checkpoint, list_models, checkpoints_list, checkpoint_titles, get_closet_checkpoint_match, model_hash, update_model_hashes, setup_model, write_metadata, read_metadata_from_safetensors # pylint: disable=unused-import
@@ -35,6 +35,8 @@
diffusers_version = int(diffusers.__version__.split('.')[1])
checkpoint_tiles = checkpoint_titles # legacy compatibility
should_offload = ['sc', 'sd3', 'f1', 'hunyuandit', 'auraflow', 'omnigen']
+offload_hook_instance = None
+offload_component_map = {}
class NoWatermark:
@@ -415,10 +417,6 @@ def detach_hook(self, module):
return module
-offload_hook_instance = None
-offload_component_map = {}
-
-
def apply_balanced_offload(sd_model, exclude=[]):
global offload_hook_instance # pylint: disable=global-statement
if shared.opts.diffusers_offload_mode != "balanced":
@@ -433,6 +431,29 @@ def apply_balanced_offload(sd_model, exclude=[]):
if checkpoint_name is None:
checkpoint_name = sd_model.__class__.__name__
+ def get_pipe_modules(pipe):
+ if hasattr(pipe, "_internal_dict"):
+ modules_names = pipe._internal_dict.keys() # pylint: disable=protected-access
+ else:
+ modules_names = get_signature(pipe).keys()
+ modules_names = [m for m in modules_names if m not in exclude and not m.startswith('_')]
+ modules = {}
+ for module_name in modules_names:
+ module_size = offload_component_map.get(module_name, None)
+ if module_size is None:
+ module = getattr(pipe, module_name, None)
+ if not isinstance(module, torch.nn.Module):
+ continue
+ try:
+ module_size = sum(p.numel()*p.element_size() for p in module.parameters(recurse=True)) / 1024 / 1024 / 1024
+ except Exception as e:
+ shared.log.error(f'Balanced offload: module={module_name} {e}')
+ module_size = 0
+ offload_component_map[module_name] = module_size
+ modules[module_name] = module_size
+ modules = sorted(modules.items(), key=lambda x: x[1], reverse=True)
+ return modules
+
def apply_balanced_offload_to_module(pipe):
used_gpu, used_ram = devices.torch_gc(fast=True)
if hasattr(pipe, "pipe"):
@@ -442,24 +463,20 @@ def apply_balanced_offload_to_module(pipe):
else:
keys = get_signature(pipe).keys()
keys = [k for k in keys if k not in exclude and not k.startswith('_')]
- for module_name in keys: # pylint: disable=protected-access
+ for module_name, module_size in get_pipe_modules(pipe): # pylint: disable=protected-access
module = getattr(pipe, module_name, None)
- if not isinstance(module, torch.nn.Module):
- continue
network_layer_name = getattr(module, "network_layer_name", None)
device_map = getattr(module, "balanced_offload_device_map", None)
max_memory = getattr(module, "balanced_offload_max_memory", None)
module = accelerate.hooks.remove_hook_from_module(module, recurse=True)
- module_size = offload_component_map.get(module_name, None)
- if module_size is None:
- module_size = sum(p.numel()*p.element_size() for p in module.parameters(recurse=True)) / 1024 / 1024 / 1024
- offload_component_map[module_name] = module_size
- do_offload = (used_gpu > 100 * shared.opts.diffusers_offload_min_gpu_memory) and (module_size > shared.gpu_memory * shared.opts.diffusers_offload_pin_gpu_memory)
+ perc_gpu = used_gpu / shared.gpu_memory
try:
- debug_move(f'Balanced offload: gpu={used_gpu} ram={used_ram} current={module.device} dtype={module.dtype} op={"move" if do_offload else "skip"} component={module.__class__.__name__} size={module_size:.3f}')
- if do_offload and module.device != devices.cpu:
- module = module.to(devices.cpu)
- used_gpu, used_ram = devices.torch_gc(fast=True, force=True)
+ prev_gpu = used_gpu
+ do_offload = (perc_gpu > shared.opts.diffusers_offload_min_gpu_memory) and (module.device != devices.cpu)
+ if do_offload:
+ module = module.to(devices.cpu, non_blocking=True)
+ used_gpu -= module_size
+ debug_move(f'Balanced offload: op={"move" if do_offload else "skip"} gpu={prev_gpu:.3f}:{used_gpu:.3f} perc={perc_gpu:.2f} ram={used_ram:.3f} current={module.device} dtype={module.dtype} component={module.__class__.__name__} size={module_size:.3f}')
except Exception as e:
if 'bitsandbytes' not in str(e):
shared.log.error(f'Balanced offload: module={module_name} {e}')
@@ -473,6 +490,7 @@ def apply_balanced_offload_to_module(pipe):
if device_map and max_memory:
module.balanced_offload_device_map = device_map
module.balanced_offload_max_memory = max_memory
+ devices.torch_gc(fast=True, force=True)
apply_balanced_offload_to_module(sd_model)
if hasattr(sd_model, "pipe"):
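
`get_pipe_modules` above sizes every pipeline component once, caches the result in `offload_component_map`, and iterates components largest-first so the biggest savings come early. A hedged sketch of that sizing step using a fake pipeline (the class and layer sizes are placeholders, not a real diffusers pipeline):

```python
# Sketch of the per-component sizing performed by get_pipe_modules() above;
# FakePipe stands in for a diffusers pipeline and the layers are tiny placeholders.
import torch

class FakePipe:
    def __init__(self):
        self.unet = torch.nn.Linear(512, 512)
        self.text_encoder = torch.nn.Linear(64, 64)

def component_sizes(pipe, names):
    sizes = {}
    for name in names:
        module = getattr(pipe, name, None)
        if not isinstance(module, torch.nn.Module):
            continue
        # parameter bytes converted to GB, as in the patch
        sizes[name] = sum(p.numel() * p.element_size() for p in module.parameters(recurse=True)) / 1024**3
    return sorted(sizes.items(), key=lambda x: x[1], reverse=True)

print(component_sizes(FakePipe(), ['unet', 'text_encoder']))   # largest component first
```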
diff --git a/modules/shared.py b/modules/shared.py
index 97c32bd84..01add6d60 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -483,8 +483,7 @@ def get_default_modes():
"diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'model', 'sequential']}),
"diffusers_offload_min_gpu_memory": OptionInfo(0.25, "Balanced offload GPU low watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
"diffusers_offload_max_gpu_memory": OptionInfo(0.70, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
- "diffusers_offload_pin_gpu_memory": OptionInfo(0.15, "Balanced offload GPU pin watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
- "diffusers_offload_max_cpu_memory": OptionInfo(0.90, "Balanced offload CPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
+ "diffusers_offload_max_cpu_memory": OptionInfo(0.90, "Balanced offload CPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01, "visible": False }),
"advanced_sep": OptionInfo("Advanced Options
", "", gr.HTML),
"sd_checkpoint_autoload": OptionInfo(True, "Model autoload on start"),
diff --git a/wiki b/wiki
index 95f174900..db828893c 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 95f1749005d56be490dab95cf92f4ca576d10396
+Subproject commit db828893c803f1d5d0180cfe09689884bf27af2d
From c3b0c0a3bfb96032a65a41db91d1867feb934c02 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 11 Dec 2024 12:32:34 -0500
Subject: [PATCH 102/162] add SD_NO_CACHE env variable
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 1 +
modules/files_cache.py | 8 ++++----
modules/shared.py | 4 +++-
3 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4b8f4994c..f215b84ec 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -83,6 +83,7 @@
### Fixes
+- add `SD_NO_CACHE=true` env variable to disable file/folder caching
- update `diffusers`
- fix README links
- fix sdxl controlnet single-file loader
diff --git a/modules/files_cache.py b/modules/files_cache.py
index fa2241afc..d65e0f4f4 100644
--- a/modules/files_cache.py
+++ b/modules/files_cache.py
@@ -6,6 +6,7 @@
from installer import log
+do_cache_folders = os.environ.get('SD_NO_CACHE', None) is None
class Directory: # forward declaration
...
@@ -87,8 +88,6 @@ def is_stale(self) -> bool:
return not self.is_directory or self.mtime != self.live_mtime
-
-
class DirectoryCache(UserDict, DirectoryCollection):
def __delattr__(self, directory_path: str) -> None:
directory: Directory = get_directory(directory_path, fetch=False)
@@ -126,7 +125,7 @@ def clean_directory(directory: Directory, /, recursive: RecursiveType=False) ->
return is_clean
-def get_directory(directory_or_path: str, /, fetch:bool=True) -> Union[Directory, None]:
+def get_directory(directory_or_path: str, /, fetch: bool=True) -> Union[Directory, None]:
if isinstance(directory_or_path, Directory):
if directory_or_path.is_directory:
return directory_or_path
@@ -136,8 +135,9 @@ def get_directory(directory_or_path: str, /, fetch:bool=True) -> Union[Directory
if not cache_folders.get(directory_or_path, None):
if fetch:
directory = fetch_directory(directory_path=directory_or_path)
- if directory:
+ if directory and do_cache_folders:
cache_folders[directory_or_path] = directory
+ return directory
else:
clean_directory(cache_folders[directory_or_path])
return cache_folders[directory_or_path] if directory_or_path in cache_folders else None
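
The `SD_NO_CACHE` switch added above only gates whether fetched directories are stored back into the cache; a minimal illustration of the gate (the launch command in the comment is just an example):

```python
# Illustration of the SD_NO_CACHE gate from files_cache.py above: folder caching stays
# enabled unless the variable is set to any value in the environment.
import os

do_cache_folders = os.environ.get('SD_NO_CACHE', None) is None

# e.g. run with caching disabled (shell): SD_NO_CACHE=true python launch.py
print('folder caching enabled:', do_cache_folders)
```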
diff --git a/modules/shared.py b/modules/shared.py
index 01add6d60..f383ee2b6 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -16,7 +16,7 @@
import orjson
import diffusers
from rich.console import Console
-from modules import errors, devices, shared_items, shared_state, cmd_args, theme, history
+from modules import errors, devices, shared_items, shared_state, cmd_args, theme, history, files_cache
from modules.paths import models_path, script_path, data_path, sd_configs_path, sd_default_config, sd_model_file, default_sd_model_file, extensions_dir, extensions_builtin_dir # pylint: disable=W0611
from modules.dml import memory_providers, default_memory_provider, directml_do_hijack
from modules.onnx_impl import initialize_onnx, execution_providers
@@ -238,6 +238,8 @@ def default(obj):
mem_stat = memory_stats()
gpu_memory = mem_stat['gpu']['total'] if "gpu" in mem_stat else 0
native = backend == Backend.DIFFUSERS
+if not files_cache.do_cache_folders:
+ log.warning('File cache disabled: ')
class OptionInfo:
From 8cea43f7db8099e3de909e0ffce5280906c7e922 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 11 Dec 2024 13:10:05 -0500
Subject: [PATCH 103/162] lora add error handler for partial offload
Signed-off-by: Vladimir Mandic
---
modules/lora/networks.py | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index ada6f833d..5d285af95 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -346,7 +346,10 @@ def network_calc_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
continue
try:
t0 = time.time()
- weight = self.weight.to(devices.device)
+ try:
+ weight = self.weight.to(devices.device)
+ except Exception:
+ weight = self.weight
updown, ex_bias = module.calc_updown(weight)
if batch_updown is not None and updown is not None:
batch_updown += updown.to(batch_updown.device)
@@ -389,7 +392,10 @@ def network_apply_direct(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.
if updown is not None:
if deactivate:
updown *= -1
- new_weight = self.weight.to(devices.device) + updown.to(devices.device)
+ try:
+ new_weight = self.weight.to(devices.device) + updown.to(devices.device)
+ except Exception:
+ new_weight = self.weight + updown
if getattr(self, "quant_type", None) in ['nf4', 'fp4'] and bnb is not None:
self.weight = bnb.nn.Params4bit(new_weight, quant_state=self.quant_state, quant_type=self.quant_type, blocksize=self.blocksize)
else:
From e9f951b2c503f85c21d568e469a15f54b51fcbd1 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 11 Dec 2024 14:20:01 -0500
Subject: [PATCH 104/162] offload logging
Signed-off-by: Vladimir Mandic
---
TODO.md | 5 +++
modules/devices.py | 2 ++
modules/processing_diffusers.py | 1 +
modules/sd_models.py | 55 ++++++++++++++++++++++-----------
modules/shared.py | 2 +-
5 files changed, 46 insertions(+), 19 deletions(-)
diff --git a/TODO.md b/TODO.md
index 90372e41f..9692b7635 100644
--- a/TODO.md
+++ b/TODO.md
@@ -2,6 +2,11 @@
Main ToDo list can be found at [GitHub projects](https://github.com/users/vladmandic/projects)
+## Pending
+
+- LoRA direct with caching
+- Previewer issues
+
## Future Candidates
- SD35 IPAdapter:
diff --git a/modules/devices.py b/modules/devices.py
index 51b770481..3f1439fb7 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -231,6 +231,8 @@ def get_stats():
return gpu, ram
t1 = time.time()
timer.process.add('gc', t1 - t0)
+ if fast:
+ return gpu, ram
new_gpu, new_used_gpu, new_ram, new_used_ram, oom = get_stats()
before = { 'gpu': gpu, 'ram': ram }
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 9f12e44ab..3c59bbcf7 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -458,6 +458,7 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
extra_networks.deactivate(p)
timer.process.add('lora', networks.timer.total)
+ networks.timer.clear(complete=True)
results = process_decode(p, output)
timer.process.record('decode')
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 85a5dc5d7..c39c0263e 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -36,7 +36,6 @@
checkpoint_tiles = checkpoint_titles # legacy compatibility
should_offload = ['sc', 'sd3', 'f1', 'hunyuandit', 'auraflow', 'omnigen']
offload_hook_instance = None
-offload_component_map = {}
class NoWatermark:
@@ -367,11 +366,7 @@ def set_diffuser_offload(sd_model, op: str = 'model'):
except Exception as e:
shared.log.error(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} {e}')
if shared.opts.diffusers_offload_mode == "balanced":
- try:
- shared.log.debug(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} watermarks low={shared.opts.diffusers_offload_min_gpu_memory} high={shared.opts.diffusers_offload_max_gpu_memory} limit={shared.opts.cuda_mem_fraction:.2f}')
- sd_model = apply_balanced_offload(sd_model)
- except Exception as e:
- shared.log.error(f'Setting {op}: offload={shared.opts.diffusers_offload_mode} {e}')
+ sd_model = apply_balanced_offload(sd_model)
process_timer.add('offload', time.time() - t0)
@@ -386,11 +381,30 @@ def __init__(self):
self.cpu_watermark = shared.opts.diffusers_offload_max_cpu_memory
self.gpu = int(shared.gpu_memory * shared.opts.diffusers_offload_max_gpu_memory * 1024*1024*1024)
self.cpu = int(shared.cpu_memory * shared.opts.diffusers_offload_max_cpu_memory * 1024*1024*1024)
- gpu_dict = { "min": self.min_watermark, "max": self.max_watermark, "bytes": self.gpu }
- cpu_dict = { "max": self.cpu_watermark, "bytes": self.cpu }
- shared.log.info(f'Init offload: type=balanced gpu={gpu_dict} cpu={cpu_dict}')
+ self.offload_map = {}
+ gpu = f'{shared.gpu_memory * shared.opts.diffusers_offload_min_gpu_memory:.3f}-{shared.gpu_memory * shared.opts.diffusers_offload_max_gpu_memory}:{shared.gpu_memory}'
+ shared.log.info(f'Offload: type=balanced op=init watermark={self.min_watermark}-{self.max_watermark} gpu={gpu} cpu={shared.cpu_memory:.3f} limit={shared.opts.cuda_mem_fraction:.2f}')
+ self.validate()
super().__init__()
+ def validate(self):
+ if shared.opts.diffusers_offload_mode != 'balanced':
+ return
+ if shared.opts.diffusers_offload_min_gpu_memory < 0 or shared.opts.diffusers_offload_min_gpu_memory > 1:
+ shared.opts.diffusers_offload_min_gpu_memory = 0.25
+ shared.log.warning(f'Offload: type=balanced op=validate: watermark low={shared.opts.diffusers_offload_min_gpu_memory} invalid value')
+ if shared.opts.diffusers_offload_max_gpu_memory < 0.1 or shared.opts.diffusers_offload_max_gpu_memory > 1:
+ shared.opts.diffusers_offload_max_gpu_memory = 0.75
+ shared.log.warning(f'Offload: type=balanced op=validate: watermark high={shared.opts.diffusers_offload_max_gpu_memory} invalid value')
+ if shared.opts.diffusers_offload_min_gpu_memory > shared.opts.diffusers_offload_max_gpu_memory:
+ shared.opts.diffusers_offload_min_gpu_memory = shared.opts.diffusers_offload_max_gpu_memory
+ shared.log.warning(f'Offload: type=balanced op=validate: watermark low={shared.opts.diffusers_offload_min_gpu_memory} reset')
+ if shared.opts.diffusers_offload_max_gpu_memory * shared.gpu_memory < 4:
+ shared.log.warning(f'Offload: type=balanced op=validate: watermark high={shared.opts.diffusers_offload_max_gpu_memory} low memory')
+
+ def model_size(self):
+ return sum(self.offload_map.values())
+
def init_hook(self, module):
return module
@@ -421,12 +435,14 @@ def apply_balanced_offload(sd_model, exclude=[]):
global offload_hook_instance # pylint: disable=global-statement
if shared.opts.diffusers_offload_mode != "balanced":
return sd_model
- if offload_hook_instance is None or offload_hook_instance.min_watermark != shared.opts.diffusers_offload_min_gpu_memory or offload_hook_instance.max_watermark != shared.opts.diffusers_offload_max_gpu_memory:
- offload_hook_instance = OffloadHook()
t0 = time.time()
excluded = ['OmniGenPipeline']
if sd_model.__class__.__name__ in excluded:
return sd_model
+ cached = True
+ if offload_hook_instance is None or offload_hook_instance.min_watermark != shared.opts.diffusers_offload_min_gpu_memory or offload_hook_instance.max_watermark != shared.opts.diffusers_offload_max_gpu_memory:
+ cached = False
+ offload_hook_instance = OffloadHook()
checkpoint_name = sd_model.sd_checkpoint_info.name if getattr(sd_model, "sd_checkpoint_info", None) is not None else None
if checkpoint_name is None:
checkpoint_name = sd_model.__class__.__name__
@@ -439,7 +455,7 @@ def get_pipe_modules(pipe):
modules_names = [m for m in modules_names if m not in exclude and not m.startswith('_')]
modules = {}
for module_name in modules_names:
- module_size = offload_component_map.get(module_name, None)
+ module_size = offload_hook_instance.offload_map.get(module_name, None)
if module_size is None:
module = getattr(pipe, module_name, None)
if not isinstance(module, torch.nn.Module):
@@ -447,9 +463,9 @@ def get_pipe_modules(pipe):
try:
module_size = sum(p.numel()*p.element_size() for p in module.parameters(recurse=True)) / 1024 / 1024 / 1024
except Exception as e:
- shared.log.error(f'Balanced offload: module={module_name} {e}')
+ shared.log.error(f'Offload: type=balanced op=calc module={module_name} {e}')
module_size = 0
- offload_component_map[module_name] = module_size
+ offload_hook_instance.offload_map[module_name] = module_size
modules[module_name] = module_size
modules = sorted(modules.items(), key=lambda x: x[1], reverse=True)
return modules
@@ -476,12 +492,12 @@ def apply_balanced_offload_to_module(pipe):
if do_offload:
module = module.to(devices.cpu, non_blocking=True)
used_gpu -= module_size
- debug_move(f'Balanced offload: op={"move" if do_offload else "skip"} gpu={prev_gpu:.3f}:{used_gpu:.3f} perc={perc_gpu:.2f} ram={used_ram:.3f} current={module.device} dtype={module.dtype} component={module.__class__.__name__} size={module_size:.3f}')
+ debug_move(f'Offload: type=balanced op={"move" if do_offload else "skip"} gpu={prev_gpu:.3f}:{used_gpu:.3f} perc={perc_gpu:.2f} ram={used_ram:.3f} current={module.device} dtype={module.dtype} component={module.__class__.__name__} size={module_size:.3f}')
except Exception as e:
if 'bitsandbytes' not in str(e):
- shared.log.error(f'Balanced offload: module={module_name} {e}')
+ shared.log.error(f'Offload: type=balanced op=apply module={module_name} {e}')
if os.environ.get('SD_MOVE_DEBUG', None):
- errors.display(e, f'Balanced offload: module={module_name}')
+ errors.display(e, f'Offload: type=balanced op=apply module={module_name}')
module.offload_dir = os.path.join(shared.opts.accelerate_offload_path, checkpoint_name, module_name)
module = accelerate.hooks.add_hook_to_module(module, offload_hook_instance, append=True)
module._hf_hook.execution_device = torch.device(devices.device) # pylint: disable=protected-access
@@ -505,6 +521,8 @@ def apply_balanced_offload_to_module(pipe):
process_timer.add('offload', t)
fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
debug_move(f'Apply offload: time={t:.2f} type=balanced fn={fn}')
+ if not cached:
+ shared.log.info(f'Offload: type=balanced op=apply class={sd_model.__class__.__name__} modules={len(offload_hook_instance.offload_map)} size={offload_hook_instance.model_size():.3f}')
return sd_model
@@ -1000,7 +1018,8 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No
shared.log.error(f"Load {op}: {e}")
errors.display(e, "Model")
- devices.torch_gc(force=True)
+ if shared.opts.diffusers_offload_mode != 'balanced':
+ devices.torch_gc(force=True)
if sd_model is not None:
script_callbacks.model_loaded_callback(sd_model)
diff --git a/modules/shared.py b/modules/shared.py
index f383ee2b6..eaf4b361d 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -484,7 +484,7 @@ def get_default_modes():
"diffusers_extract_ema": OptionInfo(False, "Use model EMA weights when possible", gr.Checkbox, {"visible": False }),
"diffusers_offload_mode": OptionInfo(startup_offload_mode, "Model offload mode", gr.Radio, {"choices": ['none', 'balanced', 'model', 'sequential']}),
"diffusers_offload_min_gpu_memory": OptionInfo(0.25, "Balanced offload GPU low watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
- "diffusers_offload_max_gpu_memory": OptionInfo(0.70, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01 }),
+ "diffusers_offload_max_gpu_memory": OptionInfo(0.70, "Balanced offload GPU high watermark", gr.Slider, {"minimum": 0.1, "maximum": 1, "step": 0.01 }),
"diffusers_offload_max_cpu_memory": OptionInfo(0.90, "Balanced offload CPU high watermark", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01, "visible": False }),
"advanced_sep": OptionInfo("Advanced Options
", "", gr.HTML),
From 8f21e96f73e4149d2772582a843d4ed84a1fd029 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 11 Dec 2024 15:22:51 -0500
Subject: [PATCH 105/162] update bnb and increase ui timeouts
Signed-off-by: Vladimir Mandic
---
TODO.md | 1 +
installer.py | 6 +++---
javascript/logger.js | 10 ++++++----
modules/model_quant.py | 4 ++--
modules/sd_samplers_common.py | 3 ++-
5 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/TODO.md b/TODO.md
index 9692b7635..76c672260 100644
--- a/TODO.md
+++ b/TODO.md
@@ -6,6 +6,7 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
- LoRA direct with caching
- Previewer issues
+- Redesign postprocessing
## Future Candidates
diff --git a/installer.py b/installer.py
index 93cd10413..b020418e1 100644
--- a/installer.py
+++ b/installer.py
@@ -682,7 +682,7 @@ def install_torch_addons():
if opts.get('nncf_compress_weights', False) and not args.use_openvino:
install('nncf==2.7.0', 'nncf')
if opts.get('optimum_quanto_weights', False):
- install('optimum-quanto', 'optimum-quanto')
+ install('optimum-quanto==0.2.6', 'optimum-quanto')
if triton_command is not None:
install(triton_command, 'triton', quiet=True)
@@ -999,8 +999,8 @@ def install_optional():
install('basicsr')
install('gfpgan')
install('clean-fid')
- install('optimum-quanto', ignore=True)
- install('bitsandbytes', ignore=True)
+ install('optimum-quanto==0.2.6', ignore=True)
+ install('bitsandbytes==0.45.0', ignore=True)
install('pynvml', ignore=True)
install('ultralytics==8.3.40', ignore=True)
install('Cython', ignore=True)
diff --git a/javascript/logger.js b/javascript/logger.js
index 1677fa537..8fa812b86 100644
--- a/javascript/logger.js
+++ b/javascript/logger.js
@@ -1,3 +1,5 @@
+const timeout = 10000;
+
const log = async (...msg) => {
const dt = new Date();
const ts = `${dt.getHours().toString().padStart(2, '0')}:${dt.getMinutes().toString().padStart(2, '0')}:${dt.getSeconds().toString().padStart(2, '0')}.${dt.getMilliseconds().toString().padStart(3, '0')}`;
@@ -21,7 +23,7 @@ const error = async (...msg) => {
// if (!txt.includes('asctime') && !txt.includes('xhr.')) xhrPost('/sdapi/v1/log', { error: txt }); // eslint-disable-line no-use-before-define
};
-const xhrInternal = (xhrObj, data, handler = undefined, errorHandler = undefined, ignore = false, serverTimeout = 5000) => {
+const xhrInternal = (xhrObj, data, handler = undefined, errorHandler = undefined, ignore = false, serverTimeout = timeout) => {
const err = (msg) => {
if (!ignore) {
error(`${msg}: state=${xhrObj.readyState} status=${xhrObj.status} response=${xhrObj.responseText}`);
@@ -30,7 +32,7 @@ const xhrInternal = (xhrObj, data, handler = undefined, errorHandler = undefined
};
xhrObj.setRequestHeader('Content-Type', 'application/json');
- xhrObj.timeout = serverTimeout;
+ xhrObj.timeout = timeout;
xhrObj.ontimeout = () => err('xhr.ontimeout');
xhrObj.onerror = () => err('xhr.onerror');
xhrObj.onabort = () => err('xhr.onabort');
@@ -52,14 +54,14 @@ const xhrInternal = (xhrObj, data, handler = undefined, errorHandler = undefined
xhrObj.send(req);
};
-const xhrGet = (url, data, handler = undefined, errorHandler = undefined, ignore = false, serverTimeout = 5000) => {
+const xhrGet = (url, data, handler = undefined, errorHandler = undefined, ignore = false, serverTimeout = timeout) => {
const xhr = new XMLHttpRequest();
const args = Object.keys(data).map((k) => `${encodeURIComponent(k)}=${encodeURIComponent(data[k])}`).join('&');
xhr.open('GET', `${url}?${args}`, true);
xhrInternal(xhr, data, handler, errorHandler, ignore, serverTimeout);
};
-function xhrPost(url, data, handler = undefined, errorHandler = undefined, ignore = false, serverTimeout = 5000) {
+function xhrPost(url, data, handler = undefined, errorHandler = undefined, ignore = false, serverTimeout = timeout) {
const xhr = new XMLHttpRequest();
xhr.open('POST', url, true);
xhrInternal(xhr, data, handler, errorHandler, ignore, serverTimeout);
diff --git a/modules/model_quant.py b/modules/model_quant.py
index 03043b33a..5c0b40080 100644
--- a/modules/model_quant.py
+++ b/modules/model_quant.py
@@ -73,7 +73,7 @@ def load_bnb(msg='', silent=False):
global bnb # pylint: disable=global-statement
if bnb is not None:
return bnb
- install('bitsandbytes', quiet=True)
+ install('bitsandbytes==0.45.0', quiet=True)
try:
import bitsandbytes
bnb = bitsandbytes
@@ -96,7 +96,7 @@ def load_quanto(msg='', silent=False):
global quanto # pylint: disable=global-statement
if quanto is not None:
return quanto
- install('optimum-quanto', quiet=True)
+ install('optimum-quanto==0.2.6', quiet=True)
try:
from optimum import quanto as optimum_quanto # pylint: disable=no-name-in-module
quanto = optimum_quanto
diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index a96795a25..cd51043c7 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -51,6 +51,7 @@ def single_sample_to_image(sample, approximation=None):
return Image.new(mode="RGB", size=(512, 512))
if len(sample.shape) == 4 and sample.shape[0]: # likely animatediff latent
sample = sample.permute(1, 0, 2, 3)[0]
+ """
# TODO remove
if shared.native: # [-x,x] to [-5,5]
sample_max = torch.max(sample)
@@ -59,7 +60,7 @@ def single_sample_to_image(sample, approximation=None):
sample_min = torch.min(sample)
if sample_min < -5:
sample = sample * (5 / abs(sample_min))
-
+ """
if approximation == 2: # TAESD
x_sample = sd_vae_taesd.decode(sample)
x_sample = (1.0 + x_sample) / 2.0 # preview requires smaller range
From bd540efb73177f46b59e2c263bbcb378e9ea7d93 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 11 Dec 2024 18:10:51 -0500
Subject: [PATCH 106/162] lora: absolute path, hf download, flux controlnet
loras
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 9 +++-
TODO.md | 1 +
installer.py | 2 +-
modules/control/run.py | 3 +-
modules/control/units/controlnet.py | 73 ++++++++++++++++++++++-------
modules/lora/networks.py | 16 +++++++
modules/processing_diffusers.py | 4 +-
requirements.txt | 2 +-
8 files changed, 87 insertions(+), 23 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f215b84ec..2ecc7ef26 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-12-10
+## Update for 2024-12-11
### New models and integrations
@@ -22,6 +22,9 @@
*recommended*: guidance scale 30
- [Depth](https://huggingface.co/black-forest-labs/FLUX.1-Depth-dev): ~23.8GB, replaces currently loaded model
*recommended*: guidance scale 10
+- [Flux ControlNet LoRA](https://huggingface.co/black-forest-labs/FLUX.1-Canny-dev-lora)
+ alternative to standard ControlNets, FLUX.1 also allows LoRA to help guide the generation process
+ both **Depth** and **Canny** LoRAs are available in standard control menus
- [StabilityAI SD35 ControlNets]([sd3_medium](https://huggingface.co/stabilityai/stable-diffusion-3.5-controlnets))
- In addition to previously released `InstantX` and `Alimama`, we now have *official* ones from StabilityAI
- [Style Aligned Image Generation](https://style-aligned-gen.github.io/)
@@ -39,6 +42,10 @@
thanks @AI-Casanova
- LoRA weights can be applied/unapplied as on each generate or they can store weights backups for later use
this setting has large performance and resource implications, see [Offload](https://github.com/vladmandic/automatic/wiki/Offload) wiki for details
+ - LoRA name in prompt can now also be an absolute path to a LoRA file, even if LoRA is not indexed
+ example: ``
+ - LoRA name in prompt can now also be a path to a LoRA file on `huggingface`
+ example: ``
- **Model loader** improvements:
- detect model components on model load fail
- allow passing absolute path to model loader
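
As a hypothetical illustration of the two new LoRA reference forms above (the paths and repo are invented, and the `<lora:name:multiplier>` syntax is assumed from the existing prompt format):

```python
# Hypothetical prompts only; paths and repo names are made up for illustration.
prompt_local = "a photo of a cat <lora:/mnt/models/Lora/my-style.safetensors:1.0>"
prompt_hf = "a photo of a cat <lora:/huggingface.co/some-org/some-repo/my-style.safetensors:1.0>"
```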
diff --git a/TODO.md b/TODO.md
index 76c672260..63088d39f 100644
--- a/TODO.md
+++ b/TODO.md
@@ -17,6 +17,7 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
- SANA:
- LTX-Video:
- TorchAO:
+- ControlNetUnion/ControlNetPromax:
## Other
diff --git a/installer.py b/installer.py
index b020418e1..36254b66a 100644
--- a/installer.py
+++ b/installer.py
@@ -459,7 +459,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
def check_diffusers():
if args.skip_all or args.skip_requirements:
return
- sha = '3335e2262d47e7d7e311a44dea7f454b5f01b643' # diffusers commit hash
+ sha = '914a585be8187ec0ad92fab4f072c992f8c297cd' # diffusers commit hash
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
cur = opts.get('diffusers_version', '') if minor > 0 else ''
diff --git a/modules/control/run.py b/modules/control/run.py
index 2fe13dd73..6ae7fb20c 100644
--- a/modules/control/run.py
+++ b/modules/control/run.py
@@ -332,7 +332,7 @@ def set_pipe():
p.task_args['control_guidance_start'] = control_guidance_start
p.task_args['control_guidance_end'] = control_guidance_end
p.task_args['guess_mode'] = p.guess_mode
- instance = controlnet.ControlNetPipeline(selected_models, shared.sd_model)
+ instance = controlnet.ControlNetPipeline(selected_models, shared.sd_model, p=p)
pipe = instance.pipeline
elif unit_type == 'xs' and has_models:
p.extra_generation_params["Control mode"] = 'ControlNet-XS'
@@ -370,7 +370,6 @@ def set_pipe():
debug(f'Control: run type={unit_type} models={has_models} pipe={pipe.__class__.__name__ if pipe is not None else None}')
return pipe
-
pipe = set_pipe()
debug(f'Control pipeline: class={pipe.__class__.__name__} args={vars(p)}')
t1, t2, t3 = time.time(), 0, 0
diff --git a/modules/control/units/controlnet.py b/modules/control/units/controlnet.py
index 3f68a4896..3fa2d90eb 100644
--- a/modules/control/units/controlnet.py
+++ b/modules/control/units/controlnet.py
@@ -5,6 +5,7 @@
from modules.control.units import detect
from modules.shared import log, opts, listdir
from modules import errors, sd_models, devices, model_quant
+from modules.processing import StableDiffusionProcessingControl
what = 'ControlNet'
@@ -75,6 +76,8 @@
"InstantX Union": 'InstantX/FLUX.1-dev-Controlnet-Union',
"InstantX Canny": 'InstantX/FLUX.1-dev-Controlnet-Canny',
"JasperAI Depth": 'jasperai/Flux.1-dev-Controlnet-Depth',
+ "BlackForrestLabs Canny LoRA": '/huggingface.co/black-forest-labs/FLUX.1-Canny-dev-lora/flux1-canny-dev-lora.safetensors',
+ "BlackForrestLabs Depth LoRA": '/huggingface.co/black-forest-labs/FLUX.1-Depth-dev-lora/flux1-depth-dev-lora.safetensors',
"JasperAI Surface Normals": 'jasperai/Flux.1-dev-Controlnet-Surface-Normals',
"JasperAI Upscaler": 'jasperai/Flux.1-dev-Controlnet-Upscaler',
"Shakker-Labs Union": 'Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro',
@@ -162,12 +165,21 @@ def reset(self):
self.model = None
self.model_id = None
- def get_class(self):
+ def get_class(self, model_id:str=''):
import modules.shared
if modules.shared.sd_model_type == 'sd':
from diffusers import ControlNetModel as cls # pylint: disable=reimported
config = 'lllyasviel/control_v11p_sd15_canny'
elif modules.shared.sd_model_type == 'sdxl':
+ # TODO ControlNetUnion
+ """
+ if 'union' in model_id.lower():
+ from diffusers import ControlNetUnionModel as cls
+ config = 'xinsir/controlnet-union-sdxl-1.0'
+ else:
+ from diffusers import ControlNetModel as cls # pylint: disable=reimported # sdxl shares same model class
+ config = 'Eugeoter/noob-sdxl-controlnet-canny'
+ """
from diffusers import ControlNetModel as cls # pylint: disable=reimported # sdxl shares same model class
config = 'Eugeoter/noob-sdxl-controlnet-canny'
elif modules.shared.sd_model_type == 'f1':
@@ -181,7 +193,7 @@ def get_class(self):
return None, None
return cls, config
- def load_safetensors(self, model_path):
+ def load_safetensors(self, model_id, model_path):
name = os.path.splitext(model_path)[0]
config_path = None
if not os.path.exists(model_path):
@@ -206,7 +218,7 @@ def load_safetensors(self, model_path):
config_path = f'{name}.json'
if config_path is not None:
self.load_config['original_config_file '] = config_path
- cls, config = self.get_class()
+ cls, config = self.get_class(model_id)
if cls is None:
log.error(f'Control {what} model load failed: unknown base model')
else:
@@ -228,18 +240,21 @@ def load(self, model_id: str = None, force: bool = True) -> str:
if model_path is None:
log.error(f'Control {what} model load failed: id="{model_id}" error=unknown model id')
return
+ if 'lora' in model_id.lower():
+ self.model = model_path
+ return
if model_id == self.model_id and not force:
log.debug(f'Control {what} model: id="{model_id}" path="{model_path}" already loaded')
return
log.debug(f'Control {what} model loading: id="{model_id}" path="{model_path}"')
+ cls, _config = self.get_class(model_id)
if model_path.endswith('.safetensors'):
- self.load_safetensors(model_path)
+ self.load_safetensors(model_id, model_path)
else:
kwargs = {}
if '/bin' in model_path:
model_path = model_path.replace('/bin', '')
self.load_config['use_safetensors'] = False
- cls, _config = self.get_class()
if cls is None:
log.error(f'Control {what} model load failed: id="{model_id}" unknown base model')
return
@@ -271,7 +286,7 @@ def load(self, model_id: str = None, force: bool = True) -> str:
self.model.to(self.device)
t1 = time.time()
self.model_id = model_id
- log.debug(f'Control {what} model loaded: id="{model_id}" path="{model_path}" time={t1-t0:.2f}')
+ log.debug(f'Control {what} model loaded: id="{model_id}" path="{model_path}" cls={cls.__name__} time={t1-t0:.2f}')
return f'{what} loaded model: {model_id}'
except Exception as e:
log.error(f'Control {what} model load failed: id="{model_id}" error={e}')
@@ -284,16 +299,30 @@ def __init__(self,
controlnet: Union[ControlNetModel, list[ControlNetModel]],
pipeline: Union[StableDiffusionXLPipeline, StableDiffusionPipeline, FluxPipeline, StableDiffusion3Pipeline],
dtype = None,
+ p: StableDiffusionProcessingControl = None,
):
t0 = time.time()
self.orig_pipeline = pipeline
self.pipeline = None
+
+ controlnets = controlnet if isinstance(controlnet, list) else [controlnet]
+ loras = [cn for cn in controlnets if isinstance(cn, str)]
+ controlnets = [cn for cn in controlnets if not isinstance(cn, str)]
+
if pipeline is None:
log.error('Control model pipeline: model not loaded')
return
- elif detect.is_sdxl(pipeline):
- from diffusers import StableDiffusionXLControlNetPipeline
- self.pipeline = StableDiffusionXLControlNetPipeline(
+ elif detect.is_sdxl(pipeline) and len(controlnets) > 0:
+ from diffusers import StableDiffusionXLControlNetPipeline, StableDiffusionXLControlNetUnionPipeline
+ # TODO ControlNetUnion
+ """
+ if controlnet.__class__.__name__ == 'ControlNetUnionModel':
+ cls = StableDiffusionXLControlNetUnionPipeline
+ else:
+ cls = StableDiffusionXLControlNetPipeline
+ """
+ cls = StableDiffusionXLControlNetPipeline
+ self.pipeline = cls(
vae=pipeline.vae,
text_encoder=pipeline.text_encoder,
text_encoder_2=pipeline.text_encoder_2,
@@ -302,9 +331,9 @@ def __init__(self,
unet=pipeline.unet,
scheduler=pipeline.scheduler,
feature_extractor=getattr(pipeline, 'feature_extractor', None),
- controlnet=controlnet, # can be a list
+ controlnet=controlnets, # can be a list
)
- elif detect.is_sd15(pipeline):
+ elif detect.is_sd15(pipeline) and len(controlnets) > 0:
from diffusers import StableDiffusionControlNetPipeline
self.pipeline = StableDiffusionControlNetPipeline(
vae=pipeline.vae,
@@ -315,10 +344,10 @@ def __init__(self,
feature_extractor=getattr(pipeline, 'feature_extractor', None),
requires_safety_checker=False,
safety_checker=None,
- controlnet=controlnet, # can be a list
+ controlnet=controlnets, # can be a list
)
sd_models.move_model(self.pipeline, pipeline.device)
- elif detect.is_f1(pipeline):
+ elif detect.is_f1(pipeline) and len(controlnets) > 0:
from diffusers import FluxControlNetPipeline
self.pipeline = FluxControlNetPipeline(
vae=pipeline.vae.to(devices.device),
@@ -328,9 +357,9 @@ def __init__(self,
tokenizer_2=pipeline.tokenizer_2,
transformer=pipeline.transformer,
scheduler=pipeline.scheduler,
- controlnet=controlnet, # can be a list
+ controlnet=controlnets, # can be a list
)
- elif detect.is_sd3(pipeline):
+ elif detect.is_sd3(pipeline) and len(controlnets) > 0:
from diffusers import StableDiffusion3ControlNetPipeline
self.pipeline = StableDiffusion3ControlNetPipeline(
vae=pipeline.vae,
@@ -342,8 +371,18 @@ def __init__(self,
tokenizer_3=pipeline.tokenizer_3,
transformer=pipeline.transformer,
scheduler=pipeline.scheduler,
- controlnet=controlnet, # can be a list
+ controlnet=controlnets, # can be a list
)
+ elif len(loras) > 0:
+ self.pipeline = pipeline
+ for lora in loras:
+ log.debug(f'Control {what} pipeline: lora="{lora}"')
+ lora = lora.replace('/huggingface.co/', '')
+ self.pipeline.load_lora_weights(lora)
+ """
+ if p is not None:
+ p.prompt += f''
+ """
else:
log.error(f'Control {what} pipeline: class={pipeline.__class__.__name__} unsupported model type')
return
@@ -353,6 +392,7 @@ def __init__(self,
return
if dtype is not None:
self.pipeline = self.pipeline.to(dtype)
+
if opts.diffusers_offload_mode == 'none':
sd_models.move_model(self.pipeline, devices.device)
from modules.sd_models import set_diffuser_offload
@@ -362,5 +402,6 @@ def __init__(self,
log.debug(f'Control {what} pipeline: class={self.pipeline.__class__.__name__} time={t1-t0:.2f}')
def restore(self):
+ self.pipeline.unload_lora_weights()
self.pipeline = None
return self.orig_pipeline
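The list-splitting at the top of the updated ControlNetPipeline.__init__ is what allows mixing real ControlNet models with LoRA references passed as strings; below is a minimal, self-contained sketch of that split, where the stand-in class and the path are illustrative placeholders, not real models:

# minimal sketch of the controlnet/lora split above; FakeControlNetModel and the path are placeholders
class FakeControlNetModel:
    pass

controlnet = [FakeControlNetModel(), '/huggingface.co/some-user/some-control-lora/lora.safetensors']
controlnets = controlnet if isinstance(controlnet, list) else [controlnet]
loras = [cn for cn in controlnets if isinstance(cn, str)]              # string entries are treated as LoRA references
controlnets = [cn for cn in controlnets if not isinstance(cn, str)]    # model objects go to the controlnet pipeline
for lora in loras:
    lora = lora.replace('/huggingface.co/', '')                        # host prefix is stripped before load_lora_weights
print(len(controlnets), loras)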
diff --git a/modules/lora/networks.py b/modules/lora/networks.py
index 5d285af95..a38945072 100644
--- a/modules/lora/networks.py
+++ b/modules/lora/networks.py
@@ -207,11 +207,27 @@ def add_network(filename):
shared.log.info(f'Available LoRAs: path="{shared.cmd_opts.lora_dir}" items={len(available_networks)} folders={len(forbidden_network_aliases)} time={t1 - t0:.2f}')
+def network_download(name):
+ from huggingface_hub import hf_hub_download
+ if os.path.exists(name):
+ return network.NetworkOnDisk(name, name)
+ parts = name.split('/')
+ if len(parts) >= 5 and parts[1] == 'huggingface.co':
+ repo_id = f'{parts[2]}/{parts[3]}'
+ filename = '/'.join(parts[4:])
+ fn = hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=shared.opts.hfcache_dir)
+ return network.NetworkOnDisk(name, fn)
+ return None
+
+
def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names]
if any(x is None for x in networks_on_disk):
list_available_networks()
networks_on_disk: list[network.NetworkOnDisk] = [available_network_aliases.get(name, None) for name in names]
+ for i in range(len(names)):
+ if names[i].startswith('/'):
+ networks_on_disk[i] = network_download(names[i])
failed_to_load_networks = []
recompile_model = maybe_recompile_model(names, te_multipliers)
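A self-contained sketch of the path parsing network_download performs before handing off to hf_hub_download; the repo and filename below are illustrative placeholders:

name = '/huggingface.co/some-user/some-lora-repo/sdxl/lora.safetensors'   # hypothetical input path
parts = name.split('/')
if len(parts) >= 5 and parts[1] == 'huggingface.co':
    repo_id = f'{parts[2]}/{parts[3]}'     # 'some-user/some-lora-repo'
    filename = '/'.join(parts[4:])         # 'sdxl/lora.safetensors'
    print(repo_id, filename)               # these are the values passed to hf_hub_download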
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 3c59bbcf7..3b6f228ba 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -375,10 +375,10 @@ def process_decode(p: processing.StableDiffusionProcessing, output):
elif hasattr(output, 'images'):
results = output.images
else:
- shared.log.warning('Processing returned no results')
+ shared.log.warning('Processing: no results')
results = []
else:
- shared.log.warning('Processing returned no results')
+ shared.log.warning('Processing: no results')
results = []
return results
diff --git a/requirements.txt b/requirements.txt
index 3b1b14c7d..d9eba6958 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -34,7 +34,7 @@ pi-heif
# versioned
safetensors==0.4.5
tensordict==0.1.2
-peft==0.13.1
+peft==0.14.0
httpx==0.24.1
compel==2.0.3
torchsde==0.2.6
From fe1b0a8d351d188899856e0467e132d8356ab599 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Thu, 12 Dec 2024 13:58:58 -0500
Subject: [PATCH 107/162] add docs reference
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 4 +++-
README.md | 9 +++++----
wiki | 2 +-
3 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2ecc7ef26..75979cc4c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-12-11
+## Update for 2024-12-12
### New models and integrations
@@ -36,6 +36,8 @@
### UI and workflow improvements
+- **Docs**:
+ - New documentation site!
- **LoRA** handler rewrite:
- LoRA weights are no longer calculated on-the-fly during model execution, but are pre-calculated at the start
    this adds some perceived overhead at generate startup, but results in overall faster execution as LoRA does not need to be processed on each step
diff --git a/README.md b/README.md
index 1bb5eacd0..722041c93 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
-
+
**Image Diffusion implementation with advanced features**
@@ -8,13 +8,14 @@
[![Discord](https://img.shields.io/discord/1101998836328697867?logo=Discord&svg=true)](https://discord.gg/VjvR2tabEX)
[![Sponsors](https://img.shields.io/static/v1?label=Sponsor&message=%E2%9D%A4&logo=GitHub&color=%23fe8e86)](https://github.com/sponsors/vladmandic)
-[Wiki](https://github.com/vladmandic/automatic/wiki) | [Discord](https://discord.gg/VjvR2tabEX) | [Changelog](CHANGELOG.md)
+[Docs](https://vladmandic.github.io/sdnext-docs/) | [Wiki](https://github.com/vladmandic/automatic/wiki) | [Discord](https://discord.gg/VjvR2tabEX) | [Changelog](CHANGELOG.md)
## Table of contents
+- [Documentation](https://vladmandic.github.io/sdnext-docs/)
- [SD.Next Features](#sdnext-features)
- [Model support](#model-support)
- [Platform support](#platform-support)
@@ -137,7 +138,7 @@ This should be fully cross-platform, but we'd really love to have additional con
### Credits
-- Main credit goes to [Automatic1111 WebUI](https://github.com/AUTOMATIC1111/stable-diffusion-webui) for original codebase
+- Main credit goes to [Automatic1111 WebUI](https://github.com/AUTOMATIC1111/stable-diffusion-webui) for the original codebase
- Additional credits are listed in [Credits](https://github.com/AUTOMATIC1111/stable-diffusion-webui/#credits)
- Licenses for modules are listed in [Licenses](html/licenses.html)
@@ -154,7 +155,7 @@ This should be fully cross-platform, but we'd really love to have additional con
### Docs
-If you're unsure how to use a feature, best place to start is [Wiki](https://github.com/vladmandic/automatic/wiki) and if its not there,
+If you're unsure how to use a feature, the best place to start is [Docs](https://vladmandic.github.io/sdnext-docs/) or [Wiki](https://github.com/vladmandic/automatic/wiki) and if it's not there,
check [ChangeLog](CHANGELOG.md) for when the feature was first introduced, as it will always have a short note on how to use it
### Sponsors
diff --git a/wiki b/wiki
index db828893c..8d63a0f04 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit db828893c803f1d5d0180cfe09689884bf27af2d
+Subproject commit 8d63a0f04687f24c4ef413f231970087f167175c
From 7a213fe69aaacf2f9c6f3ed64dc7d5a79be5918f Mon Sep 17 00:00:00 2001
From: Disty0
Date: Fri, 13 Dec 2024 00:12:40 +0300
Subject: [PATCH 108/162] IPEX fix Flux
---
modules/intel/ipex/diffusers.py | 42 +++++++++++++++++++++------------
1 file changed, 27 insertions(+), 15 deletions(-)
diff --git a/modules/intel/ipex/diffusers.py b/modules/intel/ipex/diffusers.py
index f742fe5c0..2af602558 100644
--- a/modules/intel/ipex/diffusers.py
+++ b/modules/intel/ipex/diffusers.py
@@ -20,20 +20,31 @@ def fourier_filter(x_in, threshold, scale):
# fp64 error
-def rope(pos: torch.Tensor, dim: int, theta: int) -> torch.Tensor:
- assert dim % 2 == 0, "The dimension must be even."
-
- scale = torch.arange(0, dim, 2, dtype=torch.float32, device=pos.device) / dim # force fp32 instead of fp64
- omega = 1.0 / (theta**scale)
-
- batch_size, seq_length = pos.shape
- out = torch.einsum("...n,d->...nd", pos, omega)
- cos_out = torch.cos(out)
- sin_out = torch.sin(out)
-
- stacked_out = torch.stack([cos_out, -sin_out, sin_out, cos_out], dim=-1)
- out = stacked_out.view(batch_size, -1, dim // 2, 2, 2)
- return out.float()
+class FluxPosEmbed(torch.nn.Module):
+ def __init__(self, theta: int, axes_dim):
+ super().__init__()
+ self.theta = theta
+ self.axes_dim = axes_dim
+
+ def forward(self, ids: torch.Tensor) -> torch.Tensor:
+ n_axes = ids.shape[-1]
+ cos_out = []
+ sin_out = []
+ pos = ids.float()
+ for i in range(n_axes):
+ cos, sin = diffusers.models.embeddings.get_1d_rotary_pos_embed(
+ self.axes_dim[i],
+ pos[:, i],
+ theta=self.theta,
+ repeat_interleave_real=True,
+ use_real=True,
+ freqs_dtype=torch.float32,
+ )
+ cos_out.append(cos)
+ sin_out.append(sin)
+ freqs_cos = torch.cat(cos_out, dim=-1).to(ids.device)
+ freqs_sin = torch.cat(sin_out, dim=-1).to(ids.device)
+ return freqs_cos, freqs_sin
@cache
@@ -337,4 +348,5 @@ def ipex_diffusers():
if not device_supports_fp64 or os.environ.get('IPEX_FORCE_ATTENTION_SLICE', None) is not None:
diffusers.models.attention_processor.SlicedAttnProcessor = SlicedAttnProcessor
diffusers.models.attention_processor.AttnProcessor = AttnProcessor
- diffusers.models.transformers.transformer_flux.rope = rope
+ if not device_supports_fp64:
+ diffusers.models.embeddings.FluxPosEmbed = FluxPosEmbed
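A rough usage sketch for the fp32 FluxPosEmbed replacement above, assuming the class is importable and the installed diffusers provides get_1d_rotary_pos_embed; theta, axes_dim and the ids shape are illustrative values:

import torch
emb = FluxPosEmbed(theta=10000, axes_dim=[16, 56, 56])
ids = torch.zeros(512, 3)                  # (sequence, n_axes) position ids
freqs_cos, freqs_sin = emb(ids)            # both computed in float32, avoiding the fp64 path
print(freqs_cos.shape, freqs_sin.shape)    # expected: torch.Size([512, 128]) each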
From 8a6e5617baa2b70bea0254683a4fb8f0759a7a06 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Fri, 13 Dec 2024 22:05:13 +0300
Subject: [PATCH 109/162] Fix IPEX 2.5
---
modules/intel/ipex/__init__.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/modules/intel/ipex/__init__.py b/modules/intel/ipex/__init__.py
index e1c476e7e..147aa2798 100644
--- a/modules/intel/ipex/__init__.py
+++ b/modules/intel/ipex/__init__.py
@@ -77,7 +77,7 @@ def ipex_init(): # pylint: disable=too-many-statements
torch.cuda.warnings = torch.xpu.warnings
torch.cuda.classproperty = torch.xpu.classproperty
torch.UntypedStorage.cuda = torch.UntypedStorage.xpu
- if not ipex.__version__.startswith("2.3"):
+ if float(ipex.__version__[:3]) < 2.3:
torch.cuda._initialization_lock = torch.xpu.lazy_init._initialization_lock
torch.cuda._initialized = torch.xpu.lazy_init._initialized
torch.cuda._is_in_bad_fork = torch.xpu.lazy_init._is_in_bad_fork
@@ -111,7 +111,7 @@ def ipex_init(): # pylint: disable=too-many-statements
torch.cuda.ComplexFloatStorage = torch.xpu.ComplexFloatStorage
torch.cuda.ComplexDoubleStorage = torch.xpu.ComplexDoubleStorage
- if not legacy or ipex.__version__.startswith("2.3"):
+ if not legacy or float(ipex.__version__[:3]) >= 2.3:
torch.cuda._initialization_lock = torch.xpu._initialization_lock
torch.cuda._initialized = torch.xpu._initialized
torch.cuda._is_in_bad_fork = torch.xpu._is_in_bad_fork
@@ -159,7 +159,7 @@ def ipex_init(): # pylint: disable=too-many-statements
torch.xpu.amp.custom_fwd = torch.cuda.amp.custom_fwd
torch.xpu.amp.custom_bwd = torch.cuda.amp.custom_bwd
torch.cuda.amp = torch.xpu.amp
- if not ipex.__version__.startswith("2.3"):
+ if float(ipex.__version__[:3]) < 2.3:
torch.is_autocast_enabled = torch.xpu.is_autocast_xpu_enabled
torch.get_autocast_gpu_dtype = torch.xpu.get_autocast_xpu_dtype
@@ -178,7 +178,7 @@ def ipex_init(): # pylint: disable=too-many-statements
torch.cuda.amp.GradScaler = ipex.cpu.autocast._grad_scaler.GradScaler
# C
- if legacy and not ipex.__version__.startswith("2.3"):
+ if legacy and float(ipex.__version__[:3]) < 2.3:
torch._C._cuda_getCurrentRawStream = ipex._C._getCurrentRawStream
ipex._C._DeviceProperties.multi_processor_count = ipex._C._DeviceProperties.gpu_subslice_count
ipex._C._DeviceProperties.major = 12
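A standalone sketch of the float-prefix version check this patch switches to; the version strings are examples only:

for version in ('2.1.40+xpu', '2.3.110+xpu', '2.5.10+xpu'):
    print(version, float(version[:3]) < 2.3)   # True, False, False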
From 8ee5103ade8f16b86d3dd4d42f5361466c489c28 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Fri, 13 Dec 2024 22:16:56 +0300
Subject: [PATCH 110/162] Update to IPEX 2.5.10+xpu
---
CHANGELOG.md | 1 +
installer.py | 4 ++--
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 75979cc4c..7b05eac4f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -83,6 +83,7 @@
### Updates
- Additional Wiki content: Styles, Wildcards, etc.
+- **IPEX**: update to IPEX 2.5.10+xpu
- **OpenVINO**: update to 2024.5.0
- **Sampler** improvements
- Euler FlowMatch: add sigma methods (*karras/exponential/betas*)
diff --git a/installer.py b/installer.py
index 36254b66a..70c655ae3 100644
--- a/installer.py
+++ b/installer.py
@@ -635,13 +635,13 @@ def install_ipex(torch_command):
if os.environ.get("ClDeviceGlobalMemSizeAvailablePercent", None) is None:
os.environ.setdefault('ClDeviceGlobalMemSizeAvailablePercent', '100')
if "linux" in sys.platform:
- torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.3.1+cxx11.abi torchvision==0.18.1+cxx11.abi intel-extension-for-pytorch==2.3.110+xpu oneccl_bind_pt==2.3.100+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/')
+ torch_command = os.environ.get('TORCH_COMMAND', 'torch==2.5.1+cxx11.abi torchvision==0.20.1+cxx11.abi intel-extension-for-pytorch==2.5.10+xpu oneccl_bind_pt==2.5.0+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/')
# torch_command = os.environ.get('TORCH_COMMAND', 'torch torchvision --index-url https://download.pytorch.org/whl/test/xpu') # test wheels are stable previews, significantly slower than IPEX
# os.environ.setdefault('TENSORFLOW_PACKAGE', 'tensorflow==2.15.1 intel-extension-for-tensorflow[xpu]==2.15.0.1')
else:
torch_command = os.environ.get('TORCH_COMMAND', '--pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/xpu') # torchvision doesn't exist on test/stable branch for windows
install(os.environ.get('OPENVINO_PACKAGE', 'openvino==2024.5.0'), 'openvino', ignore=True)
- install('nncf==2.7.0', 'nncf', ignore=True)
+ install('nncf==2.7.0', ignore=True, no_deps=True) # requires older pandas
install(os.environ.get('ONNXRUNTIME_PACKAGE', 'onnxruntime-openvino'), 'onnxruntime-openvino', ignore=True)
return torch_command
From 7d7bcb9684b5b9b535ee74ff9d19de2af3791ab8 Mon Sep 17 00:00:00 2001
From: Disty0
Date: Fri, 13 Dec 2024 23:01:53 +0300
Subject: [PATCH 111/162] Fix balanced offload with Cascade
---
modules/intel/ipex/diffusers.py | 2 +-
modules/sd_models.py | 2 ++
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/modules/intel/ipex/diffusers.py b/modules/intel/ipex/diffusers.py
index 2af602558..5bf5bbe39 100644
--- a/modules/intel/ipex/diffusers.py
+++ b/modules/intel/ipex/diffusers.py
@@ -1,7 +1,7 @@
import os
from functools import wraps, cache
import torch
-import diffusers #0.29.1 # pylint: disable=import-error
+import diffusers # pylint: disable=import-error
from diffusers.models.attention_processor import Attention
# pylint: disable=protected-access, missing-function-docstring, line-too-long
diff --git a/modules/sd_models.py b/modules/sd_models.py
index c39c0263e..c5875c61f 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -481,6 +481,8 @@ def apply_balanced_offload_to_module(pipe):
keys = [k for k in keys if k not in exclude and not k.startswith('_')]
for module_name, module_size in get_pipe_modules(pipe): # pylint: disable=protected-access
module = getattr(pipe, module_name, None)
+ if module is None:
+ continue
network_layer_name = getattr(module, "network_layer_name", None)
device_map = getattr(module, "balanced_offload_device_map", None)
max_memory = getattr(module, "balanced_offload_max_memory", None)
From 814161cb210c623cb87f3fb9f87cd76b1a7e35ac Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 14 Dec 2024 17:29:51 -0500
Subject: [PATCH 112/162] major controlnet work, xinsir promax and tiling
support
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 17 +++++--
installer.py | 2 +-
modules/control/run.py | 74 ++++++++++++++++++++--------
modules/control/tile.py | 75 +++++++++++++++++++++++++++++
modules/control/unit.py | 38 +++++++++++++--
modules/control/units/controlnet.py | 32 ++++++------
modules/images_resize.py | 10 ++--
modules/sd_models.py | 5 ++
modules/ui_control.py | 10 ++--
scripts/regional_prompting.py | 1 +
10 files changed, 208 insertions(+), 56 deletions(-)
create mode 100644 modules/control/tile.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7b05eac4f..c24484113 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-12-12
+## Update for 2024-12-13
### New models and integrations
@@ -33,11 +33,18 @@
    style-aligned applies selected attention layers uniformly to all images to achieve consistency
can be used with or without input image in which case first prompt is used to establish baseline
    *note:* all prompts are processed as a single batch, so vram is the limiting factor
+- **ControlNet**
+ - improved support for `Union` controlnets with granular control mode type
+ - added support for latest [Xinsir ProMax](https://huggingface.co/xinsir/controlnet-union-sdxl-1.0) all-in-one controlnet
+ - added support for multiple **Tiling** controlnets, for example [Xinsir Tile](https://huggingface.co/xinsir/controlnet-tile-sdxl-1.0)
+ *note*: when selecting tiles in control settings, you can also specify non-square ratios
+ in which case it will use context-aware image resize to maintain overall composition
### UI and workflow improvements
- **Docs**:
- New documentation site!
+ - Additional Wiki content: Styles, Wildcards, etc.
- **LoRA** handler rewrite:
- LoRA weights are no longer calculated on-the-fly during model execution, but are pre-calculated at the start
    this adds some perceived overhead at generate startup, but results in overall faster execution as LoRA does not need to be processed on each step
@@ -82,7 +89,6 @@
### Updates
-- Additional Wiki content: Styles, Wildcards, etc.
- **IPEX**: update to IPEX 2.5.10+xpu
- **OpenVINO**: update to 2024.5.0
- **Sampler** improvements
@@ -108,9 +114,10 @@
- simplify img2img/inpaint/sketch canvas handling
- fix prompt caching
- fix xyz grid skip final pass
-- fix sd upscale script
-- fix cogvideox-i2v
-- lora auto-apply tags remove duplicates
+- fix sd upscale script
+- fix cogvideox-i2v
+- lora auto-apply tags remove duplicates
+- control load model on-demand if not already loaded
## Update for 2024-11-21
diff --git a/installer.py b/installer.py
index 70c655ae3..18a8ad1f1 100644
--- a/installer.py
+++ b/installer.py
@@ -459,7 +459,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
def check_diffusers():
if args.skip_all or args.skip_requirements:
return
- sha = '914a585be8187ec0ad92fab4f072c992f8c297cd' # diffusers commit hash
+ sha = '63243406ba5510c10d5cac931882918ceba926f9' # diffusers commit hash
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
cur = opts.get('diffusers_version', '') if minor > 0 else ''
diff --git a/modules/control/run.py b/modules/control/run.py
index 6ae7fb20c..ac1ff233d 100644
--- a/modules/control/run.py
+++ b/modules/control/run.py
@@ -7,6 +7,7 @@
from modules.control import util # helper functions
from modules.control import unit # control units
from modules.control import processors # image preprocessors
+from modules.control import tile # tiling module
from modules.control.units import controlnet # lllyasviel ControlNet
from modules.control.units import xs # VisLearn ControlNet-XS
from modules.control.units import lite # Kohya ControlLLLite
@@ -83,6 +84,7 @@ def control_run(state: str = '',
u.adapter.load(u.model_name, force=False)
else:
u.controlnet.load(u.model_name, force=False)
+ u.update_choices(u.model_name)
if u.process is not None and u.process.override is None and u.override is not None:
u.process.override = u.override
@@ -149,6 +151,7 @@ def control_run(state: str = '',
outpath_grids=shared.opts.outdir_grids or shared.opts.outdir_control_grids,
)
p.state = state
+ p.is_tile = False
# processing.process_init(p)
resize_mode_before = resize_mode_before if resize_name_before != 'None' and inputs is not None and len(inputs) > 0 else 0
@@ -242,7 +245,7 @@ def control_run(state: str = '',
active_model.append(u.adapter)
active_strength.append(float(u.strength))
p.adapter_conditioning_factor = u.factor
- shared.log.debug(f'Control T2I-Adapter unit: i={num_units} process={u.process.processor_id} model={u.adapter.model_id} strength={u.strength} factor={u.factor}')
+ shared.log.debug(f'Control T2I-Adapter unit: i={num_units} process="{u.process.processor_id}" model="{u.adapter.model_id}" strength={u.strength} factor={u.factor}')
elif unit_type == 'controlnet' and u.controlnet.model is not None:
active_process.append(u.process)
active_model.append(u.controlnet)
@@ -250,8 +253,12 @@ def control_run(state: str = '',
active_start.append(float(u.start))
active_end.append(float(u.end))
p.guess_mode = u.guess
- p.control_mode = u.mode
- shared.log.debug(f'Control ControlNet unit: i={num_units} process={u.process.processor_id} model={u.controlnet.model_id} strength={u.strength} guess={u.guess} start={u.start} end={u.end} mode={u.mode}')
+ if isinstance(u.mode, str):
+ p.control_mode = u.choices.index(u.mode) if u.mode in u.choices else 0
+ p.is_tile = p.is_tile or 'tile' in u.mode.lower()
+ p.control_tile = u.tile
+ p.extra_generation_params["Control mode"] = u.mode
+ shared.log.debug(f'Control ControlNet unit: i={num_units} process="{u.process.processor_id}" model="{u.controlnet.model_id}" strength={u.strength} guess={u.guess} start={u.start} end={u.end} mode={u.mode}')
elif unit_type == 'xs' and u.controlnet.model is not None:
active_process.append(u.process)
active_model.append(u.controlnet)
@@ -291,6 +298,7 @@ def control_run(state: str = '',
selected_models = None
elif len(active_model) == 1:
selected_models = active_model[0].model if active_model[0].model is not None else None
+ p.is_tile = p.is_tile or 'tile' in active_model[0].model_id.lower()
has_models = selected_models is not None
control_conditioning = active_strength[0] if len(active_strength) > 0 else 1 # strength or list[strength]
control_guidance_start = active_start[0] if len(active_start) > 0 else 0
@@ -305,29 +313,30 @@ def control_run(state: str = '',
has_models = any(u.enabled for u in units if u.type == 'reference')
else:
pass
+ p.is_tile = p.is_tile and has_models
def set_pipe():
global pipe, instance # pylint: disable=global-statement
pipe = None
if has_models:
p.ops.append('control')
- p.extra_generation_params["Control mode"] = unit_type # overriden later with pretty-print
+ p.extra_generation_params["Control type"] = unit_type # overriden later with pretty-print
+ p.extra_generation_params["Control model"] = ';'.join([(m.model_id or '') for m in active_model if m.model is not None])
p.extra_generation_params["Control conditioning"] = control_conditioning if isinstance(control_conditioning, list) else [control_conditioning]
p.extra_generation_params['Control start'] = control_guidance_start if isinstance(control_guidance_start, list) else [control_guidance_start]
p.extra_generation_params['Control end'] = control_guidance_end if isinstance(control_guidance_end, list) else [control_guidance_end]
- p.extra_generation_params["Control model"] = ';'.join([(m.model_id or '') for m in active_model if m.model is not None])
p.extra_generation_params["Control conditioning"] = ';'.join([str(c) for c in p.extra_generation_params["Control conditioning"]])
p.extra_generation_params['Control start'] = ';'.join([str(c) for c in p.extra_generation_params['Control start']])
p.extra_generation_params['Control end'] = ';'.join([str(c) for c in p.extra_generation_params['Control end']])
if unit_type == 't2i adapter' and has_models:
- p.extra_generation_params["Control mode"] = 'T2I-Adapter'
+ p.extra_generation_params["Control type"] = 'T2I-Adapter'
p.task_args['adapter_conditioning_scale'] = control_conditioning
instance = t2iadapter.AdapterPipeline(selected_models, shared.sd_model)
pipe = instance.pipeline
if inits is not None:
shared.log.warning('Control: T2I-Adapter does not support separate init image')
elif unit_type == 'controlnet' and has_models:
- p.extra_generation_params["Control mode"] = 'ControlNet'
+ p.extra_generation_params["Control type"] = 'ControlNet'
p.task_args['controlnet_conditioning_scale'] = control_conditioning
p.task_args['control_guidance_start'] = control_guidance_start
p.task_args['control_guidance_end'] = control_guidance_end
@@ -335,7 +344,7 @@ def set_pipe():
instance = controlnet.ControlNetPipeline(selected_models, shared.sd_model, p=p)
pipe = instance.pipeline
elif unit_type == 'xs' and has_models:
- p.extra_generation_params["Control mode"] = 'ControlNet-XS'
+ p.extra_generation_params["Control type"] = 'ControlNet-XS'
p.controlnet_conditioning_scale = control_conditioning
p.control_guidance_start = control_guidance_start
p.control_guidance_end = control_guidance_end
@@ -344,14 +353,14 @@ def set_pipe():
if inits is not None:
shared.log.warning('Control: ControlNet-XS does not support separate init image')
elif unit_type == 'lite' and has_models:
- p.extra_generation_params["Control mode"] = 'ControlLLLite'
+ p.extra_generation_params["Control type"] = 'ControlLLLite'
p.controlnet_conditioning_scale = control_conditioning
instance = lite.ControlLLitePipeline(shared.sd_model)
pipe = instance.pipeline
if inits is not None:
shared.log.warning('Control: ControlLLLite does not support separate init image')
elif unit_type == 'reference' and has_models:
- p.extra_generation_params["Control mode"] = 'Reference'
+ p.extra_generation_params["Control type"] = 'Reference'
p.extra_generation_params["Control attention"] = p.attention
p.task_args['reference_attn'] = 'Attention' in p.attention
p.task_args['reference_adain'] = 'Adain' in p.attention
@@ -393,6 +402,8 @@ def set_pipe():
else:
original_pipeline = None
+ possible = sd_models.get_call(pipe).keys()
+
try:
with devices.inference_context():
if isinstance(inputs, str): # only video, the rest is a list
@@ -562,19 +573,29 @@ def set_pipe():
return [], '', '', 'Reference mode without image'
elif unit_type == 'controlnet' and has_models:
if input_type == 0: # Control only
- if shared.sd_model_type in ['f1', 'sd3'] and 'control_image' not in p.task_args:
- p.task_args['control_image'] = p.init_images # some controlnets mandate this
+ if 'control_image' in possible:
+ p.task_args['control_image'] = [p.init_images] if isinstance(p.init_images, Image.Image) else p.init_images
+ elif 'image' in possible:
+ p.task_args['image'] = [p.init_images] if isinstance(p.init_images, Image.Image) else p.init_images
+ if 'control_mode' in possible:
+ p.task_args['control_mode'] = p.control_mode
+ if 'strength' in possible:
p.task_args['strength'] = p.denoising_strength
+ p.init_images = None
elif input_type == 1: # Init image same as control
- p.task_args['control_image'] = p.init_images # switch image and control_image
- p.task_args['strength'] = p.denoising_strength
+ if 'control_image' in possible:
+ p.task_args['control_image'] = p.init_images # switch image and control_image
+ if 'strength' in possible:
+ p.task_args['strength'] = p.denoising_strength
p.init_images = [p.override or input_image] * len(active_model)
elif input_type == 2: # Separate init image
if init_image is None:
shared.log.warning('Control: separate init image not provided')
init_image = input_image
- p.task_args['control_image'] = p.init_images # switch image and control_image
- p.task_args['strength'] = p.denoising_strength
+ if 'control_image' in possible:
+ p.task_args['control_image'] = p.init_images # switch image and control_image
+ if 'strength' in possible:
+ p.task_args['strength'] = p.denoising_strength
p.init_images = [init_image] * len(active_model)
if is_generator:
@@ -607,11 +628,11 @@ def set_pipe():
p.task_args['strength'] = denoising_strength
p.image_mask = mask
shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.INPAINTING) # only controlnet supports inpaint
- elif 'control_image' in p.task_args:
+ if hasattr(p, 'init_images') and p.init_images is not None:
shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE) # only controlnet supports img2img
else:
shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.TEXT_2_IMAGE)
- if hasattr(p, 'init_images') and p.init_images is not None:
+ if hasattr(p, 'init_images') and p.init_images is not None and 'image' in possible:
p.task_args['image'] = p.init_images # need to set explicitly for txt2img
del p.init_images
if unit_type == 'lite':
@@ -624,9 +645,14 @@ def set_pipe():
# final check
if has_models:
- if unit_type in ['controlnet', 't2i adapter', 'lite', 'xs'] and p.task_args.get('image', None) is None and getattr(p, 'init_images', None) is None:
+ if unit_type in ['controlnet', 't2i adapter', 'lite', 'xs'] \
+ and p.task_args.get('image', None) is None \
+ and p.task_args.get('control_image', None) is None \
+ and getattr(p, 'init_images', None) is None \
+ and getattr(p, 'image', None) is None:
if is_generator:
- yield terminate(f'Mode={p.extra_generation_params.get("Control mode", None)} input image is none')
+ shared.log.debug(f'Control args: {p.task_args}')
+ yield terminate(f'Mode={p.extra_generation_params.get("Control type", None)} input image is none')
return [], '', '', 'Error: Input image is none'
# resize mask
@@ -656,11 +682,17 @@ def set_pipe():
script_runner.initialize_scripts(False)
p.script_args = script.init_default_script_args(script_runner)
- processed = p.scripts.run(p, *p.script_args)
+ # actual processing
+ if p.is_tile:
+ processed: processing.Processed = tile.run_tiling(p, input_image)
+ if processed is None:
+ processed = p.scripts.run(p, *p.script_args)
if processed is None:
processed: processing.Processed = processing.process_images(p) # run actual pipeline
else:
script_run = True
+
+ # postprocessing
processed = p.scripts.after(p, processed, *p.script_args)
output = None
if processed is not None:
diff --git a/modules/control/tile.py b/modules/control/tile.py
new file mode 100644
index 000000000..5dc104e47
--- /dev/null
+++ b/modules/control/tile.py
@@ -0,0 +1,75 @@
+from PIL import Image
+from modules import shared, processing, images, sd_models
+
+
+def get_tile(image: Image.Image, x: int, y: int, sx: int, sy: int) -> Image.Image:
+ return image.crop((
+ (x + 0) * image.width // sx,
+ (y + 0) * image.height // sy,
+ (x + 1) * image.width // sx,
+ (y + 1) * image.height // sy
+ ))
+
+
+def set_tile(image: Image.Image, x: int, y: int, tiled: Image.Image):
+ image.paste(tiled, (x * tiled.width, y * tiled.height))
+ return image
+
+
+def run_tiling(p: processing.StableDiffusionProcessing, input_image: Image.Image) -> processing.Processed:
+ # prepare images
+ sx, sy = p.control_tile.split('x')
+ sx = int(sx)
+ sy = int(sy)
+ if sx <= 0 or sy <= 0:
+ raise ValueError('Control: invalid tile size')
+ control_image = p.task_args.get('control_image', None) or p.task_args.get('image', None)
+ control_upscaled = None
+ if isinstance(control_image, list) and len(control_image) > 0:
+ control_upscaled = images.resize_image(resize_mode=1 if sx==sy else 5,
+ im=control_image[0],
+ width=8 * int(sx * control_image[0].width) // 8,
+ height=8 * int(sy * control_image[0].height) // 8,
+ context='add with forward'
+ )
+ init_image = p.override or input_image
+ init_upscaled = None
+ if init_image is not None:
+ init_upscaled = images.resize_image(resize_mode=1 if sx==sy else 5,
+ im=init_image,
+ width=8 * int(sx * init_image.width) // 8,
+ height=8 * int(sy * init_image.height) // 8,
+ context='add with forward'
+ )
+
+ # stop processing from restoring pipeline on each iteration
+ orig_restore_pipeline = getattr(shared.sd_model, 'restore_pipeline', None)
+ shared.sd_model.restore_pipeline = None
+
+ # run tiling
+ for x in range(sx):
+ for y in range(sy):
+ shared.log.info(f'Control Tile: tile={x+1}-{sx}/{y+1}-{sy} target={control_upscaled}')
+ shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
+ p.init_images = None
+ p.task_args['control_mode'] = p.control_mode
+ p.task_args['strength'] = p.denoising_strength
+ if init_upscaled is not None:
+ p.task_args['image'] = [get_tile(init_upscaled, x, y, sx, sy)]
+ if control_upscaled is not None:
+ p.task_args['control_image'] = [get_tile(control_upscaled, x, y, sx, sy)]
+ processed: processing.Processed = processing.process_images(p) # run actual pipeline
+ if processed is None or len(processed.images) == 0:
+ continue
+ control_upscaled = set_tile(control_upscaled, x, y, processed.images[0])
+
+ # post-process
+ p.width = control_upscaled.width
+ p.height = control_upscaled.height
+ processed.images = [control_upscaled]
+ processed.info = processed.infotext(p, 0)
+ processed.infotexts = [processed.info]
+ shared.sd_model.restore_pipeline = orig_restore_pipeline
+ if hasattr(shared.sd_model, 'restore_pipeline') and shared.sd_model.restore_pipeline is not None:
+ shared.sd_model.restore_pipeline()
+ return processed
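A minimal round-trip sketch of get_tile/set_tile above on a blank canvas, showing how run_tiling walks the grid; the 2x2 split and 1024px canvas are illustrative values:

from PIL import Image
sx, sy = 2, 2                                   # parsed from a control_tile string such as '2x2'
canvas = Image.new('RGB', (1024, 1024))
for x in range(sx):
    for y in range(sy):
        tile = get_tile(canvas, x, y, sx, sy)   # crop one 512x512 tile
        # ... the real code runs the img2img pipeline on this tile here ...
        canvas = set_tile(canvas, x, y, tile)   # paste the (processed) tile back in place
print(canvas.size)                              # (1024, 1024)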
diff --git a/modules/control/unit.py b/modules/control/unit.py
index 7dc5528a6..eeb729740 100644
--- a/modules/control/unit.py
+++ b/modules/control/unit.py
@@ -16,6 +16,22 @@
class Unit(): # mashup of gradio controls and mapping to actual implementation classes
+ def update_choices(self, model_id=None):
+ name = model_id or self.model_name
+ if name == 'InstantX Union':
+ self.choices = ['canny', 'tile', 'depth', 'blur', 'pose', 'gray', 'lq']
+ elif name == 'Shakker-Labs Union':
+ self.choices = ['canny', 'tile', 'depth', 'blur', 'pose', 'gray', 'lq']
+ elif name == 'Xinsir Union XL':
+ self.choices = ['openpose', 'depth', 'scribble', 'canny', 'normal']
+ elif name == 'Xinsir ProMax XL':
+ self.choices = ['openpose', 'depth', 'scribble', 'canny', 'normal', 'segment', 'tile', 'repaint']
+ else:
+ self.choices = ['default']
+
+ def __str__(self):
+ return f'Unit: type={self.type} enabled={self.enabled} strength={self.strength} start={self.start} end={self.end} mode={self.mode} tile={self.tile}'
+
def __init__(self,
# values
index: int = None,
@@ -38,6 +54,7 @@ def __init__(self,
control_start = None,
control_end = None,
control_mode = None,
+ control_tile = None,
result_txt = None,
extra_controls: list = [],
):
@@ -70,6 +87,10 @@ def __init__(self,
self.fidelity = 0.5
self.query_weight = 1.0
self.adain_weight = 1.0
+ # control mode
+ self.choices = ['default']
+ # control tile
+ self.tile = '1x1'
def reset():
if self.process is not None:
@@ -92,10 +113,16 @@ def control_change(start, end):
self.end = max(start, end)
def control_mode_change(mode):
- self.mode = mode - 1 if mode > 0 else None
+ self.mode = self.choices.index(mode) if mode is not None and mode in self.choices else 0
+
+ def control_tile_change(tile):
+ self.tile = tile
- def control_mode_show(model_id):
- return gr.update(visible='union' in model_id.lower())
+ def control_choices(model_id):
+ self.update_choices(model_id)
+ mode_visible = 'union' in model_id.lower() or 'promax' in model_id.lower()
+ tile_visible = 'union' in model_id.lower() or 'promax' in model_id.lower() or 'tile' in model_id.lower()
+ return [gr.update(visible=mode_visible, choices=self.choices), gr.update(visible=tile_visible)]
def adapter_extra(c1):
self.factor = c1
@@ -172,7 +199,7 @@ def set_image(image):
else:
self.controls.append(model_id)
model_id.change(fn=self.controlnet.load, inputs=[model_id], outputs=[result_txt], show_progress=True)
- model_id.change(fn=control_mode_show, inputs=[model_id], outputs=[control_mode], show_progress=False)
+ model_id.change(fn=control_choices, inputs=[model_id], outputs=[control_mode, control_tile], show_progress=False)
if extra_controls is not None and len(extra_controls) > 0:
extra_controls[0].change(fn=controlnet_extra, inputs=extra_controls)
elif self.type == 'xs':
@@ -231,3 +258,6 @@ def set_image(image):
if control_mode is not None:
self.controls.append(control_mode)
control_mode.change(fn=control_mode_change, inputs=[control_mode])
+ if control_tile is not None:
+ self.controls.append(control_tile)
+ control_tile.change(fn=control_tile_change, inputs=[control_tile])
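A small sketch of how the per-model choices from update_choices map the UI mode string to the integer control_mode used downstream; the list mirrors the Xinsir ProMax XL entry above:

choices = ['openpose', 'depth', 'scribble', 'canny', 'normal', 'segment', 'tile', 'repaint']
mode = 'tile'                                            # value picked in the CN Mode dropdown
control_mode = choices.index(mode) if mode in choices else 0
print(control_mode)                                      # 6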
diff --git a/modules/control/units/controlnet.py b/modules/control/units/controlnet.py
index 3fa2d90eb..7361638c6 100644
--- a/modules/control/units/controlnet.py
+++ b/modules/control/units/controlnet.py
@@ -52,17 +52,20 @@
'Depth Mid XL': 'diffusers/controlnet-depth-sdxl-1.0-mid',
'OpenPose XL': 'thibaud/controlnet-openpose-sdxl-1.0/bin',
'Xinsir Union XL': 'xinsir/controlnet-union-sdxl-1.0',
+ 'Xinsir ProMax XL': 'brad-twinkl/controlnet-union-sdxl-1.0-promax',
'Xinsir OpenPose XL': 'xinsir/controlnet-openpose-sdxl-1.0',
'Xinsir Canny XL': 'xinsir/controlnet-canny-sdxl-1.0',
'Xinsir Depth XL': 'xinsir/controlnet-depth-sdxl-1.0',
'Xinsir Scribble XL': 'xinsir/controlnet-scribble-sdxl-1.0',
'Xinsir Anime Painter XL': 'xinsir/anime-painter',
+ 'Xinsir Tile XL': 'xinsir/controlnet-tile-sdxl-1.0',
'NoobAI Canny XL': 'Eugeoter/noob-sdxl-controlnet-canny',
'NoobAI Lineart Anime XL': 'Eugeoter/noob-sdxl-controlnet-lineart_anime',
'NoobAI Depth XL': 'Eugeoter/noob-sdxl-controlnet-depth',
'NoobAI Normal XL': 'Eugeoter/noob-sdxl-controlnet-normal',
'NoobAI SoftEdge XL': 'Eugeoter/noob-sdxl-controlnet-softedge_hed',
'NoobAI OpenPose XL': 'einar77/noob-openpose',
+ 'TTPlanet Tile Realistic XL': 'Yakonrus/SDXL_Controlnet_Tile_Realistic_v2',
# 'StabilityAI Canny R128': 'stabilityai/control-lora/control-LoRAs-rank128/control-lora-canny-rank128.safetensors',
# 'StabilityAI Depth R128': 'stabilityai/control-lora/control-LoRAs-rank128/control-lora-depth-rank128.safetensors',
# 'StabilityAI Recolor R128': 'stabilityai/control-lora/control-LoRAs-rank128/control-lora-recolor-rank128.safetensors',
@@ -166,30 +169,30 @@ def reset(self):
self.model_id = None
def get_class(self, model_id:str=''):
- import modules.shared
- if modules.shared.sd_model_type == 'sd':
+ from modules import shared
+ if shared.sd_model_type == 'none':
+ _load = shared.sd_model # trigger a load
+ if shared.sd_model_type == 'sd':
from diffusers import ControlNetModel as cls # pylint: disable=reimported
config = 'lllyasviel/control_v11p_sd15_canny'
- elif modules.shared.sd_model_type == 'sdxl':
- # TODO ControlNetUnion
- """
+ elif shared.sd_model_type == 'sdxl':
if 'union' in model_id.lower():
from diffusers import ControlNetUnionModel as cls
config = 'xinsir/controlnet-union-sdxl-1.0'
+ elif 'promax' in model_id.lower():
+ from diffusers import ControlNetUnionModel as cls
+ config = 'brad-twinkl/controlnet-union-sdxl-1.0-promax'
else:
from diffusers import ControlNetModel as cls # pylint: disable=reimported # sdxl shares same model class
config = 'Eugeoter/noob-sdxl-controlnet-canny'
- """
- from diffusers import ControlNetModel as cls # pylint: disable=reimported # sdxl shares same model class
- config = 'Eugeoter/noob-sdxl-controlnet-canny'
- elif modules.shared.sd_model_type == 'f1':
+ elif shared.sd_model_type == 'f1':
from diffusers import FluxControlNetModel as cls
config = 'InstantX/FLUX.1-dev-Controlnet-Union'
- elif modules.shared.sd_model_type == 'sd3':
+ elif shared.sd_model_type == 'sd3':
from diffusers import SD3ControlNetModel as cls
config = 'InstantX/SD3-Controlnet-Canny'
else:
- log.error(f'Control {what}: type={modules.shared.sd_model_type} unsupported model')
+ log.error(f'Control {what}: type={shared.sd_model_type} unsupported model')
return None, None
return cls, config
@@ -299,7 +302,7 @@ def __init__(self,
controlnet: Union[ControlNetModel, list[ControlNetModel]],
pipeline: Union[StableDiffusionXLPipeline, StableDiffusionPipeline, FluxPipeline, StableDiffusion3Pipeline],
dtype = None,
- p: StableDiffusionProcessingControl = None,
+ p: StableDiffusionProcessingControl = None, # pylint: disable=unused-argument
):
t0 = time.time()
self.orig_pipeline = pipeline
@@ -314,14 +317,11 @@ def __init__(self,
return
elif detect.is_sdxl(pipeline) and len(controlnets) > 0:
from diffusers import StableDiffusionXLControlNetPipeline, StableDiffusionXLControlNetUnionPipeline
- # TODO ControlNetUnion
- """
if controlnet.__class__.__name__ == 'ControlNetUnionModel':
cls = StableDiffusionXLControlNetUnionPipeline
+ controlnets = controlnets[0] # using only first one
else:
cls = StableDiffusionXLControlNetPipeline
- """
- cls = StableDiffusionXLControlNetPipeline
self.pipeline = cls(
vae=pipeline.vae,
text_encoder=pipeline.text_encoder,
diff --git a/modules/images_resize.py b/modules/images_resize.py
index d86ff6f22..5cf3e57e4 100644
--- a/modules/images_resize.py
+++ b/modules/images_resize.py
@@ -5,7 +5,7 @@
from modules import shared
-def resize_image(resize_mode, im, width, height, upscaler_name=None, output_type='image', context=None):
+def resize_image(resize_mode: int, im: Image.Image, width: int, height: int, upscaler_name: str=None, output_type: str='image', context: str=None):
upscaler_name = upscaler_name or shared.opts.upscaler_for_img2img
def latent(im, w, h, upscaler):
@@ -79,18 +79,18 @@ def fill(im, color=None):
def context_aware(im, width, height, context):
import seam_carving # https://github.com/li-plus/seam-carving
- if 'forward' in context:
+ if 'forward' in context.lower():
energy_mode = "forward"
- elif 'backward' in context:
+ elif 'backward' in context.lower():
energy_mode = "backward"
else:
return im
- if 'Add' in context:
+ if 'add' in context.lower():
src_ratio = min(width / im.width, height / im.height)
src_w = int(im.width * src_ratio)
src_h = int(im.height * src_ratio)
src_image = resize(im, src_w, src_h)
- elif 'Remove' in context:
+ elif 'remove' in context.lower():
ratio = width / height
src_ratio = im.width / im.height
src_w = width if ratio > src_ratio else im.width * height // im.height
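A self-contained sketch of the now case-insensitive context matching; the context string mirrors the 'add with forward' value the tiling code passes to resize_image:

context = 'add with forward'
if 'forward' in context.lower():
    energy_mode = 'forward'
elif 'backward' in context.lower():
    energy_mode = 'backward'
else:
    energy_mode = None
grow = 'add' in context.lower()        # grow the canvas via seam insertion rather than shrink it
print(energy_mode, grow)               # forward True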
diff --git a/modules/sd_models.py b/modules/sd_models.py
index c5875c61f..5d42e314b 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -1057,6 +1057,11 @@ def get_signature(cls):
return signature.parameters
+def get_call(cls):
+ signature = inspect.signature(cls.__call__, follow_wrapped=True, eval_str=True)
+ return signature.parameters
+
+
def switch_pipe(cls: diffusers.DiffusionPipeline, pipeline: diffusers.DiffusionPipeline = None, force = False, args = {}):
"""
args:
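get_call is what lets control_run build `possible = sd_models.get_call(pipe).keys()` and only set task args the target pipeline actually accepts; here is a self-contained sketch with a stand-in pipeline class (the real helper also passes follow_wrapped/eval_str to inspect.signature):

import inspect

class DemoPipeline:                                      # illustrative stand-in, not a diffusers class
    def __call__(self, image=None, control_image=None, strength=0.3):
        return image

possible = inspect.signature(DemoPipeline.__call__).parameters.keys()
task_args = {}
if 'control_image' in possible:
    task_args['control_image'] = ['init image placeholder']
if 'strength' in possible:
    task_args['strength'] = 0.3
print(sorted(possible), task_args)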
diff --git a/modules/ui_control.py b/modules/ui_control.py
index 59db12fc5..5a146a8fc 100644
--- a/modules/ui_control.py
+++ b/modules/ui_control.py
@@ -138,7 +138,7 @@ def create_ui(_blocks: gr.Blocks=None):
show_input = gr.Checkbox(label="Show input", value=True, elem_id="control_show_input")
show_preview = gr.Checkbox(label="Show preview", value=False, elem_id="control_show_preview")
with gr.Row():
- input_type = gr.Radio(label="Input type", choices=['Control only', 'Init image same as control', 'Separate init image'], value='Control only', type='index', elem_id='control_input_type')
+ input_type = gr.Radio(label="Control input type", choices=['Control only', 'Init image same as control', 'Separate init image'], value='Control only', type='index', elem_id='control_input_type')
with gr.Row():
denoising_strength = gr.Slider(minimum=0.01, maximum=1.0, step=0.01, label='Denoising strength', value=0.30, elem_id="control_input_denoising_strength")
@@ -251,9 +251,10 @@ def create_ui(_blocks: gr.Blocks=None):
model_id = gr.Dropdown(label="ControlNet", choices=controlnet.list_models(), value='None', elem_id=f'control_unit-{i}-model_name')
ui_common.create_refresh_button(model_id, controlnet.list_models, lambda: {"choices": controlnet.list_models(refresh=True)}, f'refresh_controlnet_models_{i}')
model_strength = gr.Slider(label="CN Strength", minimum=0.01, maximum=2.0, step=0.01, value=1.0, elem_id=f'control_unit-{i}-strength')
- control_start = gr.Slider(label="Start", minimum=0.0, maximum=1.0, step=0.05, value=0, elem_id=f'control_unit-{i}-start')
- control_end = gr.Slider(label="End", minimum=0.0, maximum=1.0, step=0.05, value=1.0, elem_id=f'control_unit-{i}-end')
- control_mode = gr.Dropdown(label="CN Mode", choices=['', 'Canny', 'Tile', 'Depth', 'Blur', 'Pose', 'Gray', 'LQ'], value=0, type='index', visible=False, elem_id=f'control_unit-{i}-mode')
+ control_start = gr.Slider(label="CN Start", minimum=0.0, maximum=1.0, step=0.05, value=0, elem_id=f'control_unit-{i}-start')
+ control_end = gr.Slider(label="CN End", minimum=0.0, maximum=1.0, step=0.05, value=1.0, elem_id=f'control_unit-{i}-end')
+ control_mode = gr.Dropdown(label="CN Mode", choices=['default'], value='default', visible=False, elem_id=f'control_unit-{i}-mode')
+ control_tile = gr.Dropdown(label="CN Tiles", choices=['1x1', '1x2', '1x3', '1x4', '2x1', '2x1', '2x2', '2x3', '2x4', '3x1', '3x2', '3x3', '3x4', '4x1', '4x2', '4x3', '4x4'], value='1x1', visible=False, elem_id=f'control_unit-{i}-tile')
reset_btn = ui_components.ToolButton(value=ui_symbols.reset)
image_upload = gr.UploadButton(label=ui_symbols.upload, file_types=['image'], elem_classes=['form', 'gradio-button', 'tool'])
image_reuse= ui_components.ToolButton(value=ui_symbols.reuse)
@@ -278,6 +279,7 @@ def create_ui(_blocks: gr.Blocks=None):
control_start = control_start,
control_end = control_end,
control_mode = control_mode,
+ control_tile = control_tile,
extra_controls = extra_controls,
)
)
diff --git a/scripts/regional_prompting.py b/scripts/regional_prompting.py
index 08b84dd94..48309704e 100644
--- a/scripts/regional_prompting.py
+++ b/scripts/regional_prompting.py
@@ -82,6 +82,7 @@ def run(self, p: processing.StableDiffusionProcessing, mode, grid, power, thresh
}
# run pipeline
shared.log.debug(f'Regional: args={p.task_args}')
+ p.task_args['prompt'] = p.prompt
processed: processing.Processed = processing.process_images(p) # runs processing using main loop
# restore pipeline and params
From 3e8dec929730a9c2cb765d9051ba870cd3a19769 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 15 Dec 2024 12:40:54 -0500
Subject: [PATCH 113/162] add freescale
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 16 +-
modules/control/tile.py | 24 +-
modules/control/units/controlnet.py | 12 +-
modules/freescale/__init__.py | 4 +
modules/freescale/free_lunch_utils.py | 305 ++++
modules/freescale/freescale_pipeline.py | 1189 ++++++++++++++++
.../freescale/freescale_pipeline_img2img.py | 1245 +++++++++++++++++
modules/freescale/scale_attention.py | 367 +++++
modules/processing_diffusers.py | 27 +-
modules/sd_samplers_common.py | 4 +-
modules/sd_vae_taesd.py | 3 +
modules/shared.py | 1 +
modules/shared_state.py | 4 +-
modules/ui_control.py | 2 +-
scripts/freescale.py | 130 ++
15 files changed, 3303 insertions(+), 30 deletions(-)
create mode 100644 modules/freescale/__init__.py
create mode 100644 modules/freescale/free_lunch_utils.py
create mode 100644 modules/freescale/freescale_pipeline.py
create mode 100644 modules/freescale/freescale_pipeline_img2img.py
create mode 100644 modules/freescale/scale_attention.py
create mode 100644 scripts/freescale.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c24484113..35d3fcbe9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Change Log for SD.Next
-## Update for 2024-12-13
+## Update for 2024-12-15
### New models and integrations
@@ -32,13 +32,19 @@
enter multiple prompts in prompt field separated by new line
    style-aligned applies selected attention layers uniformly to all images to achieve consistency
can be used with or without input image in which case first prompt is used to establish baseline
- *note:* all prompts are processes as a single batch, so vram is limiting factor
+ *note:* all prompts are processed as a single batch, so vram is the limiting factor
+- [FreeScale](https://github.com/ali-vilab/FreeScale)
+ enable in scripts, compatible with sd-xl for text and img2img
+ run iterative generation of images at different scales to achieve better results
+ can render 4k sdxl images
+ *note*: disable live preview to avoid memory issues when generating large images
- **ControlNet**
- - improved support for `Union` controlnets with granular control mode type
+ - improved support for **Union** controlnets with granular control mode type
- added support for latest [Xinsir ProMax](https://huggingface.co/xinsir/controlnet-union-sdxl-1.0) all-in-one controlnet
- added support for multiple **Tiling** controlnets, for example [Xinsir Tile](https://huggingface.co/xinsir/controlnet-tile-sdxl-1.0)
*note*: when selecting tiles in control settings, you can also specify non-square ratios
- in which case it will use context-aware image resize to maintain overall composition
+ in which case it will use context-aware image resize to maintain overall composition
+ *note*: available tiling options can be set in settings -> control
### UI and workflow improvements
@@ -118,6 +124,8 @@
- fix cogvideox-i2v
- lora auto-apply tags remove duplicates
- control load model on-demand if not already loaded
+- taesd limit render to 2024px
+- taesd downscale preview to 1024px max
## Update for 2024-11-21
diff --git a/modules/control/tile.py b/modules/control/tile.py
index 5dc104e47..de9df1131 100644
--- a/modules/control/tile.py
+++ b/modules/control/tile.py
@@ -1,3 +1,4 @@
+import time
from PIL import Image
from modules import shared, processing, images, sd_models
@@ -17,30 +18,25 @@ def set_tile(image: Image.Image, x: int, y: int, tiled: Image.Image):
def run_tiling(p: processing.StableDiffusionProcessing, input_image: Image.Image) -> processing.Processed:
+ t0 = time.time()
# prepare images
sx, sy = p.control_tile.split('x')
sx = int(sx)
sy = int(sy)
if sx <= 0 or sy <= 0:
- raise ValueError('Control: invalid tile size')
+ raise ValueError('Control Tile: invalid tile size')
control_image = p.task_args.get('control_image', None) or p.task_args.get('image', None)
control_upscaled = None
if isinstance(control_image, list) and len(control_image) > 0:
- control_upscaled = images.resize_image(resize_mode=1 if sx==sy else 5,
- im=control_image[0],
- width=8 * int(sx * control_image[0].width) // 8,
- height=8 * int(sy * control_image[0].height) // 8,
- context='add with forward'
- )
+ w, h = 8 * int(sx * control_image[0].width) // 8, 8 * int(sy * control_image[0].height) // 8
+ control_upscaled = images.resize_image(resize_mode=1 if sx==sy else 5, im=control_image[0], width=w, height=h, context='add with forward')
init_image = p.override or input_image
init_upscaled = None
if init_image is not None:
- init_upscaled = images.resize_image(resize_mode=1 if sx==sy else 5,
- im=init_image,
- width=8 * int(sx * init_image.width) // 8,
- height=8 * int(sy * init_image.height) // 8,
- context='add with forward'
- )
+ w, h = 8 * int(sx * init_image.width) // 8, 8 * int(sy * init_image.height) // 8
+ init_upscaled = images.resize_image(resize_mode=1 if sx==sy else 5, im=init_image, width=w, height=h, context='add with forward')
+ t1 = time.time()
+ shared.log.debug(f'Control Tile: scale={sx}x{sy} resize={"fixed" if sx==sy else "context"} control={control_upscaled} init={init_upscaled} time={t1-t0:.3f}')
# stop processing from restoring pipeline on each iteration
orig_restore_pipeline = getattr(shared.sd_model, 'restore_pipeline', None)
@@ -72,4 +68,6 @@ def run_tiling(p: processing.StableDiffusionProcessing, input_image: Image.Image
shared.sd_model.restore_pipeline = orig_restore_pipeline
if hasattr(shared.sd_model, 'restore_pipeline') and shared.sd_model.restore_pipeline is not None:
shared.sd_model.restore_pipeline()
+ t2 = time.time()
+ shared.log.debug(f'Control Tile: image={control_upscaled} time={t2-t0:.3f}')
return processed
diff --git a/modules/control/units/controlnet.py b/modules/control/units/controlnet.py
index 7361638c6..c887aca8f 100644
--- a/modules/control/units/controlnet.py
+++ b/modules/control/units/controlnet.py
@@ -101,6 +101,14 @@
"Alimama Inpainting": 'alimama-creative/SD3-Controlnet-Inpainting',
"Alimama SoftEdge": 'alimama-creative/SD3-Controlnet-Softedge',
}
+variants = {
+ 'NoobAI Canny XL': 'fp16',
+ 'NoobAI Lineart Anime XL': 'fp16',
+ 'NoobAI Depth XL': 'fp16',
+ 'NoobAI Normal XL': 'fp16',
+ 'NoobAI SoftEdge XL': 'fp16',
+ 'TTPlanet Tile Realistic XL': 'fp16',
+}
models = {}
all_models = {}
all_models.update(predefined_sd15)
@@ -261,8 +269,8 @@ def load(self, model_id: str = None, force: bool = True) -> str:
if cls is None:
log.error(f'Control {what} model load failed: id="{model_id}" unknown base model')
return
- if 'Eugeoter' in model_path:
- kwargs['variant'] = 'fp16'
+ if variants.get(model_id, None) is not None:
+ kwargs['variant'] = variants[model_id]
self.model = cls.from_pretrained(model_path, **self.load_config, **kwargs)
if self.model is None:
return
diff --git a/modules/freescale/__init__.py b/modules/freescale/__init__.py
new file mode 100644
index 000000000..7b9c17f5d
--- /dev/null
+++ b/modules/freescale/__init__.py
@@ -0,0 +1,4 @@
+# Credits: https://github.com/ali-vilab/FreeScale
+
+from .freescale_pipeline import StableDiffusionXLFreeScale
+from .freescale_pipeline_img2img import StableDiffusionXLFreeScaleImg2Img
diff --git a/modules/freescale/free_lunch_utils.py b/modules/freescale/free_lunch_utils.py
new file mode 100644
index 000000000..be26b732a
--- /dev/null
+++ b/modules/freescale/free_lunch_utils.py
@@ -0,0 +1,305 @@
+from typing import Any, Dict, Optional, Tuple
+import torch
+import torch.fft as fft
+from diffusers.utils import is_torch_version
+
+""" Borrowed from https://github.com/ChenyangSi/FreeU/blob/main/demo/free_lunch_utils.py
+"""
+
+def isinstance_str(x: object, cls_name: str):
+ """
+ Checks whether x has any class *named* cls_name in its ancestry.
+ Doesn't require access to the class's implementation.
+
+ Useful for patching!
+ """
+
+ for _cls in x.__class__.__mro__:
+ if _cls.__name__ == cls_name:
+ return True
+
+ return False
+
+
+def Fourier_filter(x, threshold, scale):
+ dtype = x.dtype
+ x = x.type(torch.float32)
+ # FFT
+ x_freq = fft.fftn(x, dim=(-2, -1))
+ x_freq = fft.fftshift(x_freq, dim=(-2, -1))
+
+ B, C, H, W = x_freq.shape
+ mask = torch.ones((B, C, H, W)).cuda()
+
+ crow, ccol = H // 2, W //2
+ mask[..., crow - threshold:crow + threshold, ccol - threshold:ccol + threshold] = scale
+ x_freq = x_freq * mask
+
+ # IFFT
+ x_freq = fft.ifftshift(x_freq, dim=(-2, -1))
+ x_filtered = fft.ifftn(x_freq, dim=(-2, -1)).real
+
+ x_filtered = x_filtered.type(dtype)
+ return x_filtered
+
+
+def register_upblock2d(model):
+ def up_forward(self):
+ def forward(hidden_states, res_hidden_states_tuple, temb=None, upsample_size=None):
+ for resnet in self.resnets:
+ # pop res hidden states
+ res_hidden_states = res_hidden_states_tuple[-1]
+ res_hidden_states_tuple = res_hidden_states_tuple[:-1]
+ #print(f"in upblock2d, hidden states shape: {hidden_states.shape}")
+ hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1)
+
+ if self.training and self.gradient_checkpointing:
+
+ def create_custom_forward(module):
+ def custom_forward(*inputs):
+ return module(*inputs)
+
+ return custom_forward
+
+ if is_torch_version(">=", "1.11.0"):
+ hidden_states = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(resnet), hidden_states, temb, use_reentrant=False
+ )
+ else:
+ hidden_states = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(resnet), hidden_states, temb
+ )
+ else:
+ hidden_states = resnet(hidden_states, temb)
+
+ if self.upsamplers is not None:
+ for upsampler in self.upsamplers:
+ hidden_states = upsampler(hidden_states, upsample_size)
+
+ return hidden_states
+
+ return forward
+
+ for i, upsample_block in enumerate(model.unet.up_blocks):
+ if isinstance_str(upsample_block, "UpBlock2D"):
+ upsample_block.forward = up_forward(upsample_block)
+
+
+def register_free_upblock2d(model, b1=1.2, b2=1.4, s1=0.9, s2=0.2):
+ def up_forward(self):
+ def forward(hidden_states, res_hidden_states_tuple, temb=None, upsample_size=None):
+ for resnet in self.resnets:
+ # pop res hidden states
+ res_hidden_states = res_hidden_states_tuple[-1]
+ res_hidden_states_tuple = res_hidden_states_tuple[:-1]
+ #print(f"in free upblock2d, hidden states shape: {hidden_states.shape}")
+
+ # --------------- FreeU code -----------------------
+ # Only operate on the first two stages
+ if hidden_states.shape[1] == 1280:
+ hidden_states[:,:640] = hidden_states[:,:640] * self.b1
+ res_hidden_states = Fourier_filter(res_hidden_states, threshold=1, scale=self.s1)
+ if hidden_states.shape[1] == 640:
+ hidden_states[:,:320] = hidden_states[:,:320] * self.b2
+ res_hidden_states = Fourier_filter(res_hidden_states, threshold=1, scale=self.s2)
+ # ---------------------------------------------------------
+
+ hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1)
+
+ if self.training and self.gradient_checkpointing:
+
+ def create_custom_forward(module):
+ def custom_forward(*inputs):
+ return module(*inputs)
+
+ return custom_forward
+
+ if is_torch_version(">=", "1.11.0"):
+ hidden_states = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(resnet), hidden_states, temb, use_reentrant=False
+ )
+ else:
+ hidden_states = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(resnet), hidden_states, temb
+ )
+ else:
+ hidden_states = resnet(hidden_states, temb)
+
+ if self.upsamplers is not None:
+ for upsampler in self.upsamplers:
+ hidden_states = upsampler(hidden_states, upsample_size)
+
+ return hidden_states
+
+ return forward
+
+ for i, upsample_block in enumerate(model.unet.up_blocks):
+ if isinstance_str(upsample_block, "UpBlock2D"):
+ upsample_block.forward = up_forward(upsample_block)
+ setattr(upsample_block, 'b1', b1)
+ setattr(upsample_block, 'b2', b2)
+ setattr(upsample_block, 's1', s1)
+ setattr(upsample_block, 's2', s2)
+
+
+def register_crossattn_upblock2d(model):
+ def up_forward(self):
+ def forward(
+ hidden_states: torch.FloatTensor,
+ res_hidden_states_tuple: Tuple[torch.FloatTensor, ...],
+ temb: Optional[torch.FloatTensor] = None,
+ encoder_hidden_states: Optional[torch.FloatTensor] = None,
+ cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+ upsample_size: Optional[int] = None,
+ attention_mask: Optional[torch.FloatTensor] = None,
+ encoder_attention_mask: Optional[torch.FloatTensor] = None,
+ ):
+ for resnet, attn in zip(self.resnets, self.attentions):
+ # pop res hidden states
+                #print(f"in crossattn upblock2d, hidden states shape: {hidden_states.shape}")
+ res_hidden_states = res_hidden_states_tuple[-1]
+ res_hidden_states_tuple = res_hidden_states_tuple[:-1]
+ hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1)
+
+ if self.training and self.gradient_checkpointing:
+
+ def create_custom_forward(module, return_dict=None):
+ def custom_forward(*inputs):
+ if return_dict is not None:
+ return module(*inputs, return_dict=return_dict)
+ else:
+ return module(*inputs)
+
+ return custom_forward
+
+ ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
+ hidden_states = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(resnet),
+ hidden_states,
+ temb,
+ **ckpt_kwargs,
+ )
+ hidden_states = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(attn, return_dict=False),
+ hidden_states,
+ encoder_hidden_states,
+ None, # timestep
+ None, # class_labels
+ cross_attention_kwargs,
+ attention_mask,
+ encoder_attention_mask,
+ **ckpt_kwargs,
+ )[0]
+ else:
+ hidden_states = resnet(hidden_states, temb)
+ hidden_states = attn(
+ hidden_states,
+ encoder_hidden_states=encoder_hidden_states,
+ cross_attention_kwargs=cross_attention_kwargs,
+ attention_mask=attention_mask,
+ encoder_attention_mask=encoder_attention_mask,
+ return_dict=False,
+ )[0]
+
+ if self.upsamplers is not None:
+ for upsampler in self.upsamplers:
+ hidden_states = upsampler(hidden_states, upsample_size)
+
+ return hidden_states
+
+ return forward
+
+ for i, upsample_block in enumerate(model.unet.up_blocks):
+ if isinstance_str(upsample_block, "CrossAttnUpBlock2D"):
+ upsample_block.forward = up_forward(upsample_block)
+
+
+def register_free_crossattn_upblock2d(model, b1=1.2, b2=1.4, s1=0.9, s2=0.2):
+ def up_forward(self):
+ def forward(
+ hidden_states: torch.FloatTensor,
+ res_hidden_states_tuple: Tuple[torch.FloatTensor, ...],
+ temb: Optional[torch.FloatTensor] = None,
+ encoder_hidden_states: Optional[torch.FloatTensor] = None,
+ cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+ upsample_size: Optional[int] = None,
+ attention_mask: Optional[torch.FloatTensor] = None,
+ encoder_attention_mask: Optional[torch.FloatTensor] = None,
+ ):
+ for resnet, attn in zip(self.resnets, self.attentions):
+ # pop res hidden states
+                #print(f"in free crossattn upblock2d, hidden states shape: {hidden_states.shape}")
+ res_hidden_states = res_hidden_states_tuple[-1]
+ res_hidden_states_tuple = res_hidden_states_tuple[:-1]
+
+ # --------------- FreeU code -----------------------
+ # Only operate on the first two stages
+ if hidden_states.shape[1] == 1280:
+ hidden_states[:,:640] = hidden_states[:,:640] * self.b1
+ res_hidden_states = Fourier_filter(res_hidden_states, threshold=1, scale=self.s1)
+ if hidden_states.shape[1] == 640:
+ hidden_states[:,:320] = hidden_states[:,:320] * self.b2
+ res_hidden_states = Fourier_filter(res_hidden_states, threshold=1, scale=self.s2)
+ # ---------------------------------------------------------
+
+ hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1)
+
+ if self.training and self.gradient_checkpointing:
+
+ def create_custom_forward(module, return_dict=None):
+ def custom_forward(*inputs):
+ if return_dict is not None:
+ return module(*inputs, return_dict=return_dict)
+ else:
+ return module(*inputs)
+
+ return custom_forward
+
+ ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
+ hidden_states = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(resnet),
+ hidden_states,
+ temb,
+ **ckpt_kwargs,
+ )
+ hidden_states = torch.utils.checkpoint.checkpoint(
+ create_custom_forward(attn, return_dict=False),
+ hidden_states,
+ encoder_hidden_states,
+ None, # timestep
+ None, # class_labels
+ cross_attention_kwargs,
+ attention_mask,
+ encoder_attention_mask,
+ **ckpt_kwargs,
+ )[0]
+ else:
+ hidden_states = resnet(hidden_states, temb)
+ # hidden_states = attn(
+ # hidden_states,
+ # encoder_hidden_states=encoder_hidden_states,
+ # cross_attention_kwargs=cross_attention_kwargs,
+ # encoder_attention_mask=encoder_attention_mask,
+ # return_dict=False,
+ # )[0]
+ hidden_states = attn(
+ hidden_states,
+ encoder_hidden_states=encoder_hidden_states,
+ cross_attention_kwargs=cross_attention_kwargs,
+ )[0]
+
+ if self.upsamplers is not None:
+ for upsampler in self.upsamplers:
+ hidden_states = upsampler(hidden_states, upsample_size)
+
+ return hidden_states
+
+ return forward
+
+ for i, upsample_block in enumerate(model.unet.up_blocks):
+ if isinstance_str(upsample_block, "CrossAttnUpBlock2D"):
+ upsample_block.forward = up_forward(upsample_block)
+ setattr(upsample_block, 'b1', b1)
+ setattr(upsample_block, 'b2', b2)
+ setattr(upsample_block, 's1', s1)
+ setattr(upsample_block, 's2', s2)
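+
+
+# Illustrative usage (a sketch, not executed here): given a loaded diffusers pipeline `pipe`
+# exposing `pipe.unet`, FreeU can be toggled by re-registering the up-block forwards, e.g.
+#   register_free_upblock2d(pipe, b1=1.2, b2=1.4, s1=0.9, s2=0.2)
+#   register_free_crossattn_upblock2d(pipe, b1=1.2, b2=1.4, s1=0.9, s2=0.2)
+# and restored to the stock behaviour with
+#   register_upblock2d(pipe)
+#   register_crossattn_upblock2d(pipe)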
diff --git a/modules/freescale/freescale_pipeline.py b/modules/freescale/freescale_pipeline.py
new file mode 100644
index 000000000..9b7a68b68
--- /dev/null
+++ b/modules/freescale/freescale_pipeline.py
@@ -0,0 +1,1189 @@
+from inspect import isfunction
+from functools import partial
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+import inspect
+import os
+import random
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from einops import rearrange
+from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
+
+from diffusers.image_processor import VaeImageProcessor
+from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+from diffusers.models import AutoencoderKL, UNet2DConditionModel
+from diffusers.models.attention_processor import AttnProcessor2_0, LoRAAttnProcessor2_0, LoRAXFormersAttnProcessor, XFormersAttnProcessor
+from diffusers.schedulers import KarrasDiffusionSchedulers
+from diffusers.utils.torch_utils import randn_tensor
+from diffusers.utils import is_accelerate_available, is_accelerate_version, logging, replace_example_docstring
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
+from diffusers.models.attention import BasicTransformerBlock
+
+from .scale_attention import ori_forward, scale_forward
+
+
+logger = logging.get_logger(__name__) # pylint: disable=invalid-name
+
+EXAMPLE_DOC_STRING = """
+ Examples:
+ ```py
+ >>> import torch
+ >>> from diffusers import StableDiffusionXLPipeline
+
+ >>> pipe = StableDiffusionXLPipeline.from_pretrained(
+ ... "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
+ ... )
+ >>> pipe = pipe.to("cuda")
+
+ >>> prompt = "a photo of an astronaut riding a horse on mars"
+ >>> image = pipe(prompt).images[0]
+ ```
+"""
+
+def default(val, d):
+ if exists(val):
+ return val
+ return d() if isfunction(d) else d
+
+def exists(val):
+ return val is not None
+
+def extract_into_tensor(a, t, x_shape):
+ b, *_ = t.shape
+ out = a.gather(-1, t)
+ return out.reshape(b, *((1,) * (len(x_shape) - 1)))
+
+def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
+ if schedule == "linear":
+ betas = (
+ torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2
+ )
+ elif schedule == "cosine":
+ timesteps = (
+ torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s
+ )
+ alphas = timesteps / (1 + cosine_s) * np.pi / 2
+ alphas = torch.cos(alphas).pow(2)
+ alphas = alphas / alphas[0]
+ betas = 1 - alphas[1:] / alphas[:-1]
+ betas = np.clip(betas, a_min=0, a_max=0.999)
+ elif schedule == "sqrt_linear":
+ betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64)
+ elif schedule == "sqrt":
+ betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) ** 0.5
+ else:
+ raise ValueError(f"schedule '{schedule}' unknown.")
+ return betas.numpy()
+
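+# Module-level DDPM schedule constants: 1000 linear betas with the Stable Diffusion defaults
+# (linear_start=0.00085, linear_end=0.012); q_sample below reads these cumulative-product tables.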
+to_torch = partial(torch.tensor, dtype=torch.float16)
+betas = make_beta_schedule("linear", 1000, linear_start=0.00085, linear_end=0.012)
+alphas = 1. - betas
+alphas_cumprod = np.cumprod(alphas, axis=0)
+sqrt_alphas_cumprod = to_torch(np.sqrt(alphas_cumprod))
+sqrt_one_minus_alphas_cumprod = to_torch(np.sqrt(1. - alphas_cumprod))
+
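+# Forward-diffusion helper: noises x_start to timestep t as
+# sqrt(alphas_cumprod[t]) * x_start + sqrt(1 - alphas_cumprod[t]) * noise, with noise scaled by init_noise_sigma.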
+def q_sample(x_start, t, init_noise_sigma=1.0, noise=None, device=None):
+ noise = default(noise, lambda: torch.randn_like(x_start)).to(device) * init_noise_sigma
+ return (extract_into_tensor(sqrt_alphas_cumprod.to(device), t, x_start.shape) * x_start +
+ extract_into_tensor(sqrt_one_minus_alphas_cumprod.to(device), t, x_start.shape) * noise)
+
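+# Splits a (height, width) canvas, in latent units, into overlapping sliding windows; when
+# random_jitter is set, interior windows are shifted by a small random offset so tile seams vary.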
+def get_views(height, width, h_window_size=128, w_window_size=128, h_window_stride=64, w_window_stride=64, vae_scale_factor=8):
+ height //= vae_scale_factor
+ width //= vae_scale_factor
+ num_blocks_height = int((height - h_window_size) / h_window_stride - 1e-6) + 2 if height > h_window_size else 1
+ num_blocks_width = int((width - w_window_size) / w_window_stride - 1e-6) + 2 if width > w_window_size else 1
+ total_num_blocks = int(num_blocks_height * num_blocks_width)
+ views = []
+ for i in range(total_num_blocks):
+ h_start = int((i // num_blocks_width) * h_window_stride)
+ h_end = h_start + h_window_size
+ w_start = int((i % num_blocks_width) * w_window_stride)
+ w_end = w_start + w_window_size
+
+ if h_end > height:
+ h_start = int(h_start + height - h_end)
+ h_end = int(height)
+ if w_end > width:
+ w_start = int(w_start + width - w_end)
+ w_end = int(width)
+ if h_start < 0:
+ h_end = int(h_end - h_start)
+ h_start = 0
+ if w_start < 0:
+ w_end = int(w_end - w_start)
+ w_start = 0
+
+ random_jitter = True
+ if random_jitter:
+ h_jitter_range = (h_window_size - h_window_stride) // 4
+ w_jitter_range = (w_window_size - w_window_stride) // 4
+ h_jitter = 0
+ w_jitter = 0
+
+ if (w_start != 0) and (w_end != width):
+ w_jitter = random.randint(-w_jitter_range, w_jitter_range)
+ elif (w_start == 0) and (w_end != width):
+ w_jitter = random.randint(-w_jitter_range, 0)
+ elif (w_start != 0) and (w_end == width):
+ w_jitter = random.randint(0, w_jitter_range)
+ if (h_start != 0) and (h_end != height):
+ h_jitter = random.randint(-h_jitter_range, h_jitter_range)
+ elif (h_start == 0) and (h_end != height):
+ h_jitter = random.randint(-h_jitter_range, 0)
+ elif (h_start != 0) and (h_end == height):
+ h_jitter = random.randint(0, h_jitter_range)
+ h_start += (h_jitter + h_jitter_range)
+ h_end += (h_jitter + h_jitter_range)
+ w_start += (w_jitter + w_jitter_range)
+ w_end += (w_jitter + w_jitter_range)
+
+ views.append((h_start, h_end, w_start, w_end))
+ return views
+
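+# Separable Gaussian blur over latents (depthwise conv, one kernel per channel); 5-D inputs are
+# flattened over the time axis before filtering and reshaped back.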
+def gaussian_kernel(kernel_size=3, sigma=1.0, channels=3):
+ x_coord = torch.arange(kernel_size)
+ gaussian_1d = torch.exp(-(x_coord - (kernel_size - 1) / 2) ** 2 / (2 * sigma ** 2))
+ gaussian_1d = gaussian_1d / gaussian_1d.sum()
+ gaussian_2d = gaussian_1d[:, None] * gaussian_1d[None, :]
+ kernel = gaussian_2d[None, None, :, :].repeat(channels, 1, 1, 1)
+
+ return kernel
+
+def gaussian_filter(latents, kernel_size=3, sigma=1.0):
+ channels = latents.shape[1]
+ kernel = gaussian_kernel(kernel_size, sigma, channels).to(latents.device, latents.dtype)
+ if len(latents.shape) == 5:
+ b = latents.shape[0]
+ latents = rearrange(latents, 'b c t i j -> (b t) c i j')
+ blurred_latents = F.conv2d(latents, kernel, padding=kernel_size//2, groups=channels)
+ blurred_latents = rearrange(blurred_latents, '(b t) c i j -> b c t i j', b=b)
+ else:
+ blurred_latents = F.conv2d(latents, kernel, padding=kernel_size//2, groups=channels)
+
+ return blurred_latents
+
+# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
+def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
+ """
+ Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
+ Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
+ """
+ std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
+ std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
+ # rescale the results from guidance (fixes overexposure)
+ noise_pred_rescaled = noise_cfg * (std_text / std_cfg)
+ # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images
+ noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg
+ return noise_cfg
+
+
+class StableDiffusionXLFreeScale(DiffusionPipeline, FromSingleFileMixin, LoraLoaderMixin):
+ r"""
+ Pipeline for text-to-image generation using Stable Diffusion XL.
+
+ This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
+ library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
+
+    In addition, the pipeline inherits the following loading methods:
+ - *LoRA*: [`StableDiffusionXLPipeline.load_lora_weights`]
+ - *Ckpt*: [`loaders.FromSingleFileMixin.from_single_file`]
+
+ as well as the following saving methods:
+ - *LoRA*: [`loaders.StableDiffusionXLPipeline.save_lora_weights`]
+
+ Args:
+ vae ([`AutoencoderKL`]):
+ Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
+ text_encoder ([`CLIPTextModel`]):
+ Frozen text-encoder. Stable Diffusion XL uses the text portion of
+ [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically
+ the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant.
+        text_encoder_2 ([`CLIPTextModelWithProjection`]):
+ Second frozen text-encoder. Stable Diffusion XL uses the text and pool portion of
+ [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModelWithProjection),
+ specifically the
+ [laion/CLIP-ViT-bigG-14-laion2B-39B-b160k](https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k)
+ variant.
+ tokenizer (`CLIPTokenizer`):
+ Tokenizer of class
+ [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
+ tokenizer_2 (`CLIPTokenizer`):
+ Second Tokenizer of class
+ [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
+ unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents.
+ scheduler ([`SchedulerMixin`]):
+ A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
+ [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
+ """
+
+ def __init__(
+ self,
+ vae: AutoencoderKL,
+ text_encoder: CLIPTextModel,
+ text_encoder_2: CLIPTextModelWithProjection,
+ tokenizer: CLIPTokenizer,
+ tokenizer_2: CLIPTokenizer,
+ unet: UNet2DConditionModel,
+ scheduler: KarrasDiffusionSchedulers,
+ force_zeros_for_empty_prompt: bool = True,
+ ):
+ super().__init__()
+
+ self.register_modules(
+ vae=vae,
+ text_encoder=text_encoder,
+ text_encoder_2=text_encoder_2,
+ tokenizer=tokenizer,
+ tokenizer_2=tokenizer_2,
+ unet=unet,
+ scheduler=scheduler,
+ )
+ self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
+ self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+ self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
+ self.default_sample_size = self.unet.config.sample_size
+ self.vae.enable_tiling()
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
+ def enable_vae_slicing(self):
+ r"""
+ Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
+ compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
+ """
+ self.vae.enable_slicing()
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
+ def disable_vae_slicing(self):
+ r"""
+ Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
+ computing decoding in one step.
+ """
+ self.vae.disable_slicing()
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
+ def enable_vae_tiling(self):
+ r"""
+ Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
+        compute decoding and encoding in several steps. This is useful for saving a large amount of memory and allows
+        processing larger images.
+ """
+ self.vae.enable_tiling()
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
+ def disable_vae_tiling(self):
+ r"""
+ Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
+ computing decoding in one step.
+ """
+ self.vae.disable_tiling()
+
+ def enable_model_cpu_offload(self, gpu_id=0):
+ r"""
+ Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
+ to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
+        method is called, and the model remains on the GPU until the next model runs. Memory savings are lower than with
+ `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
+ """
+ if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
+ from accelerate import cpu_offload_with_hook
+ else:
+ raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
+
+ device = torch.device(f"cuda:{gpu_id}")
+
+ if self.device.type != "cpu":
+ self.to("cpu", silence_dtype_warnings=True)
+ torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
+
+ model_sequence = (
+ [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2]
+ )
+ model_sequence.extend([self.unet, self.vae])
+
+ hook = None
+ for cpu_offloaded_model in model_sequence:
+ _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
+
+ # We'll offload the last model manually.
+ self.final_offload_hook = hook
+
+ def encode_prompt(
+ self,
+ prompt: str,
+ prompt_2: Optional[str] = None,
+ device: Optional[torch.device] = None,
+ num_images_per_prompt: int = 1,
+ do_classifier_free_guidance: bool = True,
+ negative_prompt: Optional[str] = None,
+ negative_prompt_2: Optional[str] = None,
+ prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ lora_scale: Optional[float] = None,
+ ):
+ r"""
+ Encodes the prompt into text encoder hidden states.
+
+ Args:
+ prompt (`str` or `List[str]`, *optional*):
+ prompt to be encoded
+ prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
+ used in both text-encoders
+ device: (`torch.device`):
+ torch device
+ num_images_per_prompt (`int`):
+ number of images that should be generated per prompt
+ do_classifier_free_guidance (`bool`):
+ whether to use classifier free guidance or not
+ negative_prompt (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation. If not defined, one has to pass
+ `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
+ less than `1`).
+ negative_prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
+ `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
+ prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
+ provided, text embeddings will be generated from `prompt` input argument.
+ negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
+ argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
+ lora_scale (`float`, *optional*):
+ A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
+ """
+ device = device or self._execution_device
+
+ # set lora scale so that monkey patched LoRA
+ # function of text encoder can correctly access it
+ if lora_scale is not None and isinstance(self, LoraLoaderMixin):
+ self._lora_scale = lora_scale
+
+ if prompt is not None and isinstance(prompt, str):
+ batch_size = 1
+ elif prompt is not None and isinstance(prompt, list):
+ batch_size = len(prompt)
+ else:
+ batch_size = prompt_embeds.shape[0]
+
+ # Define tokenizers and text encoders
+ tokenizers = [self.tokenizer, self.tokenizer_2] if self.tokenizer is not None else [self.tokenizer_2]
+ text_encoders = (
+ [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2]
+ )
+
+ if prompt_embeds is None:
+ prompt_2 = prompt_2 or prompt
+            # textual inversion: process multi-vector tokens if necessary
+ prompt_embeds_list = []
+ prompts = [prompt, prompt_2]
+ for prompt, tokenizer, text_encoder in zip(prompts, tokenizers, text_encoders):
+ if isinstance(self, TextualInversionLoaderMixin):
+ prompt = self.maybe_convert_prompt(prompt, tokenizer)
+
+ text_inputs = tokenizer(
+ prompt,
+ padding="max_length",
+ max_length=tokenizer.model_max_length,
+ truncation=True,
+ return_tensors="pt",
+ )
+
+ text_input_ids = text_inputs.input_ids
+ untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
+
+ if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
+ text_input_ids, untruncated_ids
+ ):
+ removed_text = tokenizer.batch_decode(untruncated_ids[:, tokenizer.model_max_length - 1 : -1])
+ logger.warning(
+ "The following part of your input was truncated because CLIP can only handle sequences up to"
+ f" {tokenizer.model_max_length} tokens: {removed_text}"
+ )
+
+ prompt_embeds = text_encoder(
+ text_input_ids.to(device),
+ output_hidden_states=True,
+ )
+
+                # We are only interested in the pooled output of the final text encoder
+ pooled_prompt_embeds = prompt_embeds[0]
+ prompt_embeds = prompt_embeds.hidden_states[-2]
+
+ prompt_embeds_list.append(prompt_embeds)
+
+ prompt_embeds = torch.concat(prompt_embeds_list, dim=-1)
+
+ # get unconditional embeddings for classifier free guidance
+ zero_out_negative_prompt = negative_prompt is None and self.config.force_zeros_for_empty_prompt
+ if do_classifier_free_guidance and negative_prompt_embeds is None and zero_out_negative_prompt:
+ negative_prompt_embeds = torch.zeros_like(prompt_embeds)
+ negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)
+ elif do_classifier_free_guidance and negative_prompt_embeds is None:
+ negative_prompt = negative_prompt or ""
+ negative_prompt_2 = negative_prompt_2 or negative_prompt
+
+ uncond_tokens: List[str]
+ if prompt is not None and type(prompt) is not type(negative_prompt):
+ raise TypeError(
+                    f"`negative_prompt` should be the same type as `prompt`, but got {type(negative_prompt)} !="
+ f" {type(prompt)}."
+ )
+ elif isinstance(negative_prompt, str):
+ uncond_tokens = [negative_prompt, negative_prompt_2]
+ elif batch_size != len(negative_prompt):
+ raise ValueError(
+ f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
+ f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
+ " the batch size of `prompt`."
+ )
+ else:
+ uncond_tokens = [negative_prompt, negative_prompt_2]
+
+ negative_prompt_embeds_list = []
+ for negative_prompt, tokenizer, text_encoder in zip(uncond_tokens, tokenizers, text_encoders):
+ if isinstance(self, TextualInversionLoaderMixin):
+ negative_prompt = self.maybe_convert_prompt(negative_prompt, tokenizer)
+
+ max_length = prompt_embeds.shape[1]
+ uncond_input = tokenizer(
+ negative_prompt,
+ padding="max_length",
+ max_length=max_length,
+ truncation=True,
+ return_tensors="pt",
+ )
+
+ negative_prompt_embeds = text_encoder(
+ uncond_input.input_ids.to(device),
+ output_hidden_states=True,
+ )
+                # We are only interested in the pooled output of the final text encoder
+ negative_pooled_prompt_embeds = negative_prompt_embeds[0]
+ negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
+
+ negative_prompt_embeds_list.append(negative_prompt_embeds)
+
+ negative_prompt_embeds = torch.concat(negative_prompt_embeds_list, dim=-1)
+
+ prompt_embeds = prompt_embeds.to(dtype=self.text_encoder_2.dtype, device=device)
+ bs_embed, seq_len, _ = prompt_embeds.shape
+ # duplicate text embeddings for each generation per prompt, using mps friendly method
+ prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
+ prompt_embeds = prompt_embeds.view(bs_embed * num_images_per_prompt, seq_len, -1)
+
+ if do_classifier_free_guidance:
+ # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
+ seq_len = negative_prompt_embeds.shape[1]
+ negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.text_encoder_2.dtype, device=device)
+ negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
+ negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
+
+ pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
+ bs_embed * num_images_per_prompt, -1
+ )
+ if do_classifier_free_guidance:
+ negative_pooled_prompt_embeds = negative_pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
+ bs_embed * num_images_per_prompt, -1
+ )
+
+ return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
+ def prepare_extra_step_kwargs(self, generator, eta):
+ # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
+ # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
+ # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
+ # and should be between [0, 1]
+
+ accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
+ extra_step_kwargs = {}
+ if accepts_eta:
+ extra_step_kwargs["eta"] = eta
+
+ # check if the scheduler accepts generator
+ accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys())
+ if accepts_generator:
+ extra_step_kwargs["generator"] = generator
+ return extra_step_kwargs
+
+ def check_inputs(
+ self,
+ prompt,
+ prompt_2,
+ height,
+ width,
+ callback_steps,
+ negative_prompt=None,
+ negative_prompt_2=None,
+ prompt_embeds=None,
+ negative_prompt_embeds=None,
+ pooled_prompt_embeds=None,
+ negative_pooled_prompt_embeds=None,
+ ):
+ if height % 8 != 0 or width % 8 != 0:
+ raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
+
+ if (callback_steps is None) or (
+ callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0)
+ ):
+ raise ValueError(
+ f"`callback_steps` has to be a positive integer but is {callback_steps} of type"
+ f" {type(callback_steps)}."
+ )
+
+ if prompt is not None and prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
+ " only forward one of the two."
+ )
+ elif prompt_2 is not None and prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `prompt_2`: {prompt_2} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
+ " only forward one of the two."
+ )
+ elif prompt is None and prompt_embeds is None:
+ raise ValueError(
+ "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
+ )
+ elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
+ raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
+ elif prompt_2 is not None and (not isinstance(prompt_2, str) and not isinstance(prompt_2, list)):
+ raise ValueError(f"`prompt_2` has to be of type `str` or `list` but is {type(prompt_2)}")
+
+ if negative_prompt is not None and negative_prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:"
+ f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
+ )
+ elif negative_prompt_2 is not None and negative_prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `negative_prompt_2`: {negative_prompt_2} and `negative_prompt_embeds`:"
+ f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
+ )
+
+ if prompt_embeds is not None and negative_prompt_embeds is not None:
+ if prompt_embeds.shape != negative_prompt_embeds.shape:
+ raise ValueError(
+ "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but"
+ f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
+ f" {negative_prompt_embeds.shape}."
+ )
+
+ if prompt_embeds is not None and pooled_prompt_embeds is None:
+ raise ValueError(
+ "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
+ )
+
+ if negative_prompt_embeds is not None and negative_pooled_prompt_embeds is None:
+ raise ValueError(
+ "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
+ )
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
+ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
+ shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+ if isinstance(generator, list) and len(generator) != batch_size:
+ raise ValueError(
+ f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
+ f" size of {batch_size}. Make sure the batch size matches the length of the generators."
+ )
+
+ if latents is None:
+ latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
+ else:
+ latents = latents.to(device)
+
+ # scale the initial noise by the standard deviation required by the scheduler
+ latents = latents * self.scheduler.init_noise_sigma
+ return latents
+
+ def _get_add_time_ids(self, original_size, crops_coords_top_left, target_size, dtype):
+ add_time_ids = list(original_size + crops_coords_top_left + target_size)
+
+ passed_add_embed_dim = (
+ self.unet.config.addition_time_embed_dim * len(add_time_ids) + self.text_encoder_2.config.projection_dim
+ )
+ expected_add_embed_dim = self.unet.add_embedding.linear_1.in_features
+
+ if expected_add_embed_dim != passed_add_embed_dim:
+ raise ValueError(
+ f"Model expects an added time embedding vector of length {expected_add_embed_dim}, but a vector of {passed_add_embed_dim} was created. The model has an incorrect config. Please check `unet.config.time_embedding_type` and `text_encoder_2.config.projection_dim`."
+ )
+
+ add_time_ids = torch.tensor([add_time_ids], dtype=dtype)
+ return add_time_ids
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_upscale.StableDiffusionUpscalePipeline.upcast_vae
+ def upcast_vae(self):
+ dtype = self.vae.dtype
+ self.vae.to(dtype=torch.float32)
+ use_torch_2_0_or_xformers = isinstance(
+ self.vae.decoder.mid_block.attentions[0].processor,
+ (
+ AttnProcessor2_0,
+ XFormersAttnProcessor,
+ LoRAXFormersAttnProcessor,
+ LoRAAttnProcessor2_0,
+ ),
+ )
+ # if xformers or torch_2_0 is used attention block does not need
+ # to be in float32 which can save lots of memory
+ if use_torch_2_0_or_xformers:
+ self.vae.post_quant_conv.to(dtype)
+ self.vae.decoder.conv_in.to(dtype)
+ self.vae.decoder.mid_block.to(dtype)
+
+ @torch.no_grad()
+ @replace_example_docstring(EXAMPLE_DOC_STRING)
+ def __call__(
+ self,
+ prompt: Union[str, List[str]] = None,
+ prompt_2: Optional[Union[str, List[str]]] = None,
+ height: Optional[int] = None,
+ width: Optional[int] = None,
+ num_inference_steps: int = 50,
+ denoising_end: Optional[float] = None,
+ guidance_scale: float = 5.0,
+ negative_prompt: Optional[Union[str, List[str]]] = None,
+ negative_prompt_2: Optional[Union[str, List[str]]] = None,
+ num_images_per_prompt: Optional[int] = 1,
+ eta: float = 0.0,
+ generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
+ latents: Optional[torch.FloatTensor] = None,
+ prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ output_type: Optional[str] = "pil",
+ return_dict: bool = True,
+ callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+ callback_steps: int = 1,
+ cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+ guidance_rescale: float = 0.0,
+ original_size: Optional[Tuple[int, int]] = None,
+ crops_coords_top_left: Tuple[int, int] = (0, 0),
+ target_size: Optional[Tuple[int, int]] = None,
+ resolutions_list: Optional[Union[int, List[int]]] = None,
+ restart_steps: Optional[Union[int, List[int]]] = None,
+ cosine_scale: float = 2.0,
+ dilate_tau: int = 35,
+ ):
+ r"""
+ Function invoked when calling the pipeline for generation.
+
+ Args:
+ prompt (`str` or `List[str]`, *optional*):
+                The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
+                instead.
+ prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
+ used in both text-encoders
+ height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
+ The height in pixels of the generated image.
+ width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
+ The width in pixels of the generated image.
+ num_inference_steps (`int`, *optional*, defaults to 50):
+ The number of denoising steps. More denoising steps usually lead to a higher quality image at the
+ expense of slower inference.
+ denoising_end (`float`, *optional*):
+ When specified, determines the fraction (between 0.0 and 1.0) of the total denoising process to be
+ completed before it is intentionally prematurely terminated. As a result, the returned sample will
+ still retain a substantial amount of noise as determined by the discrete timesteps selected by the
+ scheduler. The denoising_end parameter should ideally be utilized when this pipeline forms a part of a
+ "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
+ Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
+ guidance_scale (`float`, *optional*, defaults to 5.0):
+ Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
+                `guidance_scale` is defined as `w` of equation 2 of [Imagen
+ Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
+ 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
+ usually at the expense of lower image quality.
+ negative_prompt (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation. If not defined, one has to pass
+ `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
+ less than `1`).
+ negative_prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
+ `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
+ num_images_per_prompt (`int`, *optional*, defaults to 1):
+ The number of images to generate per prompt.
+ eta (`float`, *optional*, defaults to 0.0):
+ Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
+ [`schedulers.DDIMScheduler`], will be ignored for others.
+ generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
+ One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
+ to make generation deterministic.
+ latents (`torch.FloatTensor`, *optional*):
+ Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
+ generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
+                tensor will be generated by sampling using the supplied random `generator`.
+ prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
+ provided, text embeddings will be generated from `prompt` input argument.
+ negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
+ argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
+ output_type (`str`, *optional*, defaults to `"pil"`):
+                The output format of the generated image. Choose between
+ [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
+ return_dict (`bool`, *optional*, defaults to `True`):
+ Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead
+ of a plain tuple.
+ callback (`Callable`, *optional*):
+ A function that will be called every `callback_steps` steps during inference. The function will be
+ called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+ callback_steps (`int`, *optional*, defaults to 1):
+ The frequency at which the `callback` function will be called. If not specified, the callback will be
+ called at every step.
+ cross_attention_kwargs (`dict`, *optional*):
+ A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
+ `self.processor` in
+ [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
+            guidance_rescale (`float`, *optional*, defaults to 0.0):
+                Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
+                Flawed](https://arxiv.org/pdf/2305.08891.pdf). `guidance_rescale` is defined as `φ` in equation 16 of
+                [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+ Guidance rescale factor should fix overexposure when using zero terminal SNR.
+ original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+ If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
+                `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
+ explained in section 2.2 of
+ [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+ crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
+ `crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
+ `crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
+ `crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
+ [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+ target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+ For most cases, `target_size` should be set to the desired height and width of the generated image. If
+                not specified, it will default to `(height, width)`. Part of SDXL's micro-conditioning as explained in
+ section 2.2 of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+
+ Examples:
+
+ Returns:
+ [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] or `tuple`:
+ [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] if `return_dict` is True, otherwise a
+ `tuple`. When returning a tuple, the first element is a list with the generated images.
+ """
+
+ # 0. Default height and width to unet
+ if resolutions_list:
+ height, width = resolutions_list[0]
+ target_sizes = resolutions_list[1:]
+ if not restart_steps:
+ restart_steps = [15] * len(target_sizes)
+ else:
+ height = height or self.default_sample_size * self.vae_scale_factor
+ width = width or self.default_sample_size * self.vae_scale_factor
+
+ original_size = original_size or (height, width)
+ target_size = target_size or (height, width)
+
+ # 1. Check inputs. Raise error if not correct
+ self.check_inputs(
+ prompt,
+ prompt_2,
+ height,
+ width,
+ callback_steps,
+ negative_prompt,
+ negative_prompt_2,
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
+ )
+
+ # 2. Define call parameters
+ if prompt is not None and isinstance(prompt, str):
+ batch_size = 1
+ elif prompt is not None and isinstance(prompt, list):
+ batch_size = len(prompt)
+ else:
+ batch_size = prompt_embeds.shape[0]
+
+ device = self._execution_device
+
+        # here `guidance_scale` is defined analogously to the guidance weight `w` of equation (2)
+        # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf. `guidance_scale = 1`
+ # corresponds to doing no classifier free guidance.
+ do_classifier_free_guidance = guidance_scale > 1.0
+
+ # 3. Encode input prompt
+ text_encoder_lora_scale = (
+ cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None
+ )
+ (
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
+ ) = self.encode_prompt(
+ prompt=prompt,
+ prompt_2=prompt_2,
+ device=device,
+ num_images_per_prompt=num_images_per_prompt,
+ do_classifier_free_guidance=do_classifier_free_guidance,
+ negative_prompt=negative_prompt,
+ negative_prompt_2=negative_prompt_2,
+ prompt_embeds=prompt_embeds,
+ negative_prompt_embeds=negative_prompt_embeds,
+ pooled_prompt_embeds=pooled_prompt_embeds,
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
+ lora_scale=text_encoder_lora_scale,
+ )
+
+ # 4. Prepare timesteps
+ self.scheduler.set_timesteps(num_inference_steps, device=device)
+
+ timesteps = self.scheduler.timesteps
+
+ # 5. Prepare latent variables
+ num_channels_latents = self.unet.config.in_channels
+ latents = self.prepare_latents(
+ batch_size * num_images_per_prompt,
+ num_channels_latents,
+ height,
+ width,
+ prompt_embeds.dtype,
+ device,
+ generator,
+ latents,
+ )
+
+ # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
+ extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
+
+ # 7. Prepare added time ids & embeddings
+ add_text_embeds = pooled_prompt_embeds
+ add_time_ids = self._get_add_time_ids(
+ original_size, crops_coords_top_left, target_size, dtype=prompt_embeds.dtype
+ )
+
+ if do_classifier_free_guidance:
+ prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
+ add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
+ add_time_ids = torch.cat([add_time_ids, add_time_ids], dim=0)
+
+ prompt_embeds = prompt_embeds.to(device)
+ add_text_embeds = add_text_embeds.to(device)
+ add_time_ids = add_time_ids.to(device).repeat(batch_size * num_images_per_prompt, 1)
+
+ # 8. Denoising loop
+ num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
+
+ # 9.1 Apply denoising_end
+ if denoising_end is not None and type(denoising_end) == float and denoising_end > 0 and denoising_end < 1:
+ discrete_timestep_cutoff = int(
+ round(
+ self.scheduler.config.num_train_timesteps
+ - (denoising_end * self.scheduler.config.num_train_timesteps)
+ )
+ )
+ num_inference_steps = len(list(filter(lambda ts: ts >= discrete_timestep_cutoff, timesteps)))
+ timesteps = timesteps[:num_inference_steps]
+
+ results_list = []
+
+ for block in self.unet.down_blocks + [self.unet.mid_block] + self.unet.up_blocks:
+ for module in block.modules():
+ if isinstance(module, BasicTransformerBlock):
+ module.forward = ori_forward.__get__(module, BasicTransformerBlock)
+
+ with self.progress_bar(total=num_inference_steps) as progress_bar:
+ for i, t in enumerate(timesteps):
+ # expand the latents if we are doing classifier free guidance
+ latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+
+ latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+
+ # predict the noise residual
+ added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
+ noise_pred = self.unet(
+ latent_model_input,
+ t,
+ encoder_hidden_states=prompt_embeds,
+ cross_attention_kwargs=cross_attention_kwargs,
+ added_cond_kwargs=added_cond_kwargs,
+ return_dict=False,
+ )[0]
+
+ # perform guidance
+ if do_classifier_free_guidance:
+ noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+ noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+ if do_classifier_free_guidance and guidance_rescale > 0.0:
+ # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
+ noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
+
+ # compute the previous noisy sample x_t -> x_t-1
+ latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
+
+ # call the callback, if provided
+ if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+ progress_bar.update()
+ if callback is not None and i % callback_steps == 0:
+ callback(i, t, latents)
+ results_list.append(latents)
+
+ for restart_index, target_size in enumerate(target_sizes):
+ restart_step = restart_steps[restart_index]
+ target_size_ = [target_size[0]//8, target_size[1]//8]
+
+ for block in self.unet.down_blocks + [self.unet.mid_block] + self.unet.up_blocks:
+ for module in block.modules():
+ if isinstance(module, BasicTransformerBlock):
+ module.forward = scale_forward.__get__(module, BasicTransformerBlock)
+ module.current_hw = target_size
+
+ needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
+ if needs_upcasting:
+ self.upcast_vae()
+ latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
+
+ latents = latents / self.vae.config.scaling_factor
+ image = self.vae.decode(latents, return_dict=False)[0]
+ image = torch.nn.functional.interpolate(
+ image,
+ size=target_size,
+ mode='bicubic',
+ )
+ latents = self.vae.encode(image).latent_dist.sample().to(self.vae.dtype)
+ latents = latents * self.vae.config.scaling_factor
+
+ noise_latents = []
+ noise = torch.randn_like(latents)
+ for timestep in self.scheduler.timesteps:
+ noise_latent = self.scheduler.add_noise(latents, noise, timestep.unsqueeze(0))
+ noise_latents.append(noise_latent)
+ latents = noise_latents[restart_step]
+
+ self.scheduler._step_index = 0
+ with self.progress_bar(total=num_inference_steps) as progress_bar:
+ for i, t in enumerate(timesteps):
+
+ if i < restart_step:
+ self.scheduler._step_index += 1
+ progress_bar.update()
+ continue
+
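+                    # Blend the current latents with the re-noised base latents: the cosine factor is ~1 at the
+                    # start of sampling (re-noised latents dominate) and decays to 0, with cosine_scale sharpening the decay.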
+ cosine_factor = 0.5 * (1 + torch.cos(torch.pi * (self.scheduler.config.num_train_timesteps - t) / self.scheduler.config.num_train_timesteps)).cpu()
+ c1 = cosine_factor ** cosine_scale
+ latents = latents * (1 - c1) + noise_latents[i] * c1
+
+                    dilate_coef = target_size[1] // 1024
+
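+                    # For the first dilate_tau steps, dilate selected down/mid conv layers by the upscale
+                    # ratio (target width // 1024) to widen their receptive field at the larger resolution.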
+ dilate_layers = [
+ # "down_blocks.1.resnets.0.conv1",
+ # "down_blocks.1.resnets.0.conv2",
+ # "down_blocks.1.resnets.1.conv1",
+ # "down_blocks.1.resnets.1.conv2",
+ "down_blocks.1.downsamplers.0.conv",
+ "down_blocks.2.resnets.0.conv1",
+ "down_blocks.2.resnets.0.conv2",
+ "down_blocks.2.resnets.1.conv1",
+ "down_blocks.2.resnets.1.conv2",
+ # "up_blocks.0.resnets.0.conv1",
+ # "up_blocks.0.resnets.0.conv2",
+ # "up_blocks.0.resnets.1.conv1",
+ # "up_blocks.0.resnets.1.conv2",
+ # "up_blocks.0.resnets.2.conv1",
+ # "up_blocks.0.resnets.2.conv2",
+ # "up_blocks.0.upsamplers.0.conv",
+ # "up_blocks.1.resnets.0.conv1",
+ # "up_blocks.1.resnets.0.conv2",
+ # "up_blocks.1.resnets.1.conv1",
+ # "up_blocks.1.resnets.1.conv2",
+ # "up_blocks.1.resnets.2.conv1",
+ # "up_blocks.1.resnets.2.conv2",
+ # "up_blocks.1.upsamplers.0.conv",
+ # "up_blocks.2.resnets.0.conv1",
+ # "up_blocks.2.resnets.0.conv2",
+ # "up_blocks.2.resnets.1.conv1",
+ # "up_blocks.2.resnets.1.conv2",
+ # "up_blocks.2.resnets.2.conv1",
+ # "up_blocks.2.resnets.2.conv2",
+ "mid_block.resnets.0.conv1",
+ "mid_block.resnets.0.conv2",
+ "mid_block.resnets.1.conv1",
+ "mid_block.resnets.1.conv2"
+ ]
+
+ for name, module in self.unet.named_modules():
+ if name in dilate_layers:
+ if i < dilate_tau:
+ module.dilation = (dilate_coef, dilate_coef)
+ module.padding = (dilate_coef, dilate_coef)
+ else:
+ module.dilation = (1, 1)
+ module.padding = (1, 1)
+
+ # expand the latents if we are doing classifier free guidance
+ latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+
+ latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+
+
+ # predict the noise residual
+ added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
+ noise_pred = self.unet(
+ latent_model_input,
+ t,
+ encoder_hidden_states=prompt_embeds,
+ cross_attention_kwargs=cross_attention_kwargs,
+ added_cond_kwargs=added_cond_kwargs,
+ return_dict=False,
+ )[0]
+
+ # perform guidance
+ if do_classifier_free_guidance:
+ noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+ noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+ if do_classifier_free_guidance and guidance_rescale > 0.0:
+ # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
+ noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
+
+ # compute the previous noisy sample x_t -> x_t-1
+ latents_dtype = latents.dtype
+ latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
+ if latents.dtype != latents_dtype:
+ if torch.backends.mps.is_available():
+                            # some platforms (e.g. Apple MPS) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
+ latents = latents.to(latents_dtype)
+
+ # call the callback, if provided
+ if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+ progress_bar.update()
+ if callback is not None and i % callback_steps == 0:
+ callback(i, t, latents)
+
+ for name, module in self.unet.named_modules():
+ # if ('.conv' in name) and ('.conv_' not in name):
+ if name in dilate_layers:
+ module.dilation = (1, 1)
+ module.padding = (1, 1)
+
+ results_list.append(latents)
+
+ """
+ final_results = []
+ for latents in results_list:
+ # make sure the VAE is in float32 mode, as it overflows in float16
+ if self.vae.dtype == torch.float16 and self.vae.config.force_upcast:
+ self.upcast_vae()
+ latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
+
+ if not output_type == "latent":
+ image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
+ else:
+ image = latents
+ return StableDiffusionXLPipelineOutput(images=image)
+
+ image = self.image_processor.postprocess(image, output_type=output_type)
+
+ if not return_dict:
+ final_results += [(image,)]
+ else:
+ final_results += [StableDiffusionXLPipelineOutput(images=image)]
+
+ # Offload last model to CPU
+ if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
+ self.final_offload_hook.offload()
+
+ return final_results
+ """
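+        # NOTE: the decode/postprocess path above is left commented out, so the caller receives the
+        # raw latents for every resolution stage and is expected to decode them itself.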
+ return StableDiffusionXLPipelineOutput(images=results_list)
+
+    # Override to properly handle the loading and unloading of the additional text encoder.
+ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], **kwargs):
+ # We could have accessed the unet config from `lora_state_dict()` too. We pass
+ # it here explicitly to be able to tell that it's coming from an SDXL
+ # pipeline.
+ state_dict, network_alphas = self.lora_state_dict(
+ pretrained_model_name_or_path_or_dict,
+ unet_config=self.unet.config,
+ **kwargs,
+ )
+ self.load_lora_into_unet(state_dict, network_alphas=network_alphas, unet=self.unet)
+
+ text_encoder_state_dict = {k: v for k, v in state_dict.items() if "text_encoder." in k}
+ if len(text_encoder_state_dict) > 0:
+ self.load_lora_into_text_encoder(
+ text_encoder_state_dict,
+ network_alphas=network_alphas,
+ text_encoder=self.text_encoder,
+ prefix="text_encoder",
+ lora_scale=self.lora_scale,
+ )
+
+ text_encoder_2_state_dict = {k: v for k, v in state_dict.items() if "text_encoder_2." in k}
+ if len(text_encoder_2_state_dict) > 0:
+ self.load_lora_into_text_encoder(
+ text_encoder_2_state_dict,
+ network_alphas=network_alphas,
+ text_encoder=self.text_encoder_2,
+ prefix="text_encoder_2",
+ lora_scale=self.lora_scale,
+ )
+
+ @classmethod
+ def save_lora_weights(
+ self,
+ save_directory: Union[str, os.PathLike],
+ unet_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
+ text_encoder_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
+ text_encoder_2_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
+ is_main_process: bool = True,
+ weight_name: str = None,
+ save_function: Callable = None,
+ safe_serialization: bool = True,
+ ):
+ state_dict = {}
+
+ def pack_weights(layers, prefix):
+ layers_weights = layers.state_dict() if isinstance(layers, torch.nn.Module) else layers
+ layers_state_dict = {f"{prefix}.{module_name}": param for module_name, param in layers_weights.items()}
+ return layers_state_dict
+
+ state_dict.update(pack_weights(unet_lora_layers, "unet"))
+
+ if text_encoder_lora_layers and text_encoder_2_lora_layers:
+ state_dict.update(pack_weights(text_encoder_lora_layers, "text_encoder"))
+ state_dict.update(pack_weights(text_encoder_2_lora_layers, "text_encoder_2"))
+
+ self.write_lora_layers(
+ state_dict=state_dict,
+ save_directory=save_directory,
+ is_main_process=is_main_process,
+ weight_name=weight_name,
+ save_function=save_function,
+ safe_serialization=safe_serialization,
+ )
+
+ def _remove_text_encoder_monkey_patch(self):
+ self._remove_text_encoder_monkey_patch_classmethod(self.text_encoder)
+ self._remove_text_encoder_monkey_patch_classmethod(self.text_encoder_2)
diff --git a/modules/freescale/freescale_pipeline_img2img.py b/modules/freescale/freescale_pipeline_img2img.py
new file mode 100644
index 000000000..df4c3f0c1
--- /dev/null
+++ b/modules/freescale/freescale_pipeline_img2img.py
@@ -0,0 +1,1245 @@
+from inspect import isfunction
+from functools import partial
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+import inspect
+import os
+import random
+
+from PIL import Image
+import numpy as np
+import torch
+import torch.nn.functional as F
+from einops import rearrange
+from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
+import torchvision.transforms as transforms
+
+from diffusers.image_processor import VaeImageProcessor
+from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+from diffusers.models import AutoencoderKL, UNet2DConditionModel
+from diffusers.models.attention_processor import AttnProcessor2_0, LoRAAttnProcessor2_0, LoRAXFormersAttnProcessor, XFormersAttnProcessor
+from diffusers.schedulers import KarrasDiffusionSchedulers
+from diffusers.utils.torch_utils import randn_tensor
+from diffusers.utils import is_accelerate_available, is_accelerate_version, logging, replace_example_docstring
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from diffusers.pipelines.stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
+from diffusers.models.attention import BasicTransformerBlock
+
+from .scale_attention import ori_forward, scale_forward
+
+
+logger = logging.get_logger(__name__) # pylint: disable=invalid-name
+
+EXAMPLE_DOC_STRING = """
+ Examples:
+ ```py
+ >>> import torch
+ >>> from diffusers import StableDiffusionXLPipeline
+
+ >>> pipe = StableDiffusionXLPipeline.from_pretrained(
+ ... "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
+ ... )
+ >>> pipe = pipe.to("cuda")
+
+ >>> prompt = "a photo of an astronaut riding a horse on mars"
+ >>> image = pipe(prompt).images[0]
+ ```
+"""
+
+def process_image_to_tensor(image):
+ image = image.convert("RGB")
+ # image = Image.open(image_path).convert("RGB")
+ transform = transforms.Compose(
+ [
+ # transforms.Resize((1024, 1024)),
+ transforms.ToTensor(),
+ transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
+ ]
+ )
+ image_tensor = transform(image)
+ return image_tensor
+
+def process_image_to_bitensor(image):
+ # image = Image.open(image_path).convert("L")
+ image = image.convert("L")
+ transform = transforms.ToTensor()
+ image_tensor = transform(image)
+ binary_tensor = torch.where(image_tensor != 0, torch.tensor(1.0), torch.tensor(0.0))
+ return binary_tensor
+
+def default(val, d):
+ if exists(val):
+ return val
+ return d() if isfunction(d) else d
+
+def exists(val):
+ return val is not None
+
+def extract_into_tensor(a, t, x_shape):
+ b, *_ = t.shape
+ out = a.gather(-1, t)
+ return out.reshape(b, *((1,) * (len(x_shape) - 1)))
+
+def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
+ if schedule == "linear":
+ betas = (
+ torch.linspace(linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64) ** 2
+ )
+ elif schedule == "cosine":
+ timesteps = (
+ torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s
+ )
+ alphas = timesteps / (1 + cosine_s) * np.pi / 2
+ alphas = torch.cos(alphas).pow(2)
+ alphas = alphas / alphas[0]
+ betas = 1 - alphas[1:] / alphas[:-1]
+ betas = np.clip(betas, a_min=0, a_max=0.999)
+ elif schedule == "sqrt_linear":
+ betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64)
+ elif schedule == "sqrt":
+ betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) ** 0.5
+ else:
+ raise ValueError(f"schedule '{schedule}' unknown.")
+ return betas.numpy()
+
+to_torch = partial(torch.tensor, dtype=torch.float16)
+betas = make_beta_schedule("linear", 1000, linear_start=0.00085, linear_end=0.012)
+alphas = 1. - betas
+alphas_cumprod = np.cumprod(alphas, axis=0)
+sqrt_alphas_cumprod = to_torch(np.sqrt(alphas_cumprod))
+sqrt_one_minus_alphas_cumprod = to_torch(np.sqrt(1. - alphas_cumprod))
+
+def q_sample(x_start, t, init_noise_sigma = 1.0, noise=None, device=None):
+ noise = default(noise, lambda: torch.randn_like(x_start)).to(device) * init_noise_sigma
+ return (extract_into_tensor(sqrt_alphas_cumprod.to(device), t, x_start.shape) * x_start +
+ extract_into_tensor(sqrt_one_minus_alphas_cumprod.to(device), t, x_start.shape) * noise)
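+
+# Note: q_sample implements the standard DDPM forward-diffusion step,
+#   x_t = sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * noise,
+# using the cumulative-product tables precomputed above from the "linear" beta schedule;
+# `init_noise_sigma` only rescales the sampled noise. Illustrative call (shapes assumed):
+#   t = torch.full((latents.shape[0],), 999, device=latents.device, dtype=torch.long)
+#   noisy = q_sample(latents, t, device=latents.device)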
+
+def get_views(height, width, h_window_size=128, w_window_size=128, h_window_stride=64, w_window_stride=64, vae_scale_factor=8):
+ height //= vae_scale_factor
+ width //= vae_scale_factor
+ num_blocks_height = int((height - h_window_size) / h_window_stride - 1e-6) + 2 if height > h_window_size else 1
+ num_blocks_width = int((width - w_window_size) / w_window_stride - 1e-6) + 2 if width > w_window_size else 1
+ total_num_blocks = int(num_blocks_height * num_blocks_width)
+ views = []
+ for i in range(total_num_blocks):
+ h_start = int((i // num_blocks_width) * h_window_stride)
+ h_end = h_start + h_window_size
+ w_start = int((i % num_blocks_width) * w_window_stride)
+ w_end = w_start + w_window_size
+
+ if h_end > height:
+ h_start = int(h_start + height - h_end)
+ h_end = int(height)
+ if w_end > width:
+ w_start = int(w_start + width - w_end)
+ w_end = int(width)
+ if h_start < 0:
+ h_end = int(h_end - h_start)
+ h_start = 0
+ if w_start < 0:
+ w_end = int(w_end - w_start)
+ w_start = 0
+
+ random_jitter = True
+ if random_jitter:
+ h_jitter_range = (h_window_size - h_window_stride) // 4
+ w_jitter_range = (w_window_size - w_window_stride) // 4
+ h_jitter = 0
+ w_jitter = 0
+
+ if (w_start != 0) and (w_end != width):
+ w_jitter = random.randint(-w_jitter_range, w_jitter_range)
+ elif (w_start == 0) and (w_end != width):
+ w_jitter = random.randint(-w_jitter_range, 0)
+ elif (w_start != 0) and (w_end == width):
+ w_jitter = random.randint(0, w_jitter_range)
+ if (h_start != 0) and (h_end != height):
+ h_jitter = random.randint(-h_jitter_range, h_jitter_range)
+ elif (h_start == 0) and (h_end != height):
+ h_jitter = random.randint(-h_jitter_range, 0)
+ elif (h_start != 0) and (h_end == height):
+ h_jitter = random.randint(0, h_jitter_range)
+ h_start += (h_jitter + h_jitter_range)
+ h_end += (h_jitter + h_jitter_range)
+ w_start += (w_jitter + w_jitter_range)
+ w_end += (w_jitter + w_jitter_range)
+
+ views.append((h_start, h_end, w_start, w_end))
+ return views
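+
+# Worked example (illustrative numbers): for a 2048x2048-pixel target with vae_scale_factor=8 the
+# latent grid is 256x256; with the default 128-cell windows and stride 64 this yields
+# int((256 - 128) / 64 - 1e-6) + 2 = 3 blocks per axis, i.e. 9 overlapping views. Each view is then
+# randomly jittered by up to (128 - 64) // 4 = 16 latent cells (one-sided at the borders), and all
+# coordinates are shifted by the jitter range, so they index into a tensor padded by that amount.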
+
+def gaussian_kernel(kernel_size=3, sigma=1.0, channels=3):
+ x_coord = torch.arange(kernel_size)
+ gaussian_1d = torch.exp(-(x_coord - (kernel_size - 1) / 2) ** 2 / (2 * sigma ** 2))
+ gaussian_1d = gaussian_1d / gaussian_1d.sum()
+ gaussian_2d = gaussian_1d[:, None] * gaussian_1d[None, :]
+ kernel = gaussian_2d[None, None, :, :].repeat(channels, 1, 1, 1)
+
+ return kernel
+
+def gaussian_filter(latents, kernel_size=3, sigma=1.0):
+ channels = latents.shape[1]
+ kernel = gaussian_kernel(kernel_size, sigma, channels).to(latents.device, latents.dtype)
+ if len(latents.shape) == 5:
+ b = latents.shape[0]
+ latents = rearrange(latents, 'b c t i j -> (b t) c i j')
+ blurred_latents = F.conv2d(latents, kernel, padding=kernel_size//2, groups=channels)
+ blurred_latents = rearrange(blurred_latents, '(b t) c i j -> b c t i j', b=b)
+ else:
+ blurred_latents = F.conv2d(latents, kernel, padding=kernel_size//2, groups=channels)
+
+ return blurred_latents
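+
+# Note: gaussian_kernel is a depthwise 2-D Gaussian built from the outer product of a normalized
+# 1-D Gaussian; gaussian_filter applies it with groups=channels so every latent channel is blurred
+# independently, flattening 5-D (b c t i j) inputs over time before the conv and restoring them after.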
+
+# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
+def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
+ """
+ Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
+ Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
+ """
+ std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
+ std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
+ # rescale the results from guidance (fixes overexposure)
+ noise_pred_rescaled = noise_cfg * (std_text / std_cfg)
+ # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images
+ noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg
+ return noise_cfg
+
+
+class StableDiffusionXLFreeScaleImg2Img(DiffusionPipeline, FromSingleFileMixin, LoraLoaderMixin):
+ r"""
+    Pipeline for FreeScale image-to-image generation using Stable Diffusion XL.
+
+ This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
+ library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
+
+ In addition the pipeline inherits the following loading methods:
+ - *LoRA*: [`StableDiffusionXLPipeline.load_lora_weights`]
+ - *Ckpt*: [`loaders.FromSingleFileMixin.from_single_file`]
+
+ as well as the following saving methods:
+ - *LoRA*: [`loaders.StableDiffusionXLPipeline.save_lora_weights`]
+
+ Args:
+ vae ([`AutoencoderKL`]):
+ Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
+ text_encoder ([`CLIPTextModel`]):
+ Frozen text-encoder. Stable Diffusion XL uses the text portion of
+ [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically
+ the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant.
+        text_encoder_2 ([`CLIPTextModelWithProjection`]):
+ Second frozen text-encoder. Stable Diffusion XL uses the text and pool portion of
+ [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModelWithProjection),
+ specifically the
+ [laion/CLIP-ViT-bigG-14-laion2B-39B-b160k](https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k)
+ variant.
+ tokenizer (`CLIPTokenizer`):
+ Tokenizer of class
+ [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
+ tokenizer_2 (`CLIPTokenizer`):
+ Second Tokenizer of class
+ [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer).
+ unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents.
+ scheduler ([`SchedulerMixin`]):
+ A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
+ [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
+ """
+
+ def __init__(
+ self,
+ vae: AutoencoderKL,
+ text_encoder: CLIPTextModel,
+ text_encoder_2: CLIPTextModelWithProjection,
+ tokenizer: CLIPTokenizer,
+ tokenizer_2: CLIPTokenizer,
+ unet: UNet2DConditionModel,
+ scheduler: KarrasDiffusionSchedulers,
+ force_zeros_for_empty_prompt: bool = True,
+ add_watermarker: Optional[bool] = None,
+ ):
+ super().__init__()
+
+ self.register_modules(
+ vae=vae,
+ text_encoder=text_encoder,
+ text_encoder_2=text_encoder_2,
+ tokenizer=tokenizer,
+ tokenizer_2=tokenizer_2,
+ unet=unet,
+ scheduler=scheduler,
+ )
+ self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
+ self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+ self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
+ self.default_sample_size = self.unet.config.sample_size
+
+ self.vae.enable_tiling()
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
+ def enable_vae_slicing(self):
+ r"""
+ Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
+ compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
+ """
+ self.vae.enable_slicing()
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
+ def disable_vae_slicing(self):
+ r"""
+ Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
+ computing decoding in one step.
+ """
+ self.vae.disable_slicing()
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
+ def enable_vae_tiling(self):
+ r"""
+ Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
+ compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
+ processing larger images.
+ """
+ self.vae.enable_tiling()
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
+ def disable_vae_tiling(self):
+ r"""
+ Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
+ computing decoding in one step.
+ """
+ self.vae.disable_tiling()
+
+ def enable_model_cpu_offload(self, gpu_id=0):
+ r"""
+ Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
+ to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
+        method is called, and the model remains on the GPU until the next model runs. Memory savings are lower than with
+ `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
+ """
+ if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
+ from accelerate import cpu_offload_with_hook
+ else:
+ raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
+
+ device = torch.device(f"cuda:{gpu_id}")
+
+ if self.device.type != "cpu":
+ self.to("cpu", silence_dtype_warnings=True)
+ torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
+
+ model_sequence = (
+ [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2]
+ )
+ model_sequence.extend([self.unet, self.vae])
+
+ hook = None
+ for cpu_offloaded_model in model_sequence:
+ _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
+
+ # We'll offload the last model manually.
+ self.final_offload_hook = hook
+
+ def encode_prompt(
+ self,
+ prompt: str,
+ prompt_2: Optional[str] = None,
+ device: Optional[torch.device] = None,
+ num_images_per_prompt: int = 1,
+ do_classifier_free_guidance: bool = True,
+ negative_prompt: Optional[str] = None,
+ negative_prompt_2: Optional[str] = None,
+ prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ lora_scale: Optional[float] = None,
+ ):
+ r"""
+ Encodes the prompt into text encoder hidden states.
+
+ Args:
+ prompt (`str` or `List[str]`, *optional*):
+ prompt to be encoded
+ prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
+ used in both text-encoders
+ device: (`torch.device`):
+ torch device
+ num_images_per_prompt (`int`):
+ number of images that should be generated per prompt
+ do_classifier_free_guidance (`bool`):
+ whether to use classifier free guidance or not
+ negative_prompt (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation. If not defined, one has to pass
+ `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
+ less than `1`).
+ negative_prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
+ `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
+ prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
+ provided, text embeddings will be generated from `prompt` input argument.
+ negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
+ argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
+ lora_scale (`float`, *optional*):
+ A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
+ """
+ device = device or self._execution_device
+
+ # set lora scale so that monkey patched LoRA
+ # function of text encoder can correctly access it
+ if lora_scale is not None and isinstance(self, LoraLoaderMixin):
+ self._lora_scale = lora_scale
+
+ if prompt is not None and isinstance(prompt, str):
+ batch_size = 1
+ elif prompt is not None and isinstance(prompt, list):
+ batch_size = len(prompt)
+ else:
+ batch_size = prompt_embeds.shape[0]
+
+ # Define tokenizers and text encoders
+ tokenizers = [self.tokenizer, self.tokenizer_2] if self.tokenizer is not None else [self.tokenizer_2]
+ text_encoders = (
+ [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2]
+ )
+
+ if prompt_embeds is None:
+ prompt_2 = prompt_2 or prompt
+            # textual inversion: process multi-vector tokens if necessary
+ prompt_embeds_list = []
+ prompts = [prompt, prompt_2]
+ for prompt, tokenizer, text_encoder in zip(prompts, tokenizers, text_encoders):
+ if isinstance(self, TextualInversionLoaderMixin):
+ prompt = self.maybe_convert_prompt(prompt, tokenizer)
+
+ text_inputs = tokenizer(
+ prompt,
+ padding="max_length",
+ max_length=tokenizer.model_max_length,
+ truncation=True,
+ return_tensors="pt",
+ )
+
+ text_input_ids = text_inputs.input_ids
+ untruncated_ids = tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
+
+ if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(
+ text_input_ids, untruncated_ids
+ ):
+ removed_text = tokenizer.batch_decode(untruncated_ids[:, tokenizer.model_max_length - 1 : -1])
+ logger.warning(
+ "The following part of your input was truncated because CLIP can only handle sequences up to"
+ f" {tokenizer.model_max_length} tokens: {removed_text}"
+ )
+
+ prompt_embeds = text_encoder(
+ text_input_ids.to(device),
+ output_hidden_states=True,
+ )
+
+ # We are only ALWAYS interested in the pooled output of the final text encoder
+ pooled_prompt_embeds = prompt_embeds[0]
+ prompt_embeds = prompt_embeds.hidden_states[-2]
+
+ prompt_embeds_list.append(prompt_embeds)
+
+ prompt_embeds = torch.concat(prompt_embeds_list, dim=-1)
+
+ # get unconditional embeddings for classifier free guidance
+ zero_out_negative_prompt = negative_prompt is None and self.config.force_zeros_for_empty_prompt
+ if do_classifier_free_guidance and negative_prompt_embeds is None and zero_out_negative_prompt:
+ negative_prompt_embeds = torch.zeros_like(prompt_embeds)
+ negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)
+ elif do_classifier_free_guidance and negative_prompt_embeds is None:
+ negative_prompt = negative_prompt or ""
+ negative_prompt_2 = negative_prompt_2 or negative_prompt
+
+ uncond_tokens: List[str]
+ if prompt is not None and type(prompt) is not type(negative_prompt):
+ raise TypeError(
+                    f"`negative_prompt` should be the same type as `prompt`, but got {type(negative_prompt)} !="
+ f" {type(prompt)}."
+ )
+ elif isinstance(negative_prompt, str):
+ uncond_tokens = [negative_prompt, negative_prompt_2]
+ elif batch_size != len(negative_prompt):
+ raise ValueError(
+ f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
+ f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
+ " the batch size of `prompt`."
+ )
+ else:
+ uncond_tokens = [negative_prompt, negative_prompt_2]
+
+ negative_prompt_embeds_list = []
+ for negative_prompt, tokenizer, text_encoder in zip(uncond_tokens, tokenizers, text_encoders):
+ if isinstance(self, TextualInversionLoaderMixin):
+ negative_prompt = self.maybe_convert_prompt(negative_prompt, tokenizer)
+
+ max_length = prompt_embeds.shape[1]
+ uncond_input = tokenizer(
+ negative_prompt,
+ padding="max_length",
+ max_length=max_length,
+ truncation=True,
+ return_tensors="pt",
+ )
+
+ negative_prompt_embeds = text_encoder(
+ uncond_input.input_ids.to(device),
+ output_hidden_states=True,
+ )
+ # We are only ALWAYS interested in the pooled output of the final text encoder
+ negative_pooled_prompt_embeds = negative_prompt_embeds[0]
+ negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
+
+ negative_prompt_embeds_list.append(negative_prompt_embeds)
+
+ negative_prompt_embeds = torch.concat(negative_prompt_embeds_list, dim=-1)
+
+ prompt_embeds = prompt_embeds.to(dtype=self.text_encoder_2.dtype, device=device)
+ bs_embed, seq_len, _ = prompt_embeds.shape
+ # duplicate text embeddings for each generation per prompt, using mps friendly method
+ prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
+ prompt_embeds = prompt_embeds.view(bs_embed * num_images_per_prompt, seq_len, -1)
+
+ if do_classifier_free_guidance:
+ # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
+ seq_len = negative_prompt_embeds.shape[1]
+ negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.text_encoder_2.dtype, device=device)
+ negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
+ negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
+
+ pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
+ bs_embed * num_images_per_prompt, -1
+ )
+ if do_classifier_free_guidance:
+ negative_pooled_prompt_embeds = negative_pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
+ bs_embed * num_images_per_prompt, -1
+ )
+
+ return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
+ def prepare_extra_step_kwargs(self, generator, eta):
+ # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
+ # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
+ # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
+ # and should be between [0, 1]
+
+ accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
+ extra_step_kwargs = {}
+ if accepts_eta:
+ extra_step_kwargs["eta"] = eta
+
+ # check if the scheduler accepts generator
+ accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys())
+ if accepts_generator:
+ extra_step_kwargs["generator"] = generator
+ return extra_step_kwargs
+
+ def check_inputs(
+ self,
+ prompt,
+ prompt_2,
+ height,
+ width,
+ callback_steps,
+ negative_prompt=None,
+ negative_prompt_2=None,
+ prompt_embeds=None,
+ negative_prompt_embeds=None,
+ pooled_prompt_embeds=None,
+ negative_pooled_prompt_embeds=None,
+ ):
+ if height % 8 != 0 or width % 8 != 0:
+ raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
+
+ if (callback_steps is None) or (
+ callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0)
+ ):
+ raise ValueError(
+ f"`callback_steps` has to be a positive integer but is {callback_steps} of type"
+ f" {type(callback_steps)}."
+ )
+
+ if prompt is not None and prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
+ " only forward one of the two."
+ )
+ elif prompt_2 is not None and prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `prompt_2`: {prompt_2} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
+ " only forward one of the two."
+ )
+ elif prompt is None and prompt_embeds is None:
+ raise ValueError(
+ "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
+ )
+ elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
+ raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")
+ elif prompt_2 is not None and (not isinstance(prompt_2, str) and not isinstance(prompt_2, list)):
+ raise ValueError(f"`prompt_2` has to be of type `str` or `list` but is {type(prompt_2)}")
+
+ if negative_prompt is not None and negative_prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:"
+ f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
+ )
+ elif negative_prompt_2 is not None and negative_prompt_embeds is not None:
+ raise ValueError(
+ f"Cannot forward both `negative_prompt_2`: {negative_prompt_2} and `negative_prompt_embeds`:"
+ f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
+ )
+
+ if prompt_embeds is not None and negative_prompt_embeds is not None:
+ if prompt_embeds.shape != negative_prompt_embeds.shape:
+ raise ValueError(
+ "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but"
+ f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
+ f" {negative_prompt_embeds.shape}."
+ )
+
+ if prompt_embeds is not None and pooled_prompt_embeds is None:
+ raise ValueError(
+ "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
+ )
+
+ if negative_prompt_embeds is not None and negative_pooled_prompt_embeds is None:
+ raise ValueError(
+ "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
+ )
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
+ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
+ shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
+ if isinstance(generator, list) and len(generator) != batch_size:
+ raise ValueError(
+ f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
+ f" size of {batch_size}. Make sure the batch size matches the length of the generators."
+ )
+
+ if latents is None:
+ latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
+ else:
+ latents = latents.to(device)
+
+ # scale the initial noise by the standard deviation required by the scheduler
+ latents = latents * self.scheduler.init_noise_sigma
+ return latents
+
+ def _get_add_time_ids(self, original_size, crops_coords_top_left, target_size, dtype):
+ add_time_ids = list(original_size + crops_coords_top_left + target_size)
+
+ passed_add_embed_dim = (
+ self.unet.config.addition_time_embed_dim * len(add_time_ids) + self.text_encoder_2.config.projection_dim
+ )
+ expected_add_embed_dim = self.unet.add_embedding.linear_1.in_features
+
+ if expected_add_embed_dim != passed_add_embed_dim:
+ raise ValueError(
+ f"Model expects an added time embedding vector of length {expected_add_embed_dim}, but a vector of {passed_add_embed_dim} was created. The model has an incorrect config. Please check `unet.config.time_embedding_type` and `text_encoder_2.config.projection_dim`."
+ )
+
+ add_time_ids = torch.tensor([add_time_ids], dtype=dtype)
+ return add_time_ids
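+
+    # Note: add_time_ids is SDXL's micro-conditioning vector
+    # [orig_h, orig_w, crop_top, crop_left, target_h, target_w]; the check above verifies that
+    # 6 * addition_time_embed_dim + text_encoder_2.config.projection_dim matches
+    # unet.add_embedding.linear_1.in_features before the ids are turned into a tensor.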
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_upscale.StableDiffusionUpscalePipeline.upcast_vae
+ def upcast_vae(self):
+ dtype = self.vae.dtype
+ self.vae.to(dtype=torch.float32)
+ use_torch_2_0_or_xformers = isinstance(
+ self.vae.decoder.mid_block.attentions[0].processor,
+ (
+ AttnProcessor2_0,
+ XFormersAttnProcessor,
+ LoRAXFormersAttnProcessor,
+ LoRAAttnProcessor2_0,
+ ),
+ )
+ # if xformers or torch_2_0 is used attention block does not need
+ # to be in float32 which can save lots of memory
+ if use_torch_2_0_or_xformers:
+ self.vae.post_quant_conv.to(dtype)
+ self.vae.decoder.conv_in.to(dtype)
+ self.vae.decoder.mid_block.to(dtype)
+
+ @torch.no_grad()
+ @replace_example_docstring(EXAMPLE_DOC_STRING)
+ def __call__(
+ self,
+ prompt: Union[str, List[str]] = None,
+ prompt_2: Optional[Union[str, List[str]]] = None,
+ height: Optional[int] = None,
+ width: Optional[int] = None,
+ num_inference_steps: int = 50,
+ denoising_end: Optional[float] = None,
+ guidance_scale: float = 5.0,
+ negative_prompt: Optional[Union[str, List[str]]] = None,
+ negative_prompt_2: Optional[Union[str, List[str]]] = None,
+ num_images_per_prompt: Optional[int] = 1,
+ eta: float = 0.0,
+ generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
+ latents: Optional[torch.FloatTensor] = None,
+ prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ output_type: Optional[str] = "pil",
+ return_dict: bool = True,
+ callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+ callback_steps: int = 1,
+ cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+ guidance_rescale: float = 0.0,
+ original_size: Optional[Tuple[int, int]] = None,
+ crops_coords_top_left: Tuple[int, int] = (0, 0),
+ target_size: Optional[Tuple[int, int]] = None,
+ resolutions_list: Optional[Union[int, List[int]]] = None,
+ restart_steps: Optional[Union[int, List[int]]] = None,
+ cosine_scale: float = 2.0,
+ cosine_scale_bg: float = 1.0,
+ dilate_tau: int = 35,
+ img_path: Optional[str] = "",
+ mask_path: Optional[str] = "",
+ ):
+ r"""
+ Function invoked when calling the pipeline for generation.
+
+ Args:
+ prompt (`str` or `List[str]`, *optional*):
+                The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
+                instead.
+ prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
+ used in both text-encoders
+ height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
+ The height in pixels of the generated image.
+ width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
+ The width in pixels of the generated image.
+ num_inference_steps (`int`, *optional*, defaults to 50):
+ The number of denoising steps. More denoising steps usually lead to a higher quality image at the
+ expense of slower inference.
+ denoising_end (`float`, *optional*):
+ When specified, determines the fraction (between 0.0 and 1.0) of the total denoising process to be
+ completed before it is intentionally prematurely terminated. As a result, the returned sample will
+ still retain a substantial amount of noise as determined by the discrete timesteps selected by the
+ scheduler. The denoising_end parameter should ideally be utilized when this pipeline forms a part of a
+ "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
+ Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
+ guidance_scale (`float`, *optional*, defaults to 5.0):
+ Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
+ `guidance_scale` is defined as `w` of equation 2. of [Imagen
+ Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
+ 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
+ usually at the expense of lower image quality.
+ negative_prompt (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation. If not defined, one has to pass
+ `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
+ less than `1`).
+ negative_prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
+ `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
+ num_images_per_prompt (`int`, *optional*, defaults to 1):
+ The number of images to generate per prompt.
+ eta (`float`, *optional*, defaults to 0.0):
+ Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
+ [`schedulers.DDIMScheduler`], will be ignored for others.
+ generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
+ One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
+ to make generation deterministic.
+ latents (`torch.FloatTensor`, *optional*):
+ Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
+ generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
+                tensor will be generated by sampling using the supplied random `generator`.
+ prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
+ provided, text embeddings will be generated from `prompt` input argument.
+ negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
+ argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
+ output_type (`str`, *optional*, defaults to `"pil"`):
+                The output format of the generated image. Choose between
+ [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
+ return_dict (`bool`, *optional*, defaults to `True`):
+ Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead
+ of a plain tuple.
+ callback (`Callable`, *optional*):
+ A function that will be called every `callback_steps` steps during inference. The function will be
+ called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+ callback_steps (`int`, *optional*, defaults to 1):
+ The frequency at which the `callback` function will be called. If not specified, the callback will be
+ called at every step.
+ cross_attention_kwargs (`dict`, *optional*):
+ A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
+ `self.processor` in
+ [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
+            guidance_rescale (`float`, *optional*, defaults to 0.0):
+ Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
+ Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
+ [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+ Guidance rescale factor should fix overexposure when using zero terminal SNR.
+ original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+ If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
+ `original_size` defaults to `(width, height)` if not specified. Part of SDXL's micro-conditioning as
+ explained in section 2.2 of
+ [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+ crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
+ `crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
+ `crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
+ `crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
+ [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+ target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+ For most cases, `target_size` should be set to the desired height and width of the generated image. If
+ not specified it will default to `(width, height)`. Part of SDXL's micro-conditioning as explained in
+ section 2.2 of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+
+ Examples:
+
+ Returns:
+ [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] or `tuple`:
+ [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] if `return_dict` is True, otherwise a
+ `tuple`. When returning a tuple, the first element is a list with the generated images.
+ """
+
+
+ # 0. Default height and width to unet
+ if resolutions_list:
+ height, width = resolutions_list[0]
+ target_sizes = resolutions_list[1:]
+ if not restart_steps:
+ restart_steps = [15] * len(target_sizes)
+ else:
+ height = height or self.default_sample_size * self.vae_scale_factor
+ width = width or self.default_sample_size * self.vae_scale_factor
+
+ original_size = original_size or (height, width)
+ target_size = target_size or (height, width)
+
+ # 1. Check inputs. Raise error if not correct
+ self.check_inputs(
+ prompt,
+ prompt_2,
+ height,
+ width,
+ callback_steps,
+ negative_prompt,
+ negative_prompt_2,
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
+ )
+
+ # 2. Define call parameters
+ if prompt is not None and isinstance(prompt, str):
+ batch_size = 1
+ elif prompt is not None and isinstance(prompt, list):
+ batch_size = len(prompt)
+ else:
+ batch_size = prompt_embeds.shape[0]
+
+ device = self._execution_device
+
+        # here `guidance_scale` is defined analogously to the guidance weight `w` of equation (2)
+ # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
+ # corresponds to doing no classifier free guidance.
+ do_classifier_free_guidance = guidance_scale > 1.0
+
+ # 3. Encode input prompt
+ text_encoder_lora_scale = (
+ cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None
+ )
+ (
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
+ ) = self.encode_prompt(
+ prompt=prompt,
+ prompt_2=prompt_2,
+ device=device,
+ num_images_per_prompt=num_images_per_prompt,
+ do_classifier_free_guidance=do_classifier_free_guidance,
+ negative_prompt=negative_prompt,
+ negative_prompt_2=negative_prompt_2,
+ prompt_embeds=prompt_embeds,
+ negative_prompt_embeds=negative_prompt_embeds,
+ pooled_prompt_embeds=pooled_prompt_embeds,
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
+ lora_scale=text_encoder_lora_scale,
+ )
+
+ # 4. Prepare timesteps
+ self.scheduler.set_timesteps(num_inference_steps, device=device)
+
+ timesteps = self.scheduler.timesteps
+
+ # 5. Prepare latent variables
+ num_channels_latents = self.unet.config.in_channels
+ latents = self.prepare_latents(
+ batch_size * num_images_per_prompt,
+ num_channels_latents,
+ height,
+ width,
+ prompt_embeds.dtype,
+ device,
+ generator,
+ latents,
+ )
+
+ # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
+ extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
+
+ # 7. Prepare added time ids & embeddings
+ add_text_embeds = pooled_prompt_embeds
+ add_time_ids = self._get_add_time_ids(
+ original_size, crops_coords_top_left, target_size, dtype=prompt_embeds.dtype
+ )
+
+ if do_classifier_free_guidance:
+ prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
+ add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
+ add_time_ids = torch.cat([add_time_ids, add_time_ids], dim=0)
+
+ prompt_embeds = prompt_embeds.to(device)
+ add_text_embeds = add_text_embeds.to(device)
+ add_time_ids = add_time_ids.to(device).repeat(batch_size * num_images_per_prompt, 1)
+
+ # 8. Denoising loop
+ num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
+
+ # 9.1 Apply denoising_end
+ if denoising_end is not None and type(denoising_end) == float and denoising_end > 0 and denoising_end < 1:
+ discrete_timestep_cutoff = int(
+ round(
+ self.scheduler.config.num_train_timesteps
+ - (denoising_end * self.scheduler.config.num_train_timesteps)
+ )
+ )
+ num_inference_steps = len(list(filter(lambda ts: ts >= discrete_timestep_cutoff, timesteps)))
+ timesteps = timesteps[:num_inference_steps]
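+            # For example, with num_train_timesteps=1000 and denoising_end=0.7 the cutoff is
+            # round(1000 - 0.7 * 1000) = 300, so only timesteps >= 300 are kept and denoising stops early.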
+
+ results_list = []
+
+ for block in self.unet.down_blocks + [self.unet.mid_block] + self.unet.up_blocks:
+ for module in block.modules():
+ if isinstance(module, BasicTransformerBlock):
+ module.forward = ori_forward.__get__(module, BasicTransformerBlock)
+
+ if img_path != '':
+ needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
+ if needs_upcasting:
+ self.upcast_vae()
+ latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
+ input_image = process_image_to_tensor(img_path).unsqueeze(0).to(dtype=self.vae.dtype, device=device)
+ latents = self.vae.encode(input_image).latent_dist.sample().to(self.vae.dtype)
+ latents = latents * self.vae.config.scaling_factor
+ else:
+ with self.progress_bar(total=num_inference_steps) as progress_bar:
+ for i, t in enumerate(timesteps):
+ # expand the latents if we are doing classifier free guidance
+ latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+
+ latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+
+ # predict the noise residual
+ added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
+ noise_pred = self.unet(
+ latent_model_input,
+ t,
+ encoder_hidden_states=prompt_embeds,
+ cross_attention_kwargs=cross_attention_kwargs,
+ added_cond_kwargs=added_cond_kwargs,
+ return_dict=False,
+ )[0]
+
+ # perform guidance
+ if do_classifier_free_guidance:
+ noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+ noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+ if do_classifier_free_guidance and guidance_rescale > 0.0:
+ # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
+ noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
+
+ # compute the previous noisy sample x_t -> x_t-1
+ latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
+
+ # call the callback, if provided
+ if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+ progress_bar.update()
+ if callback is not None and i % callback_steps == 0:
+ callback(i, t, latents)
+
+ results_list.append(latents)
+
+ if mask_path != '':
+ mask = process_image_to_bitensor(mask_path).unsqueeze(0)
+
+ for restart_index, target_size in enumerate(target_sizes):
+ restart_step = restart_steps[restart_index]
+ target_size_ = [target_size[0]//8, target_size[1]//8]
+
+ for block in self.unet.down_blocks + [self.unet.mid_block] + self.unet.up_blocks:
+ for module in block.modules():
+ if isinstance(module, BasicTransformerBlock):
+ module.forward = scale_forward.__get__(module, BasicTransformerBlock)
+ module.current_hw = target_size
+
+ needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
+ if needs_upcasting:
+ self.upcast_vae()
+ latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
+
+ latents = latents / self.vae.config.scaling_factor
+ image = self.vae.decode(latents, return_dict=False)[0]
+ image = torch.nn.functional.interpolate(
+ image,
+ size=target_size,
+ mode='bicubic',
+ )
+ latents = self.vae.encode(image).latent_dist.sample().to(self.vae.dtype)
+ latents = latents * self.vae.config.scaling_factor
+
+ if mask_path != '':
+ mask_ = torch.nn.functional.interpolate(
+ mask,
+ size=target_size_,
+ mode="nearest",
+ ).to(device)
+
+ noise_latents = []
+ noise = torch.randn_like(latents)
+ for timestep in self.scheduler.timesteps:
+ noise_latent = self.scheduler.add_noise(latents, noise, timestep.unsqueeze(0))
+ noise_latents.append(noise_latent)
+ latents = noise_latents[restart_step]
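+            # The upscaled, re-encoded latents are re-noised to the noise level of scheduler timestep
+            # index `restart_step`; the loop below skips its first `restart_step` iterations, so
+            # sampling effectively restarts from that intermediate step at the new resolution.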
+
+ self.scheduler._step_index = 0
+ with self.progress_bar(total=num_inference_steps) as progress_bar:
+ for i, t in enumerate(timesteps):
+
+ if i < restart_step:
+ self.scheduler._step_index += 1
+ progress_bar.update()
+ continue
+
+ cosine_factor = 0.5 * (1 + torch.cos(torch.pi * (self.scheduler.config.num_train_timesteps - t) / self.scheduler.config.num_train_timesteps)).cpu()
+ if mask_path != '':
+ c1 = (cosine_factor ** (mask_ * cosine_scale + (1-mask_) * cosine_scale_bg)).to(dtype=torch.float16)
+ else:
+ c1 = cosine_factor ** cosine_scale
+ latents = latents * (1 - c1) + noise_latents[i] * c1
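+                    # cosine_factor decays from ~1 at the first timestep to 0 at the last, so c1 blends
+                    # progressively less of the pre-noised latents back in; cosine_scale (or, with a mask,
+                    # cosine_scale for masked regions and cosine_scale_bg elsewhere) controls how fast that fades.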
+
+ dilate_coef=target_size[1]//1024
+
+ dilate_layers = [
+ # "down_blocks.1.resnets.0.conv1",
+ # "down_blocks.1.resnets.0.conv2",
+ # "down_blocks.1.resnets.1.conv1",
+ # "down_blocks.1.resnets.1.conv2",
+ "down_blocks.1.downsamplers.0.conv",
+ "down_blocks.2.resnets.0.conv1",
+ "down_blocks.2.resnets.0.conv2",
+ "down_blocks.2.resnets.1.conv1",
+ "down_blocks.2.resnets.1.conv2",
+ # "up_blocks.0.resnets.0.conv1",
+ # "up_blocks.0.resnets.0.conv2",
+ # "up_blocks.0.resnets.1.conv1",
+ # "up_blocks.0.resnets.1.conv2",
+ # "up_blocks.0.resnets.2.conv1",
+ # "up_blocks.0.resnets.2.conv2",
+ # "up_blocks.0.upsamplers.0.conv",
+ # "up_blocks.1.resnets.0.conv1",
+ # "up_blocks.1.resnets.0.conv2",
+ # "up_blocks.1.resnets.1.conv1",
+ # "up_blocks.1.resnets.1.conv2",
+ # "up_blocks.1.resnets.2.conv1",
+ # "up_blocks.1.resnets.2.conv2",
+ # "up_blocks.1.upsamplers.0.conv",
+ # "up_blocks.2.resnets.0.conv1",
+ # "up_blocks.2.resnets.0.conv2",
+ # "up_blocks.2.resnets.1.conv1",
+ # "up_blocks.2.resnets.1.conv2",
+ # "up_blocks.2.resnets.2.conv1",
+ # "up_blocks.2.resnets.2.conv2",
+ "mid_block.resnets.0.conv1",
+ "mid_block.resnets.0.conv2",
+ "mid_block.resnets.1.conv1",
+ "mid_block.resnets.1.conv2"
+ ]
+
+ for name, module in self.unet.named_modules():
+ if name in dilate_layers:
+ if i < dilate_tau:
+ module.dilation = (dilate_coef, dilate_coef)
+ module.padding = (dilate_coef, dilate_coef)
+ else:
+ module.dilation = (1, 1)
+ module.padding = (1, 1)
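+
+                    # For the first dilate_tau steps the listed down/mid conv layers are dilated by
+                    # target_width // 1024, enlarging their receptive field at the higher resolution;
+                    # afterwards (and again after the loop) dilation and padding are reset to (1, 1).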
+
+ # expand the latents if we are doing classifier free guidance
+ latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+
+ latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+
+
+ # predict the noise residual
+ added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
+ noise_pred = self.unet(
+ latent_model_input,
+ t,
+ encoder_hidden_states=prompt_embeds,
+ cross_attention_kwargs=cross_attention_kwargs,
+ added_cond_kwargs=added_cond_kwargs,
+ return_dict=False,
+ )[0]
+
+ # perform guidance
+ if do_classifier_free_guidance:
+ noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+ noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+ if do_classifier_free_guidance and guidance_rescale > 0.0:
+ # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
+ noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
+
+ # compute the previous noisy sample x_t -> x_t-1
+ latents_dtype = latents.dtype
+ latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
+ if latents.dtype != latents_dtype:
+ if torch.backends.mps.is_available():
+ # some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
+ latents = latents.to(latents_dtype)
+
+ # call the callback, if provided
+ if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+ progress_bar.update()
+ if callback is not None and i % callback_steps == 0:
+ callback(i, t, latents)
+
+ for name, module in self.unet.named_modules():
+ # if ('.conv' in name) and ('.conv_' not in name):
+ if name in dilate_layers:
+ module.dilation = (1, 1)
+ module.padding = (1, 1)
+
+ results_list.append(latents)
+
+ """
+ final_results = []
+ for latents in results_list:
+ # make sure the VAE is in float32 mode, as it overflows in float16
+ if self.vae.dtype == torch.float16 and self.vae.config.force_upcast:
+ self.upcast_vae()
+ latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
+
+ if not output_type == "latent":
+ image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
+ else:
+ image = latents
+ return StableDiffusionXLPipelineOutput(images=image)
+
+ # apply watermark if available
+ if self.watermark is not None:
+ image = self.watermark.apply_watermark(image)
+
+ image = self.image_processor.postprocess(image, output_type=output_type)
+
+ if not return_dict:
+ final_results += [(image,)]
+ else:
+ final_results += [StableDiffusionXLPipelineOutput(images=image)]
+
+ # Offload last model to CPU
+ if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
+ self.final_offload_hook.offload()
+
+ return final_results
+ """
+ return StableDiffusionXLPipelineOutput(images=results_list)
+
+    # Override to properly handle the loading and unloading of the additional text encoder.
+ def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], **kwargs):
+ # We could have accessed the unet config from `lora_state_dict()` too. We pass
+ # it here explicitly to be able to tell that it's coming from an SDXL
+ # pipeline.
+ state_dict, network_alphas = self.lora_state_dict(
+ pretrained_model_name_or_path_or_dict,
+ unet_config=self.unet.config,
+ **kwargs,
+ )
+ self.load_lora_into_unet(state_dict, network_alphas=network_alphas, unet=self.unet)
+
+ text_encoder_state_dict = {k: v for k, v in state_dict.items() if "text_encoder." in k}
+ if len(text_encoder_state_dict) > 0:
+ self.load_lora_into_text_encoder(
+ text_encoder_state_dict,
+ network_alphas=network_alphas,
+ text_encoder=self.text_encoder,
+ prefix="text_encoder",
+ lora_scale=self.lora_scale,
+ )
+
+ text_encoder_2_state_dict = {k: v for k, v in state_dict.items() if "text_encoder_2." in k}
+ if len(text_encoder_2_state_dict) > 0:
+ self.load_lora_into_text_encoder(
+ text_encoder_2_state_dict,
+ network_alphas=network_alphas,
+ text_encoder=self.text_encoder_2,
+ prefix="text_encoder_2",
+ lora_scale=self.lora_scale,
+ )
+
+ @classmethod
+ def save_lora_weights(
+ self,
+ save_directory: Union[str, os.PathLike],
+ unet_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
+ text_encoder_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
+ text_encoder_2_lora_layers: Dict[str, Union[torch.nn.Module, torch.Tensor]] = None,
+ is_main_process: bool = True,
+ weight_name: str = None,
+ save_function: Callable = None,
+ safe_serialization: bool = True,
+ ):
+ state_dict = {}
+
+ def pack_weights(layers, prefix):
+ layers_weights = layers.state_dict() if isinstance(layers, torch.nn.Module) else layers
+ layers_state_dict = {f"{prefix}.{module_name}": param for module_name, param in layers_weights.items()}
+ return layers_state_dict
+
+ state_dict.update(pack_weights(unet_lora_layers, "unet"))
+
+ if text_encoder_lora_layers and text_encoder_2_lora_layers:
+ state_dict.update(pack_weights(text_encoder_lora_layers, "text_encoder"))
+ state_dict.update(pack_weights(text_encoder_2_lora_layers, "text_encoder_2"))
+
+ self.write_lora_layers(
+ state_dict=state_dict,
+ save_directory=save_directory,
+ is_main_process=is_main_process,
+ weight_name=weight_name,
+ save_function=save_function,
+ safe_serialization=safe_serialization,
+ )
+
+ def _remove_text_encoder_monkey_patch(self):
+ self._remove_text_encoder_monkey_patch_classmethod(self.text_encoder)
+ self._remove_text_encoder_monkey_patch_classmethod(self.text_encoder_2)
diff --git a/modules/freescale/scale_attention.py b/modules/freescale/scale_attention.py
new file mode 100644
index 000000000..9e83d5067
--- /dev/null
+++ b/modules/freescale/scale_attention.py
@@ -0,0 +1,367 @@
+from typing import Any, Dict, Optional
+import random
+import torch
+import torch.nn.functional as F
+from einops import rearrange
+
+
+def gaussian_kernel(kernel_size=3, sigma=1.0, channels=3):
+ x_coord = torch.arange(kernel_size)
+ gaussian_1d = torch.exp(-(x_coord - (kernel_size - 1) / 2) ** 2 / (2 * sigma ** 2))
+ gaussian_1d = gaussian_1d / gaussian_1d.sum()
+ gaussian_2d = gaussian_1d[:, None] * gaussian_1d[None, :]
+ kernel = gaussian_2d[None, None, :, :].repeat(channels, 1, 1, 1)
+
+ return kernel
+
+def gaussian_filter(latents, kernel_size=3, sigma=1.0):
+ channels = latents.shape[1]
+ kernel = gaussian_kernel(kernel_size, sigma, channels).to(latents.device, latents.dtype)
+ blurred_latents = F.conv2d(latents, kernel, padding=kernel_size//2, groups=channels)
+
+ return blurred_latents
+
+def get_views(height, width, h_window_size=128, w_window_size=128, scale_factor=8):
+ height = int(height)
+ width = int(width)
+ h_window_stride = h_window_size // 2
+ w_window_stride = w_window_size // 2
+ h_window_size = int(h_window_size / scale_factor)
+ w_window_size = int(w_window_size / scale_factor)
+ h_window_stride = int(h_window_stride / scale_factor)
+ w_window_stride = int(w_window_stride / scale_factor)
+ num_blocks_height = int((height - h_window_size) / h_window_stride - 1e-6) + 2 if height > h_window_size else 1
+ num_blocks_width = int((width - w_window_size) / w_window_stride - 1e-6) + 2 if width > w_window_size else 1
+ total_num_blocks = int(num_blocks_height * num_blocks_width)
+ views = []
+ for i in range(total_num_blocks):
+ h_start = int((i // num_blocks_width) * h_window_stride)
+ h_end = h_start + h_window_size
+ w_start = int((i % num_blocks_width) * w_window_stride)
+ w_end = w_start + w_window_size
+
+ if h_end > height:
+ h_start = int(h_start + height - h_end)
+ h_end = int(height)
+ if w_end > width:
+ w_start = int(w_start + width - w_end)
+ w_end = int(width)
+ if h_start < 0:
+ h_end = int(h_end - h_start)
+ h_start = 0
+ if w_start < 0:
+ w_end = int(w_end - w_start)
+ w_start = 0
+
+ random_jitter = True
+ if random_jitter:
+ h_jitter_range = h_window_size // 8
+ w_jitter_range = w_window_size // 8
+ h_jitter = 0
+ w_jitter = 0
+
+ if (w_start != 0) and (w_end != width):
+ w_jitter = random.randint(-w_jitter_range, w_jitter_range)
+ elif (w_start == 0) and (w_end != width):
+ w_jitter = random.randint(-w_jitter_range, 0)
+ elif (w_start != 0) and (w_end == width):
+ w_jitter = random.randint(0, w_jitter_range)
+ if (h_start != 0) and (h_end != height):
+ h_jitter = random.randint(-h_jitter_range, h_jitter_range)
+ elif (h_start == 0) and (h_end != height):
+ h_jitter = random.randint(-h_jitter_range, 0)
+ elif (h_start != 0) and (h_end == height):
+ h_jitter = random.randint(0, h_jitter_range)
+ h_start += (h_jitter + h_jitter_range)
+ h_end += (h_jitter + h_jitter_range)
+ w_start += (w_jitter + w_jitter_range)
+ w_end += (w_jitter + w_jitter_range)
+
+ views.append((h_start, h_end, w_start, w_end))
+ return views
+
+def scale_forward(
+ self,
+ hidden_states: torch.FloatTensor,
+ attention_mask: Optional[torch.FloatTensor] = None,
+ encoder_hidden_states: Optional[torch.FloatTensor] = None,
+ encoder_attention_mask: Optional[torch.FloatTensor] = None,
+ timestep: Optional[torch.LongTensor] = None,
+ cross_attention_kwargs: Dict[str, Any] = None,
+ class_labels: Optional[torch.LongTensor] = None,
+):
+ # Notice that normalization is always applied before the real computation in the following blocks.
+ if self.current_hw:
+ current_scale_num_h, current_scale_num_w = max(self.current_hw[0] // 1024, 1), max(self.current_hw[1] // 1024, 1)
+ else:
+ current_scale_num_h, current_scale_num_w = 1, 1
+
+ # 0. Self-Attention
+ if self.use_ada_layer_norm:
+ norm_hidden_states = self.norm1(hidden_states, timestep)
+ elif self.use_ada_layer_norm_zero:
+ norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(
+ hidden_states, timestep, class_labels, hidden_dtype=hidden_states.dtype
+ )
+ else:
+ norm_hidden_states = self.norm1(hidden_states)
+
+ # 2. Prepare GLIGEN inputs
+ cross_attention_kwargs = cross_attention_kwargs.copy() if cross_attention_kwargs is not None else {}
+ gligen_kwargs = cross_attention_kwargs.pop("gligen", None)
+
+ ratio_hw = current_scale_num_h / current_scale_num_w
+ latent_h = int((norm_hidden_states.shape[1] * ratio_hw) ** 0.5)
+ latent_w = int(latent_h / ratio_hw)
+ scale_factor = 128 * current_scale_num_h / latent_h
+ if ratio_hw > 1:
+ sub_h = 128
+ sub_w = int(128 / ratio_hw)
+ else:
+ sub_h = int(128 * ratio_hw)
+ sub_w = 128
+
+ h_jitter_range = int(sub_h / scale_factor // 8)
+ w_jitter_range = int(sub_w / scale_factor // 8)
+ views = get_views(latent_h, latent_w, sub_h, sub_w, scale_factor = scale_factor)
+
+ current_scale_num = max(current_scale_num_h, current_scale_num_w)
+ global_views = [[h, w] for h in range(current_scale_num_h) for w in range(current_scale_num_w)]
+
+ four_window = True
+ fourg_window = False
+
+ if four_window:
+ norm_hidden_states_ = rearrange(norm_hidden_states, 'bh (h w) d -> bh h w d', h = latent_h)
+ norm_hidden_states_ = F.pad(norm_hidden_states_, (0, 0, w_jitter_range, w_jitter_range, h_jitter_range, h_jitter_range), 'constant', 0)
+ value = torch.zeros_like(norm_hidden_states_)
+ count = torch.zeros_like(norm_hidden_states_)
+ for index, view in enumerate(views):
+ h_start, h_end, w_start, w_end = view
+ local_states = norm_hidden_states_[:, h_start:h_end, w_start:w_end, :]
+ local_states = rearrange(local_states, 'bh h w d -> bh (h w) d')
+ local_output = self.attn1(
+ local_states,
+ encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None,
+ attention_mask=attention_mask,
+ **cross_attention_kwargs,
+ )
+ local_output = rearrange(local_output, 'bh (h w) d -> bh h w d', h = int(sub_h / scale_factor))
+
+ value[:, h_start:h_end, w_start:w_end, :] += local_output * 1
+ count[:, h_start:h_end, w_start:w_end, :] += 1
+
+ value = value[:, h_jitter_range:-h_jitter_range, w_jitter_range:-w_jitter_range, :]
+ count = count[:, h_jitter_range:-h_jitter_range, w_jitter_range:-w_jitter_range, :]
+ attn_output = torch.where(count>0, value/count, value)
+
+ gaussian_local = gaussian_filter(attn_output, kernel_size=(2*current_scale_num-1), sigma=1.0)
+
+ attn_output_global = self.attn1(
+ norm_hidden_states,
+ encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None,
+ attention_mask=attention_mask,
+ **cross_attention_kwargs,
+ )
+ attn_output_global = rearrange(attn_output_global, 'bh (h w) d -> bh h w d', h = latent_h)
+
+ gaussian_global = gaussian_filter(attn_output_global, kernel_size=(2*current_scale_num-1), sigma=1.0)
+
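+        # Fuse: low-frequency structure from the windowed (local) attention plus high-frequency detail from the global attention.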
+ attn_output = gaussian_local + (attn_output_global - gaussian_global)
+ attn_output = rearrange(attn_output, 'bh h w d -> bh (h w) d')
+
+ elif fourg_window:
+ norm_hidden_states = rearrange(norm_hidden_states, 'bh (h w) d -> bh h w d', h = latent_h)
+ norm_hidden_states_ = F.pad(norm_hidden_states, (0, 0, w_jitter_range, w_jitter_range, h_jitter_range, h_jitter_range), 'constant', 0)
+ value = torch.zeros_like(norm_hidden_states_)
+ count = torch.zeros_like(norm_hidden_states_)
+ for index, view in enumerate(views):
+ h_start, h_end, w_start, w_end = view
+ local_states = norm_hidden_states_[:, h_start:h_end, w_start:w_end, :]
+ local_states = rearrange(local_states, 'bh h w d -> bh (h w) d')
+ local_output = self.attn1(
+ local_states,
+ encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None,
+ attention_mask=attention_mask,
+ **cross_attention_kwargs,
+ )
+ local_output = rearrange(local_output, 'bh (h w) d -> bh h w d', h = int(sub_h / scale_factor))
+
+ value[:, h_start:h_end, w_start:w_end, :] += local_output * 1
+ count[:, h_start:h_end, w_start:w_end, :] += 1
+
+ value = value[:, h_jitter_range:-h_jitter_range, w_jitter_range:-w_jitter_range, :]
+ count = count[:, h_jitter_range:-h_jitter_range, w_jitter_range:-w_jitter_range, :]
+ attn_output = torch.where(count>0, value/count, value)
+
+ gaussian_local = gaussian_filter(attn_output, kernel_size=(2*current_scale_num-1), sigma=1.0)
+
+ value = torch.zeros_like(norm_hidden_states)
+ count = torch.zeros_like(norm_hidden_states)
+ for index, global_view in enumerate(global_views):
+ h, w = global_view
+ global_states = norm_hidden_states[:, h::current_scale_num_h, w::current_scale_num_w, :]
+ global_states = rearrange(global_states, 'bh h w d -> bh (h w) d')
+ global_output = self.attn1(
+ global_states,
+ encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None,
+ attention_mask=attention_mask,
+ **cross_attention_kwargs,
+ )
+ global_output = rearrange(global_output, 'bh (h w) d -> bh h w d', h = int(global_output.shape[1] ** 0.5))
+
+ value[:, h::current_scale_num_h, w::current_scale_num_w, :] += global_output * 1
+ count[:, h::current_scale_num_h, w::current_scale_num_w, :] += 1
+
+ attn_output_global = torch.where(count>0, value/count, value)
+
+ gaussian_global = gaussian_filter(attn_output_global, kernel_size=(2*current_scale_num-1), sigma=1.0)
+
+ attn_output = gaussian_local + (attn_output_global - gaussian_global)
+ attn_output = rearrange(attn_output, 'bh h w d -> bh (h w) d')
+
+ else:
+ attn_output = self.attn1(
+ norm_hidden_states,
+ encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None,
+ attention_mask=attention_mask,
+ **cross_attention_kwargs,
+ )
+
+ if self.use_ada_layer_norm_zero:
+ attn_output = gate_msa.unsqueeze(1) * attn_output
+ hidden_states = attn_output + hidden_states
+
+ # 2.5 GLIGEN Control
+ if gligen_kwargs is not None:
+ hidden_states = self.fuser(hidden_states, gligen_kwargs["objs"])
+ # 2.5 ends
+
+ # 3. Cross-Attention
+ if self.attn2 is not None:
+ norm_hidden_states = (
+ self.norm2(hidden_states, timestep) if self.use_ada_layer_norm else self.norm2(hidden_states)
+ )
+ attn_output = self.attn2(
+ norm_hidden_states,
+ encoder_hidden_states=encoder_hidden_states,
+ attention_mask=encoder_attention_mask,
+ **cross_attention_kwargs,
+ )
+ hidden_states = attn_output + hidden_states
+
+ # 4. Feed-forward
+ norm_hidden_states = self.norm3(hidden_states)
+
+ if self.use_ada_layer_norm_zero:
+ norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None]
+
+ if self._chunk_size is not None:
+ # "feed_forward_chunk_size" can be used to save memory
+ if norm_hidden_states.shape[self._chunk_dim] % self._chunk_size != 0:
+ raise ValueError(
+ f"`hidden_states` dimension to be chunked: {norm_hidden_states.shape[self._chunk_dim]} has to be divisible by chunk size: {self._chunk_size}. Make sure to set an appropriate `chunk_size` when calling `unet.enable_forward_chunking`."
+ )
+
+ num_chunks = norm_hidden_states.shape[self._chunk_dim] // self._chunk_size
+ ff_output = torch.cat(
+ [
+ self.ff(hid_slice)
+ for hid_slice in norm_hidden_states.chunk(num_chunks, dim=self._chunk_dim)
+ ],
+ dim=self._chunk_dim,
+ )
+ else:
+ ff_output = self.ff(norm_hidden_states)
+
+ if self.use_ada_layer_norm_zero:
+ ff_output = gate_mlp.unsqueeze(1) * ff_output
+
+ hidden_states = ff_output + hidden_states
+
+ return hidden_states
+
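+# Stock transformer block forward without FreeScale's windowed attention.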
+def ori_forward(
+ self,
+ hidden_states: torch.FloatTensor,
+ attention_mask: Optional[torch.FloatTensor] = None,
+ encoder_hidden_states: Optional[torch.FloatTensor] = None,
+ encoder_attention_mask: Optional[torch.FloatTensor] = None,
+ timestep: Optional[torch.LongTensor] = None,
+ cross_attention_kwargs: Dict[str, Any] = None,
+ class_labels: Optional[torch.LongTensor] = None,
+):
+ # Notice that normalization is always applied before the real computation in the following blocks.
+ # 0. Self-Attention
+ if self.use_ada_layer_norm:
+ norm_hidden_states = self.norm1(hidden_states, timestep)
+ elif self.use_ada_layer_norm_zero:
+ norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(
+ hidden_states, timestep, class_labels, hidden_dtype=hidden_states.dtype
+ )
+ else:
+ norm_hidden_states = self.norm1(hidden_states)
+
+ # 2. Prepare GLIGEN inputs
+ cross_attention_kwargs = cross_attention_kwargs.copy() if cross_attention_kwargs is not None else {}
+ gligen_kwargs = cross_attention_kwargs.pop("gligen", None)
+
+ attn_output = self.attn1(
+ norm_hidden_states,
+ encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None,
+ attention_mask=attention_mask,
+ **cross_attention_kwargs,
+ )
+
+ if self.use_ada_layer_norm_zero:
+ attn_output = gate_msa.unsqueeze(1) * attn_output
+ hidden_states = attn_output + hidden_states
+
+ # 2.5 GLIGEN Control
+ if gligen_kwargs is not None:
+ hidden_states = self.fuser(hidden_states, gligen_kwargs["objs"])
+ # 2.5 ends
+
+ # 3. Cross-Attention
+ if self.attn2 is not None:
+ norm_hidden_states = (
+ self.norm2(hidden_states, timestep) if self.use_ada_layer_norm else self.norm2(hidden_states)
+ )
+ attn_output = self.attn2(
+ norm_hidden_states,
+ encoder_hidden_states=encoder_hidden_states,
+ attention_mask=encoder_attention_mask,
+ **cross_attention_kwargs,
+ )
+ hidden_states = attn_output + hidden_states
+
+ # 4. Feed-forward
+ norm_hidden_states = self.norm3(hidden_states)
+
+ if self.use_ada_layer_norm_zero:
+ norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None]
+
+ if self._chunk_size is not None:
+ # "feed_forward_chunk_size" can be used to save memory
+ if norm_hidden_states.shape[self._chunk_dim] % self._chunk_size != 0:
+ raise ValueError(
+ f"`hidden_states` dimension to be chunked: {norm_hidden_states.shape[self._chunk_dim]} has to be divisible by chunk size: {self._chunk_size}. Make sure to set an appropriate `chunk_size` when calling `unet.enable_forward_chunking`."
+ )
+
+ num_chunks = norm_hidden_states.shape[self._chunk_dim] // self._chunk_size
+ ff_output = torch.cat(
+ [
+ self.ff(hid_slice)
+ for hid_slice in norm_hidden_states.chunk(num_chunks, dim=self._chunk_dim)
+ ],
+ dim=self._chunk_dim,
+ )
+ else:
+ ff_output = self.ff(norm_hidden_states)
+
+ if self.use_ada_layer_norm_zero:
+ ff_output = gate_mlp.unsqueeze(1) * ff_output
+
+ hidden_states = ff_output + hidden_states
+
+ return hidden_states
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index 3b6f228ba..adb047511 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -365,13 +365,26 @@ def process_decode(p: processing.StableDiffusionProcessing, output):
else:
width = getattr(p, 'width', 0)
height = getattr(p, 'height', 0)
- results = processing_vae.vae_decode(
- latents = output.images,
- model = model,
- full_quality = p.full_quality,
- width = width,
- height = height,
- )
+ if isinstance(output.images, list):
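+        # Decode each latent batch in the list separately and flatten the decoded images into a single results list.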
+ results = []
+ for i in range(len(output.images)):
+ result_batch = processing_vae.vae_decode(
+ latents = output.images[i],
+ model = model,
+ full_quality = p.full_quality,
+ width = width,
+ height = height,
+ )
+ for result in list(result_batch):
+ results.append(result)
+ else:
+ results = processing_vae.vae_decode(
+ latents = output.images,
+ model = model,
+ full_quality = p.full_quality,
+ width = width,
+ height = height,
+ )
elif hasattr(output, 'images'):
results = output.images
else:
diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index cd51043c7..723f7b181 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -40,7 +40,6 @@ def single_sample_to_image(sample, approximation=None):
if approximation is None:
warn_once('Unknown decode type')
approximation = 0
- # normal sample is [4,64,64]
try:
if sample.dtype == torch.bfloat16 and (approximation == 0 or approximation == 1):
sample = sample.to(torch.float16)
@@ -62,6 +61,9 @@ def single_sample_to_image(sample, approximation=None):
sample = sample * (5 / abs(sample_min))
"""
if approximation == 2: # TAESD
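+            # Downscale oversized latents so the longest side is at most 128 before the TAESD preview decode.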
+ if sample.shape[-1] > 128 or sample.shape[-2] > 128:
+ scale = 128 / max(sample.shape[-1], sample.shape[-2])
+ sample = torch.nn.functional.interpolate(sample.unsqueeze(0), scale_factor=[scale, scale], mode='bilinear', align_corners=False)[0]
x_sample = sd_vae_taesd.decode(sample)
x_sample = (1.0 + x_sample) / 2.0 # preview requires smaller range
elif shared.sd_model_type == 'sc' and approximation != 3:
diff --git a/modules/sd_vae_taesd.py b/modules/sd_vae_taesd.py
index 4d213ad48..a1959817c 100644
--- a/modules/sd_vae_taesd.py
+++ b/modules/sd_vae_taesd.py
@@ -169,6 +169,9 @@ def decode(latents):
if vae is None:
return latents
try:
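+        # Skip TAESD decode for large latents (either dimension above 256) and return them unchanged.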
+ size = max(latents.shape[-1], latents.shape[-2])
+ if size > 256:
+ return latents
with devices.inference_context():
latents = latents.detach().clone().to(devices.device, dtype)
if len(latents.shape) == 3:
diff --git a/modules/shared.py b/modules/shared.py
index eaf4b361d..90dbe2647 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -873,6 +873,7 @@ def get_default_modes():
options_templates.update(options_section(('control', "Control Options"), {
"control_max_units": OptionInfo(4, "Maximum number of units", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}),
+ "control_tiles": OptionInfo("1x1, 1x2, 1x3, 1x4, 2x1, 2x1, 2x2, 2x3, 2x4, 3x1, 3x2, 3x3, 3x4, 4x1, 4x2, 4x3, 4x4", "Tiling options"),
"control_move_processor": OptionInfo(False, "Processor move to CPU after use"),
"control_unload_processor": OptionInfo(False, "Processor unload after use"),
}))
diff --git a/modules/shared_state.py b/modules/shared_state.py
index 3d3cb1ae6..a3312ec33 100644
--- a/modules/shared_state.py
+++ b/modules/shared_state.py
@@ -141,9 +141,9 @@ def set_current_image(self):
if self.job == 'VAE': # avoid generating preview while vae is running
return
from modules.shared import opts, cmd_opts
- if cmd_opts.lowvram or self.api:
+ if cmd_opts.lowvram or self.api or not opts.live_previews_enable or opts.show_progress_every_n_steps <= 0:
return
- if abs(self.sampling_step - self.current_image_sampling_step) >= opts.show_progress_every_n_steps and opts.live_previews_enable and opts.show_progress_every_n_steps > 0:
+ if abs(self.sampling_step - self.current_image_sampling_step) >= opts.show_progress_every_n_steps:
self.do_set_current_image()
def do_set_current_image(self):
diff --git a/modules/ui_control.py b/modules/ui_control.py
index 5a146a8fc..7baf74d75 100644
--- a/modules/ui_control.py
+++ b/modules/ui_control.py
@@ -254,7 +254,7 @@ def create_ui(_blocks: gr.Blocks=None):
control_start = gr.Slider(label="CN Start", minimum=0.0, maximum=1.0, step=0.05, value=0, elem_id=f'control_unit-{i}-start')
control_end = gr.Slider(label="CN End", minimum=0.0, maximum=1.0, step=0.05, value=1.0, elem_id=f'control_unit-{i}-end')
control_mode = gr.Dropdown(label="CN Mode", choices=['default'], value='default', visible=False, elem_id=f'control_unit-{i}-mode')
- control_tile = gr.Dropdown(label="CN Tiles", choices=['1x1', '1x2', '1x3', '1x4', '2x1', '2x1', '2x2', '2x3', '2x4', '3x1', '3x2', '3x3', '3x4', '4x1', '4x2', '4x3', '4x4'], value='1x1', visible=False, elem_id=f'control_unit-{i}-tile')
+ control_tile = gr.Dropdown(label="CN Tiles", choices=[x.strip() for x in shared.opts.control_tiles.split(',') if 'x' in x], value='1x1', visible=False, elem_id=f'control_unit-{i}-tile')
reset_btn = ui_components.ToolButton(value=ui_symbols.reset)
image_upload = gr.UploadButton(label=ui_symbols.upload, file_types=['image'], elem_classes=['form', 'gradio-button', 'tool'])
image_reuse= ui_components.ToolButton(value=ui_symbols.reuse)
diff --git a/scripts/freescale.py b/scripts/freescale.py
new file mode 100644
index 000000000..672ceea41
--- /dev/null
+++ b/scripts/freescale.py
@@ -0,0 +1,130 @@
+import gradio as gr
+from modules import scripts, processing, shared, sd_models
+
+
+registered = False
+
+
+class Script(scripts.Script):
+ def __init__(self):
+ super().__init__()
+ self.orig_pipe = None
+ self.orig_slice = None
+ self.orig_tile = None
+ self.is_img2img = False
+
+ def title(self):
+ return 'FreeScale: Tuning-Free Scale Fusion'
+
+ def show(self, is_img2img):
+ self.is_img2img = is_img2img
+ return shared.native
+
+ def ui(self, _is_img2img): # ui elements
+ with gr.Row():
+            gr.HTML('  FreeScale: Tuning-Free Scale Fusion')
+ with gr.Row():
+ cosine_scale = gr.Slider(minimum=0.1, maximum=5.0, value=2.0, label='Cosine scale')
+ override_sampler = gr.Checkbox(value=True, label='Override sampler')
+ with gr.Row(visible=self.is_img2img):
+ cosine_scale_bg = gr.Slider(minimum=0.1, maximum=5.0, value=1.0, label='Cosine Background')
+ dilate_tau = gr.Slider(minimum=1, maximum=100, value=35, label='Dilate tau')
+ with gr.Row():
+ s1_enable = gr.Checkbox(value=True, label='1st Stage', interactive=False)
+ s1_scale = gr.Slider(minimum=1, maximum=8.0, value=1.0, label='Scale')
+ s1_restart = gr.Slider(minimum=0, maximum=1.0, value=0.75, label='Restart step')
+ with gr.Row():
+ s2_enable = gr.Checkbox(value=True, label='2nd Stage')
+ s2_scale = gr.Slider(minimum=1, maximum=8.0, value=2.0, label='Scale')
+ s2_restart = gr.Slider(minimum=0, maximum=1.0, value=0.75, label='Restart step')
+ with gr.Row():
+ s3_enable = gr.Checkbox(value=False, label='3rd Stage')
+ s3_scale = gr.Slider(minimum=1, maximum=8.0, value=3.0, label='Scale')
+ s3_restart = gr.Slider(minimum=0, maximum=1.0, value=0.75, label='Restart step')
+ with gr.Row():
+ s4_enable = gr.Checkbox(value=False, label='4th Stage')
+ s4_scale = gr.Slider(minimum=1, maximum=8.0, value=4.0, label='Scale')
+ s4_restart = gr.Slider(minimum=0, maximum=1.0, value=0.75, label='Restart step')
+ return [cosine_scale, override_sampler, cosine_scale_bg, dilate_tau, s1_enable, s1_scale, s1_restart, s2_enable, s2_scale, s2_restart, s3_enable, s3_scale, s3_restart, s4_enable, s4_scale, s4_restart]
+
+ def run(self, p: processing.StableDiffusionProcessing, cosine_scale, override_sampler, cosine_scale_bg, dilate_tau, s1_enable, s1_scale, s1_restart, s2_enable, s2_scale, s2_restart, s3_enable, s3_scale, s3_restart, s4_enable, s4_scale, s4_restart): # pylint: disable=arguments-differ
+ supported_model_list = ['sdxl']
+ if shared.sd_model_type not in supported_model_list:
+ shared.log.warning(f'FreeScale: class={shared.sd_model.__class__.__name__} model={shared.sd_model_type} required={supported_model_list}')
+ return None
+
+ if self.is_img2img:
+ if p.init_images is None or len(p.init_images) == 0:
+ shared.log.warning('FreeScale: missing input image')
+ return None
+
+ from modules.freescale import StableDiffusionXLFreeScale, StableDiffusionXLFreeScaleImg2Img
+ self.orig_pipe = shared.sd_model
+ self.orig_slice = shared.opts.diffusers_vae_slicing
+ self.orig_tile = shared.opts.diffusers_vae_tiling
+
+ def scale(x):
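+            # Target [width, height] for scale factor x; falls back to the init image dimensions when width/height are 0.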
+ if (p.width == 0 or p.height == 0) and p.init_images is not None:
+ p.width, p.height = p.init_images[0].width, p.init_images[0].height
+ resolution = [int(8 * p.width * x // 8), int(8 * p.height * x // 8)]
+ return resolution
+
+ scales = []
+ resolutions_list = []
+ restart_steps = []
+ if s1_enable:
+ scales.append(s1_scale)
+ resolutions_list.append(scale(s1_scale))
+ restart_steps.append(int(p.steps * s1_restart))
+ if s2_enable and s2_scale > s1_scale:
+ scales.append(s2_scale)
+ resolutions_list.append(scale(s2_scale))
+ restart_steps.append(int(p.steps * s2_restart))
+ if s3_enable and s3_scale > s2_scale:
+ scales.append(s3_scale)
+ resolutions_list.append(scale(s3_scale))
+ restart_steps.append(int(p.steps * s3_restart))
+ if s4_enable and s4_scale > s3_scale:
+ scales.append(s4_scale)
+ resolutions_list.append(scale(s4_scale))
+ restart_steps.append(int(p.steps * s4_restart))
+
+ p.task_args['resolutions_list'] = resolutions_list
+ p.task_args['cosine_scale'] = cosine_scale
+ p.task_args['restart_steps'] = [min(max(1, step), p.steps-1) for step in restart_steps]
+ if self.is_img2img:
+ p.task_args['cosine_scale_bg'] = cosine_scale_bg
+ p.task_args['dilate_tau'] = dilate_tau
+ p.task_args['img_path'] = p.init_images[0]
+ p.init_images = None
+ if override_sampler:
+ p.sampler_name = 'Euler a'
+
+ if p.width < 1024 or p.height < 1024:
+ shared.log.error(f'FreeScale: width={p.width} height={p.height} minimum=1024')
+ return None
+
+ if not self.is_img2img:
+ shared.sd_model = sd_models.switch_pipe(StableDiffusionXLFreeScale, shared.sd_model)
+ else:
+ shared.sd_model = sd_models.switch_pipe(StableDiffusionXLFreeScaleImg2Img, shared.sd_model)
+ shared.sd_model.enable_vae_slicing()
+ shared.sd_model.enable_vae_tiling()
+
+ shared.log.info(f'FreeScale: mode={"txt" if not self.is_img2img else "img"} cosine={cosine_scale} bg={cosine_scale_bg} tau={dilate_tau} scales={scales} resolutions={resolutions_list} steps={restart_steps} sampler={p.sampler_name}')
+ resolutions = ','.join([f'{x[0]}x{x[1]}' for x in resolutions_list])
+ steps = ','.join([str(x) for x in restart_steps])
+ p.extra_generation_params["FreeScale"] = f'cosine {cosine_scale} resolutions {resolutions} steps {steps}'
+
+ def after(self, p: processing.StableDiffusionProcessing, processed: processing.Processed, *args): # pylint: disable=arguments-differ, unused-argument
+ if self.orig_pipe is None:
+ return processed
+ # restore pipeline
+ if shared.sd_model_type == "sdxl":
+ shared.sd_model = self.orig_pipe
+ self.orig_pipe = None
+ if not self.orig_slice:
+ shared.sd_model.disable_vae_slicing()
+ if not self.orig_tile:
+ shared.sd_model.disable_vae_tiling()
+ return processed
From b1f1864099907e539b5fbea9bf765438afe2f327 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 15 Dec 2024 13:12:35 -0500
Subject: [PATCH 114/162] lint updates
Signed-off-by: Vladimir Mandic
---
.pylintrc | 1 +
.ruff.toml | 1 +
modules/control/run.py | 332 +++++++++++++++++++++-------------------
modules/lora/network.py | 1 -
modules/shared.py | 2 +-
5 files changed, 175 insertions(+), 162 deletions(-)
diff --git a/.pylintrc b/.pylintrc
index 59f1cb127..ad42ddd13 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -13,6 +13,7 @@ ignore-paths=/usr/lib/.*$,
modules/control/units,
modules/ctrlx,
modules/dml,
+ modules/freescale,
modules/ggml,
modules/hidiffusion,
modules/hijack,
diff --git a/.ruff.toml b/.ruff.toml
index c2d4a6f9a..4bab64260 100644
--- a/.ruff.toml
+++ b/.ruff.toml
@@ -7,6 +7,7 @@ exclude = [
"modules/consistory",
"modules/control/proc",
"modules/control/units",
+ "modules/freescale",
"modules/ggml",
"modules/hidiffusion",
"modules/hijack",
diff --git a/modules/control/run.py b/modules/control/run.py
index ac1ff233d..e780b9bae 100644
--- a/modules/control/run.py
+++ b/modules/control/run.py
@@ -45,6 +45,167 @@ def terminate(msg):
return msg
+def set_pipe(p, has_models, unit_type, selected_models, active_model, active_strength, control_conditioning, control_guidance_start, control_guidance_end, inits):
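+    # Build the pipeline for the active control units (T2I-Adapter, ControlNet, ControlNet-XS, ControlLLLite or Reference), or fall back to the base pipeline when no control models are active.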
+ global pipe, instance # pylint: disable=global-statement
+ pipe = None
+ if has_models:
+ p.ops.append('control')
+ p.extra_generation_params["Control type"] = unit_type # overriden later with pretty-print
+ p.extra_generation_params["Control model"] = ';'.join([(m.model_id or '') for m in active_model if m.model is not None])
+ p.extra_generation_params["Control conditioning"] = control_conditioning if isinstance(control_conditioning, list) else [control_conditioning]
+ p.extra_generation_params['Control start'] = control_guidance_start if isinstance(control_guidance_start, list) else [control_guidance_start]
+ p.extra_generation_params['Control end'] = control_guidance_end if isinstance(control_guidance_end, list) else [control_guidance_end]
+ p.extra_generation_params["Control conditioning"] = ';'.join([str(c) for c in p.extra_generation_params["Control conditioning"]])
+ p.extra_generation_params['Control start'] = ';'.join([str(c) for c in p.extra_generation_params['Control start']])
+ p.extra_generation_params['Control end'] = ';'.join([str(c) for c in p.extra_generation_params['Control end']])
+ if unit_type == 't2i adapter' and has_models:
+ p.extra_generation_params["Control type"] = 'T2I-Adapter'
+ p.task_args['adapter_conditioning_scale'] = control_conditioning
+ instance = t2iadapter.AdapterPipeline(selected_models, shared.sd_model)
+ pipe = instance.pipeline
+ if inits is not None:
+ shared.log.warning('Control: T2I-Adapter does not support separate init image')
+ elif unit_type == 'controlnet' and has_models:
+ p.extra_generation_params["Control type"] = 'ControlNet'
+ p.task_args['controlnet_conditioning_scale'] = control_conditioning
+ p.task_args['control_guidance_start'] = control_guidance_start
+ p.task_args['control_guidance_end'] = control_guidance_end
+ p.task_args['guess_mode'] = p.guess_mode
+ instance = controlnet.ControlNetPipeline(selected_models, shared.sd_model, p=p)
+ pipe = instance.pipeline
+ elif unit_type == 'xs' and has_models:
+ p.extra_generation_params["Control type"] = 'ControlNet-XS'
+ p.controlnet_conditioning_scale = control_conditioning
+ p.control_guidance_start = control_guidance_start
+ p.control_guidance_end = control_guidance_end
+ instance = xs.ControlNetXSPipeline(selected_models, shared.sd_model)
+ pipe = instance.pipeline
+ if inits is not None:
+ shared.log.warning('Control: ControlNet-XS does not support separate init image')
+ elif unit_type == 'lite' and has_models:
+ p.extra_generation_params["Control type"] = 'ControlLLLite'
+ p.controlnet_conditioning_scale = control_conditioning
+ instance = lite.ControlLLitePipeline(shared.sd_model)
+ pipe = instance.pipeline
+ if inits is not None:
+ shared.log.warning('Control: ControlLLLite does not support separate init image')
+ elif unit_type == 'reference' and has_models:
+ p.extra_generation_params["Control type"] = 'Reference'
+ p.extra_generation_params["Control attention"] = p.attention
+ p.task_args['reference_attn'] = 'Attention' in p.attention
+ p.task_args['reference_adain'] = 'Adain' in p.attention
+ p.task_args['attention_auto_machine_weight'] = p.query_weight
+ p.task_args['gn_auto_machine_weight'] = p.adain_weight
+ p.task_args['style_fidelity'] = p.fidelity
+ instance = reference.ReferencePipeline(shared.sd_model)
+ pipe = instance.pipeline
+ if inits is not None:
+            shared.log.warning('Control: Reference does not support separate init image')
+ else: # run in txt2img/img2img mode
+ if len(active_strength) > 0:
+ p.strength = active_strength[0]
+ pipe = shared.sd_model
+ instance = None
+ debug(f'Control: run type={unit_type} models={has_models} pipe={pipe.__class__.__name__ if pipe is not None else None}')
+ return pipe
+
+
+def check_active(p, unit_type, units):
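+    # Gather enabled units of the requested type: preprocessors, models, strengths and start/end steps; models of disabled units are offloaded to CPU.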
+ active_process: List[processors.Processor] = [] # all active preprocessors
+ active_model: List[Union[controlnet.ControlNet, xs.ControlNetXS, t2iadapter.Adapter]] = [] # all active models
+ active_strength: List[float] = [] # strength factors for all active models
+ active_start: List[float] = [] # start step for all active models
+ active_end: List[float] = [] # end step for all active models
+ num_units = 0
+ for u in units:
+ if u.type != unit_type:
+ continue
+ num_units += 1
+ debug(f'Control unit: i={num_units} type={u.type} enabled={u.enabled}')
+ if not u.enabled:
+ if u.controlnet is not None and u.controlnet.model is not None:
+ debug(f'Control unit offload: model="{u.controlnet.model_id}" device={devices.cpu}')
+ sd_models.move_model(u.controlnet.model, devices.cpu)
+ continue
+ if u.controlnet is not None and u.controlnet.model is not None:
+ debug(f'Control unit offload: model="{u.controlnet.model_id}" device={devices.device}')
+ sd_models.move_model(u.controlnet.model, devices.device)
+ if unit_type == 't2i adapter' and u.adapter.model is not None:
+ active_process.append(u.process)
+ active_model.append(u.adapter)
+ active_strength.append(float(u.strength))
+ p.adapter_conditioning_factor = u.factor
+ shared.log.debug(f'Control T2I-Adapter unit: i={num_units} process="{u.process.processor_id}" model="{u.adapter.model_id}" strength={u.strength} factor={u.factor}')
+ elif unit_type == 'controlnet' and u.controlnet.model is not None:
+ active_process.append(u.process)
+ active_model.append(u.controlnet)
+ active_strength.append(float(u.strength))
+ active_start.append(float(u.start))
+ active_end.append(float(u.end))
+ p.guess_mode = u.guess
+ if isinstance(u.mode, str):
+ p.control_mode = u.choices.index(u.mode) if u.mode in u.choices else 0
+ p.is_tile = p.is_tile or 'tile' in u.mode.lower()
+ p.control_tile = u.tile
+ p.extra_generation_params["Control mode"] = u.mode
+ shared.log.debug(f'Control ControlNet unit: i={num_units} process="{u.process.processor_id}" model="{u.controlnet.model_id}" strength={u.strength} guess={u.guess} start={u.start} end={u.end} mode={u.mode}')
+ elif unit_type == 'xs' and u.controlnet.model is not None:
+ active_process.append(u.process)
+ active_model.append(u.controlnet)
+ active_strength.append(float(u.strength))
+ active_start.append(float(u.start))
+ active_end.append(float(u.end))
+ shared.log.debug(f'Control ControlNet-XS unit: i={num_units} process={u.process.processor_id} model={u.controlnet.model_id} strength={u.strength} guess={u.guess} start={u.start} end={u.end}')
+ elif unit_type == 'lite' and u.controlnet.model is not None:
+ active_process.append(u.process)
+ active_model.append(u.controlnet)
+ active_strength.append(float(u.strength))
+ shared.log.debug(f'Control ControlLLite unit: i={num_units} process={u.process.processor_id} model={u.controlnet.model_id} strength={u.strength} guess={u.guess} start={u.start} end={u.end}')
+ elif unit_type == 'reference':
+ p.override = u.override
+ p.attention = u.attention
+ p.query_weight = float(u.query_weight)
+ p.adain_weight = float(u.adain_weight)
+ p.fidelity = u.fidelity
+ shared.log.debug('Control Reference unit')
+ else:
+ if u.process.processor_id is not None:
+ active_process.append(u.process)
+ shared.log.debug(f'Control process unit: i={num_units} process={u.process.processor_id}')
+ active_strength.append(float(u.strength))
+ debug(f'Control active: process={len(active_process)} model={len(active_model)}')
+ return active_process, active_model, active_strength, active_start, active_end
+
+
+def check_enabled(p, unit_type, units, active_model, active_strength, active_start, active_end):
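+    # Resolve selected model(s) plus conditioning strength and guidance start/end: scalars for a single active model, lists when multiple models are active.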
+ has_models = False
+ selected_models: List[Union[controlnet.ControlNetModel, xs.ControlNetXSModel, t2iadapter.AdapterModel]] = None
+ control_conditioning = None
+ control_guidance_start = None
+ control_guidance_end = None
+ if unit_type == 't2i adapter' or unit_type == 'controlnet' or unit_type == 'xs' or unit_type == 'lite':
+ if len(active_model) == 0:
+ selected_models = None
+ elif len(active_model) == 1:
+ selected_models = active_model[0].model if active_model[0].model is not None else None
+ p.is_tile = p.is_tile or 'tile' in active_model[0].model_id.lower()
+ has_models = selected_models is not None
+ control_conditioning = active_strength[0] if len(active_strength) > 0 else 1 # strength or list[strength]
+ control_guidance_start = active_start[0] if len(active_start) > 0 else 0
+ control_guidance_end = active_end[0] if len(active_end) > 0 else 1
+ else:
+ selected_models = [m.model for m in active_model if m.model is not None]
+ has_models = len(selected_models) > 0
+ control_conditioning = active_strength[0] if len(active_strength) == 1 else list(active_strength) # strength or list[strength]
+ control_guidance_start = active_start[0] if len(active_start) == 1 else list(active_start)
+ control_guidance_end = active_end[0] if len(active_end) == 1 else list(active_end)
+ elif unit_type == 'reference':
+ has_models = any(u.enabled for u in units if u.type == 'reference')
+ else:
+ pass
+ return has_models, selected_models, control_conditioning, control_guidance_start, control_guidance_end
+
+
def control_set(kwargs):
if kwargs:
global p_extra_args # pylint: disable=global-statement
@@ -88,16 +249,11 @@ def control_run(state: str = '',
if u.process is not None and u.process.override is None and u.override is not None:
u.process.override = u.override
- global instance, pipe, original_pipeline # pylint: disable=global-statement
+ global pipe, original_pipeline # pylint: disable=global-statement
debug(f'Control: type={unit_type} input={inputs} init={inits} type={input_type}')
if inputs is None or (type(inputs) is list and len(inputs) == 0):
inputs = [None]
output_images: List[Image.Image] = [] # output images
- active_process: List[processors.Processor] = [] # all active preprocessors
- active_model: List[Union[controlnet.ControlNet, xs.ControlNetXS, t2iadapter.Adapter]] = [] # all active models
- active_strength: List[float] = [] # strength factors for all active models
- active_start: List[float] = [] # start step for all active models
- active_end: List[float] = [] # end step for all active models
processed_image: Image.Image = None # last processed image
if mask is not None and input_type == 0:
input_type = 1 # inpaint always requires control_image
@@ -226,160 +382,17 @@ def control_run(state: str = '',
unit_type = unit_type.strip().lower() if unit_type is not None else ''
t0 = time.time()
- num_units = 0
- for u in units:
- if u.type != unit_type:
- continue
- num_units += 1
- debug(f'Control unit: i={num_units} type={u.type} enabled={u.enabled}')
- if not u.enabled:
- if u.controlnet is not None and u.controlnet.model is not None:
- debug(f'Control unit offload: model="{u.controlnet.model_id}" device={devices.cpu}')
- sd_models.move_model(u.controlnet.model, devices.cpu)
- continue
- if u.controlnet is not None and u.controlnet.model is not None:
- debug(f'Control unit offload: model="{u.controlnet.model_id}" device={devices.device}')
- sd_models.move_model(u.controlnet.model, devices.device)
- if unit_type == 't2i adapter' and u.adapter.model is not None:
- active_process.append(u.process)
- active_model.append(u.adapter)
- active_strength.append(float(u.strength))
- p.adapter_conditioning_factor = u.factor
- shared.log.debug(f'Control T2I-Adapter unit: i={num_units} process="{u.process.processor_id}" model="{u.adapter.model_id}" strength={u.strength} factor={u.factor}')
- elif unit_type == 'controlnet' and u.controlnet.model is not None:
- active_process.append(u.process)
- active_model.append(u.controlnet)
- active_strength.append(float(u.strength))
- active_start.append(float(u.start))
- active_end.append(float(u.end))
- p.guess_mode = u.guess
- if isinstance(u.mode, str):
- p.control_mode = u.choices.index(u.mode) if u.mode in u.choices else 0
- p.is_tile = p.is_tile or 'tile' in u.mode.lower()
- p.control_tile = u.tile
- p.extra_generation_params["Control mode"] = u.mode
- shared.log.debug(f'Control ControlNet unit: i={num_units} process="{u.process.processor_id}" model="{u.controlnet.model_id}" strength={u.strength} guess={u.guess} start={u.start} end={u.end} mode={u.mode}')
- elif unit_type == 'xs' and u.controlnet.model is not None:
- active_process.append(u.process)
- active_model.append(u.controlnet)
- active_strength.append(float(u.strength))
- active_start.append(float(u.start))
- active_end.append(float(u.end))
- shared.log.debug(f'Control ControlNet-XS unit: i={num_units} process={u.process.processor_id} model={u.controlnet.model_id} strength={u.strength} guess={u.guess} start={u.start} end={u.end}')
- elif unit_type == 'lite' and u.controlnet.model is not None:
- active_process.append(u.process)
- active_model.append(u.controlnet)
- active_strength.append(float(u.strength))
- shared.log.debug(f'Control ControlLLite unit: i={num_units} process={u.process.processor_id} model={u.controlnet.model_id} strength={u.strength} guess={u.guess} start={u.start} end={u.end}')
- elif unit_type == 'reference':
- p.override = u.override
- p.attention = u.attention
- p.query_weight = float(u.query_weight)
- p.adain_weight = float(u.adain_weight)
- p.fidelity = u.fidelity
- shared.log.debug('Control Reference unit')
- else:
- if u.process.processor_id is not None:
- active_process.append(u.process)
- shared.log.debug(f'Control process unit: i={num_units} process={u.process.processor_id}')
- active_strength.append(float(u.strength))
- debug(f'Control active: process={len(active_process)} model={len(active_model)}')
+
+ active_process, active_model, active_strength, active_start, active_end = check_active(p, unit_type, units)
+ has_models, selected_models, control_conditioning, control_guidance_start, control_guidance_end = check_enabled(p, unit_type, units, active_model, active_strength, active_start, active_end)
processed: processing.Processed = None
image_txt = ''
info_txt = []
- has_models = False
- selected_models: List[Union[controlnet.ControlNetModel, xs.ControlNetXSModel, t2iadapter.AdapterModel]] = None
- control_conditioning = None
- control_guidance_start = None
- control_guidance_end = None
- if unit_type == 't2i adapter' or unit_type == 'controlnet' or unit_type == 'xs' or unit_type == 'lite':
- if len(active_model) == 0:
- selected_models = None
- elif len(active_model) == 1:
- selected_models = active_model[0].model if active_model[0].model is not None else None
- p.is_tile = p.is_tile or 'tile' in active_model[0].model_id.lower()
- has_models = selected_models is not None
- control_conditioning = active_strength[0] if len(active_strength) > 0 else 1 # strength or list[strength]
- control_guidance_start = active_start[0] if len(active_start) > 0 else 0
- control_guidance_end = active_end[0] if len(active_end) > 0 else 1
- else:
- selected_models = [m.model for m in active_model if m.model is not None]
- has_models = len(selected_models) > 0
- control_conditioning = active_strength[0] if len(active_strength) == 1 else list(active_strength) # strength or list[strength]
- control_guidance_start = active_start[0] if len(active_start) == 1 else list(active_start)
- control_guidance_end = active_end[0] if len(active_end) == 1 else list(active_end)
- elif unit_type == 'reference':
- has_models = any(u.enabled for u in units if u.type == 'reference')
- else:
- pass
+
p.is_tile = p.is_tile and has_models
- def set_pipe():
- global pipe, instance # pylint: disable=global-statement
- pipe = None
- if has_models:
- p.ops.append('control')
- p.extra_generation_params["Control type"] = unit_type # overriden later with pretty-print
- p.extra_generation_params["Control model"] = ';'.join([(m.model_id or '') for m in active_model if m.model is not None])
- p.extra_generation_params["Control conditioning"] = control_conditioning if isinstance(control_conditioning, list) else [control_conditioning]
- p.extra_generation_params['Control start'] = control_guidance_start if isinstance(control_guidance_start, list) else [control_guidance_start]
- p.extra_generation_params['Control end'] = control_guidance_end if isinstance(control_guidance_end, list) else [control_guidance_end]
- p.extra_generation_params["Control conditioning"] = ';'.join([str(c) for c in p.extra_generation_params["Control conditioning"]])
- p.extra_generation_params['Control start'] = ';'.join([str(c) for c in p.extra_generation_params['Control start']])
- p.extra_generation_params['Control end'] = ';'.join([str(c) for c in p.extra_generation_params['Control end']])
- if unit_type == 't2i adapter' and has_models:
- p.extra_generation_params["Control type"] = 'T2I-Adapter'
- p.task_args['adapter_conditioning_scale'] = control_conditioning
- instance = t2iadapter.AdapterPipeline(selected_models, shared.sd_model)
- pipe = instance.pipeline
- if inits is not None:
- shared.log.warning('Control: T2I-Adapter does not support separate init image')
- elif unit_type == 'controlnet' and has_models:
- p.extra_generation_params["Control type"] = 'ControlNet'
- p.task_args['controlnet_conditioning_scale'] = control_conditioning
- p.task_args['control_guidance_start'] = control_guidance_start
- p.task_args['control_guidance_end'] = control_guidance_end
- p.task_args['guess_mode'] = p.guess_mode
- instance = controlnet.ControlNetPipeline(selected_models, shared.sd_model, p=p)
- pipe = instance.pipeline
- elif unit_type == 'xs' and has_models:
- p.extra_generation_params["Control type"] = 'ControlNet-XS'
- p.controlnet_conditioning_scale = control_conditioning
- p.control_guidance_start = control_guidance_start
- p.control_guidance_end = control_guidance_end
- instance = xs.ControlNetXSPipeline(selected_models, shared.sd_model)
- pipe = instance.pipeline
- if inits is not None:
- shared.log.warning('Control: ControlNet-XS does not support separate init image')
- elif unit_type == 'lite' and has_models:
- p.extra_generation_params["Control type"] = 'ControlLLLite'
- p.controlnet_conditioning_scale = control_conditioning
- instance = lite.ControlLLitePipeline(shared.sd_model)
- pipe = instance.pipeline
- if inits is not None:
- shared.log.warning('Control: ControlLLLite does not support separate init image')
- elif unit_type == 'reference' and has_models:
- p.extra_generation_params["Control type"] = 'Reference'
- p.extra_generation_params["Control attention"] = p.attention
- p.task_args['reference_attn'] = 'Attention' in p.attention
- p.task_args['reference_adain'] = 'Adain' in p.attention
- p.task_args['attention_auto_machine_weight'] = p.query_weight
- p.task_args['gn_auto_machine_weight'] = p.adain_weight
- p.task_args['style_fidelity'] = p.fidelity
- instance = reference.ReferencePipeline(shared.sd_model)
- pipe = instance.pipeline
- if inits is not None:
- shared.log.warning('Control: ControlNet-XS does not support separate init image')
- else: # run in txt2img/img2img mode
- if len(active_strength) > 0:
- p.strength = active_strength[0]
- pipe = shared.sd_model
- instance = None
- debug(f'Control: run type={unit_type} models={has_models} pipe={pipe.__class__.__name__ if pipe is not None else None}')
- return pipe
-
- pipe = set_pipe()
+ pipe = set_pipe(p, has_models, unit_type, selected_models, active_model, active_strength, control_conditioning, control_guidance_start, control_guidance_end, inits)
debug(f'Control pipeline: class={pipe.__class__.__name__} args={vars(p)}')
t1, t2, t3 = time.time(), 0, 0
status = True
@@ -433,7 +446,7 @@ def set_pipe():
while status:
if pipe is None: # pipe may have been reset externally
- pipe = set_pipe()
+ pipe = set_pipe(p, has_models, unit_type, selected_models, active_model, active_strength, control_conditioning, control_guidance_start, control_guidance_end, inits)
debug(f'Control pipeline reinit: class={pipe.__class__.__name__}')
processed_image = None
if frame is not None:
@@ -578,7 +591,7 @@ def set_pipe():
elif 'image' in possible:
p.task_args['image'] = [p.init_images] if isinstance(p.init_images, Image.Image) else p.init_images
if 'control_mode' in possible:
- p.task_args['control_mode'] = p.control_mode
+ p.task_args['control_mode'] = getattr(p, 'control_mode', None)
if 'strength' in possible:
p.task_args['strength'] = p.denoising_strength
p.init_images = None
@@ -638,8 +651,8 @@ def set_pipe():
if unit_type == 'lite':
p.init_image = [input_image]
instance.apply(selected_models, processed_image, control_conditioning)
- if p.control_mode is not None:
- p.task_args['control_mode'] = p.control_mode
+ if getattr(p, 'control_mode', None) is not None:
+ p.task_args['control_mode'] = getattr(p, 'control_mode', None)
if hasattr(p, 'init_images') and p.init_images is None: # delete empty
del p.init_images
@@ -770,5 +783,4 @@ def set_pipe():
html_txt = html_txt + infotext_to_html(info_txt[0])
if is_generator:
yield (output_images, blended_image, html_txt, output_filename)
- else:
- return (output_images, blended_image, html_txt, output_filename)
+ return (output_images, blended_image, html_txt, output_filename)
diff --git a/modules/lora/network.py b/modules/lora/network.py
index 8e6f87368..97feb76f1 100644
--- a/modules/lora/network.py
+++ b/modules/lora/network.py
@@ -2,7 +2,6 @@
import enum
from typing import Union
from collections import namedtuple
-
from modules import sd_models, hashes, shared
diff --git a/modules/shared.py b/modules/shared.py
index 90dbe2647..5e353b1b1 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -490,7 +490,7 @@ def get_default_modes():
"advanced_sep": OptionInfo("Advanced Options
", "", gr.HTML),
"sd_checkpoint_autoload": OptionInfo(True, "Model autoload on start"),
"sd_checkpoint_autodownload": OptionInfo(True, "Model auto-download on demand"),
- "stream_load": OptionInfo(False, "Load models using stream loading method", gr.Checkbox, {"visible": not native }),
+ "stream_load": OptionInfo(False, "Model load using streams", gr.Checkbox),
"diffusers_eval": OptionInfo(True, "Force model eval", gr.Checkbox, {"visible": False }),
"diffusers_to_gpu": OptionInfo(False, "Load model directly to GPU"),
"disable_accelerate": OptionInfo(False, "Disable accelerate", gr.Checkbox, {"visible": False }),
From b89f41082daeda47f93a8d8013cdb95754a4e29e Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sun, 15 Dec 2024 13:28:40 -0500
Subject: [PATCH 115/162] update requirements
Signed-off-by: Vladimir Mandic
---
TODO.md | 2 +-
requirements.txt | 8 ++++----
wiki | 2 +-
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/TODO.md b/TODO.md
index 63088d39f..996da5ad9 100644
--- a/TODO.md
+++ b/TODO.md
@@ -17,12 +17,12 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
- SANA:
- LTX-Video:
- TorchAO:
-- ControlNetUnion/ControlNetPromax:
## Other
- IPAdapter negative:
- Control API enhance scripts compatibility
+- PixelSmith:
## Workaround in place
diff --git a/requirements.txt b/requirements.txt
index d9eba6958..572c4927b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -41,18 +41,18 @@ torchsde==0.2.6
antlr4-python3-runtime==4.9.3
requests==2.32.3
tqdm==4.66.5
-accelerate==1.1.1
+accelerate==1.2.1
opencv-contrib-python-headless==4.9.0.80
einops==0.4.1
gradio==3.43.2
-huggingface_hub==0.26.2
+huggingface_hub==0.26.5
numexpr==2.8.8
numpy==1.26.4
numba==0.59.1
protobuf==4.25.3
pytorch_lightning==1.9.4
-tokenizers==0.20.3
-transformers==4.46.3
+tokenizers==0.21.0
+transformers==4.47.0
urllib3==1.26.19
Pillow==10.4.0
timm==0.9.16
diff --git a/wiki b/wiki
index 8d63a0f04..a4eaad83c 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 8d63a0f04687f24c4ef413f231970087f167175c
+Subproject commit a4eaad83ccb8e82cb91fde4c038877616ed012d6
From ab07788ab5bbd5556ad310482ede102582a86f59 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 16 Dec 2024 11:30:15 -0500
Subject: [PATCH 116/162] add sana
Signed-off-by: Vladimir Mandic
---
CHANGELOG.md | 13 ++-
html/reference.json | 13 +++
installer.py | 2 +-
...rge-Model--Sana_1600M_1024px_diffusers.jpg | Bin 0 -> 53061 bytes
modules/model_flux.py | 6 +-
modules/model_omnigen.py | 3 +-
modules/model_sana.py | 25 ++++
modules/model_te.py | 3 +-
modules/modeldata.py | 83 +++++++------
modules/pag/__init__.py | 7 +-
modules/processing_vae.py | 3 +-
modules/schedulers/scheduler_dpm_flowmatch.py | 3 +-
modules/sd_detect.py | 2 +
modules/sd_models.py | 3 +
modules/sd_samplers.py | 9 +-
modules/sd_samplers_common.py | 1 +
modules/sd_samplers_diffusers.py | 109 +++++++++---------
modules/shared_items.py | 21 ++--
modules/ui_sections.py | 4 +-
19 files changed, 188 insertions(+), 122 deletions(-)
create mode 100644 models/Reference/Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg
create mode 100644 modules/model_sana.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 35d3fcbe9..3a07f154c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,9 +1,19 @@
# Change Log for SD.Next
-## Update for 2024-12-15
+## Update for 2024-12-16
+
+- Sana: both 1.6B and 0.6B
+- ControlNet: better Union results, support for ProMax and Tile
+- FreeScale: optimized iterative generation of images at progressively larger scales
+- Samplers: UniPC, DEIS, SA, DPM-Multistep: add FlowMatch sigma method and prediction type
### New models and integrations
+- [NVLabs Sana](https://huggingface.co/Efficient-Large-Model/Sana_1600M_1024px)
+  **Sana** can synthesize high-resolution images with strong text-image alignment, using **Gemma2** as the text encoder
+ support for both 1.6B and 0.6B models
+ to use, select from *networks -> models -> reference* and models will be auto-downloaded on first use
+ *reference values*: sampler: default, width/height: 1024, guidance scale: 4.5, attention guidance: 3.0, adaptive scaling: 0.0
- [Flux Tools](https://blackforestlabs.ai/flux-1-tools/)
**Redux** is actually a tool, **Fill** is inpaint/outpaint optimized version of *Flux-dev*
**Canny** & **Depth** are optimized versions of *Flux-dev* for their respective tasks: they are *not* ControlNets that work on top of a model
@@ -98,6 +108,7 @@
- **IPEX**: update to IPEX 2.5.10+xpu
- **OpenVINO**: update to 2024.5.0
- **Sampler** improvements
+ - UniPC, DEIS, SA, DPM-Multistep: allow FlowMatch method
- Euler FlowMatch: add sigma methods (*karras/exponential/betas*)
- Euler FlowMatch: allow using timestep presets to set sigmas
- DPM FlowMatch: update all and add sigma methods
diff --git a/html/reference.json b/html/reference.json
index 4a549586f..8a0965697 100644
--- a/html/reference.json
+++ b/html/reference.json
@@ -180,6 +180,19 @@
"extras": "sampler: Default, cfg_scale: 3.5"
},
+ "NVLabs Sana 1.6B": {
+ "path": "Efficient-Large-Model/Sana_1600M_1024px_diffusers",
+ "desc": "Sana is a text-to-image framework that can efficiently generate images up to 4096 × 4096 resolution. Sana can synthesize high-resolution, high-quality images with strong text-image alignment at a remarkably fast speed, deployable on laptop GPU.",
+ "preview": "Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg",
+ "skip": true
+ },
+ "NVLabs Sana 0.6B": {
+ "path": "Efficient-Large-Model/Sana_600M_1024px_diffusers",
+ "desc": "Sana is a text-to-image framework that can efficiently generate images up to 4096 × 4096 resolution. Sana can synthesize high-resolution, high-quality images with strong text-image alignment at a remarkably fast speed, deployable on laptop GPU.",
+ "preview": "Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg",
+ "skip": true
+ },
+
"VectorSpaceLab OmniGen v1": {
"path": "Shitao/OmniGen-v1",
"desc": "OmniGen is a unified image generation model that can generate a wide range of images from multi-modal prompts. It is designed to be simple, flexible and easy to use.",
diff --git a/installer.py b/installer.py
index 18a8ad1f1..a12b09d4d 100644
--- a/installer.py
+++ b/installer.py
@@ -459,7 +459,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
def check_diffusers():
if args.skip_all or args.skip_requirements:
return
- sha = '63243406ba5510c10d5cac931882918ceba926f9' # diffusers commit hash
+ sha = '5fb3a985173efaae7ff381b9040c386751d643da' # diffusers commit hash
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
cur = opts.get('diffusers_version', '') if minor > 0 else ''
diff --git a/models/Reference/Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg b/models/Reference/Efficient-Large-Model--Sana_1600M_1024px_diffusers.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..654f854034544bc8fafa5a0de21b54c4b3862709
GIT binary patch
literal 53061
ztvQp-iy6zxjt>Tj?gTR2INFJ;)Z{%h!~!6)TUeTvP|Lz?wX$y@?-
zv3yK?v8+z~E&Fds9UZJ<>^37Z0zHJ3-VgghVXg9O`i!+9dC`)MIsl50uih}+usI78
zdsn4?g-@v?V(E=#@$xRKKlv9q{hHd%%g*jGE{5Hd45L_A*$OCZyp$|$B;a$^DBd06
zt?C}D^)2@Hem?1YAP&etW93BIe)^ZWK$r{4?^+a*PT^jOto&Vln1b)So6qVAD%0z|f
zu2hM);r%C?^8*GQ>}Z854B7qc&blYMQ3?q(I0^(QPMg3tzB3(pTdYNZCmxNiwop=r
zN7sy^^KCVDSD`_BwYSk=K5JG$DTi5XZ(+!7wC_4U+!c4wS(YxCDMH^YL!M$nmWG->
zKUQ}KEHb(nC$WA8jB$~}+5g)?P#1*NPus=|_T-%Pi2rD*>R~b6fp$*#2aPUR^oBEi
zv5S@_BH*w}9y0`Px!%!cK3eQQZj<9})0d)%JDc%yFdAQTbj9yUU@Om#&V`N#C*vy{
zyk*adMI*b9z)w6&qLa`8=(a$-eydBLb2Fj%gJ{W5Ouy*ER7&zS%mTx0CUtdJs8H--
zXfFHbu?PAb<*pObJ_WrkNK++SNA{v?YNu5`6e1zr}-4{#L}_K=fW{4{___(&3XBEXOif&lJ(QxOv3!RA_x={
zBFYZIQ=Hj%kKrRIMFgRy7;C=}@4woch29N2t0;|(-lii0Y&t84yq%Gs1`MWW_q^VJ
z%10$cD=xmx7?0gz>FmR~m*GaUxT
z